1 //
   2 // Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //
  24 //
  25 
  26 // AArch64 Architecture Description File
  27 
  28 //----------REGISTER DEFINITION BLOCK------------------------------------------
  29 // This information is used by the matcher and the register allocator to
  30 // describe individual registers and classes of registers within the target
// architecture.
  32 
  33 register %{
  34 //----------Architecture Description Register Definitions----------------------
  35 // General Registers
  36 // "reg_def"  name ( register save type, C convention save type,
  37 //                   ideal register type, encoding );
  38 // Register Save Types:
  39 //
  40 // NS  = No-Save:       The register allocator assumes that these registers
  41 //                      can be used without saving upon entry to the method, &
  42 //                      that they do not need to be saved at call sites.
  43 //
  44 // SOC = Save-On-Call:  The register allocator assumes that these registers
  45 //                      can be used without saving upon entry to the method,
  46 //                      but that they must be saved at call sites.
  47 //
  48 // SOE = Save-On-Entry: The register allocator assumes that these registers
  49 //                      must be saved before using them upon entry to the
  50 //                      method, but they do not need to be saved at call
  51 //                      sites.
  52 //
  53 // AS  = Always-Save:   The register allocator assumes that these registers
  54 //                      must be saved before using them upon entry to the
  55 //                      method, & that they must be saved at call sites.
  56 //
  57 // Ideal Register Type is used to determine how to save & restore a
  58 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  59 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  60 //
  61 // The encoding number is the actual bit-pattern placed into the opcodes.
  62 
  63 // We must define the 64 bit int registers in two 32 bit halves, the
  64 // real lower register and a virtual upper half register. upper halves
  65 // are used by the register allocator but are not actually supplied as
  66 // operands to memory ops.
  67 //
  68 // follow the C1 compiler in making registers
  69 //
  70 //   r0-r7,r10-r26 volatile (caller save)
//   r27-r31 system (no save, no allocate)
  72 //   r8-r9 invisible to the allocator (so we can use them as scratch regs)
  73 //
// as regards Java usage. We don't use any callee save registers
// because this makes it difficult to de-optimise a frame (see the
// comment in the x86 implementation of
// Deoptimization::unwind_callee_save_values).
  77 //
  78 
  79 // General Registers
  80 
  81 reg_def R0      ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()         );
  82 reg_def R0_H    ( SOC, SOC, Op_RegI,  0, r0->as_VMReg()->next() );
  83 reg_def R1      ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()         );
  84 reg_def R1_H    ( SOC, SOC, Op_RegI,  1, r1->as_VMReg()->next() );
  85 reg_def R2      ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()         );
  86 reg_def R2_H    ( SOC, SOC, Op_RegI,  2, r2->as_VMReg()->next() );
  87 reg_def R3      ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()         );
  88 reg_def R3_H    ( SOC, SOC, Op_RegI,  3, r3->as_VMReg()->next() );
  89 reg_def R4      ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()         );
  90 reg_def R4_H    ( SOC, SOC, Op_RegI,  4, r4->as_VMReg()->next() );
  91 reg_def R5      ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()         );
  92 reg_def R5_H    ( SOC, SOC, Op_RegI,  5, r5->as_VMReg()->next() );
  93 reg_def R6      ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()         );
  94 reg_def R6_H    ( SOC, SOC, Op_RegI,  6, r6->as_VMReg()->next() );
  95 reg_def R7      ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()         );
  96 reg_def R7_H    ( SOC, SOC, Op_RegI,  7, r7->as_VMReg()->next() );
  97 reg_def R10     ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()        );
  98 reg_def R10_H   ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next());
  99 reg_def R11     ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()        );
 100 reg_def R11_H   ( SOC, SOC, Op_RegI, 11, r11->as_VMReg()->next());
 101 reg_def R12     ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()        );
 102 reg_def R12_H   ( SOC, SOC, Op_RegI, 12, r12->as_VMReg()->next());
 103 reg_def R13     ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()        );
 104 reg_def R13_H   ( SOC, SOC, Op_RegI, 13, r13->as_VMReg()->next());
 105 reg_def R14     ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()        );
 106 reg_def R14_H   ( SOC, SOC, Op_RegI, 14, r14->as_VMReg()->next());
 107 reg_def R15     ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()        );
 108 reg_def R15_H   ( SOC, SOC, Op_RegI, 15, r15->as_VMReg()->next());
 109 reg_def R16     ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()        );
 110 reg_def R16_H   ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
 111 reg_def R17     ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()        );
 112 reg_def R17_H   ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
 113 reg_def R18     ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()        );
 114 reg_def R18_H   ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
 115 reg_def R19     ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()        );
 116 reg_def R19_H   ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
 117 reg_def R20     ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()        ); // caller esp
 118 reg_def R20_H   ( SOC, SOE, Op_RegI, 20, r20->as_VMReg()->next());
 119 reg_def R21     ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()        );
 120 reg_def R21_H   ( SOC, SOE, Op_RegI, 21, r21->as_VMReg()->next());
 121 reg_def R22     ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()        );
 122 reg_def R22_H   ( SOC, SOE, Op_RegI, 22, r22->as_VMReg()->next());
 123 reg_def R23     ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()        );
 124 reg_def R23_H   ( SOC, SOE, Op_RegI, 23, r23->as_VMReg()->next());
 125 reg_def R24     ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()        );
 126 reg_def R24_H   ( SOC, SOE, Op_RegI, 24, r24->as_VMReg()->next());
 127 reg_def R25     ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()        );
 128 reg_def R25_H   ( SOC, SOE, Op_RegI, 25, r25->as_VMReg()->next());
 129 reg_def R26     ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()        );
 130 reg_def R26_H   ( SOC, SOE, Op_RegI, 26, r26->as_VMReg()->next());
 131 reg_def R27     (  NS, SOE, Op_RegI, 27, r27->as_VMReg()        ); // heapbase
 132 reg_def R27_H   (  NS, SOE, Op_RegI, 27, r27->as_VMReg()->next());
 133 reg_def R28     (  NS, SOE, Op_RegI, 28, r28->as_VMReg()        ); // thread
 134 reg_def R28_H   (  NS, SOE, Op_RegI, 28, r28->as_VMReg()->next());
 135 reg_def R29     (  NS,  NS, Op_RegI, 29, r29->as_VMReg()        ); // fp
 136 reg_def R29_H   (  NS,  NS, Op_RegI, 29, r29->as_VMReg()->next());
 137 reg_def R30     (  NS,  NS, Op_RegI, 30, r30->as_VMReg()        ); // lr
 138 reg_def R30_H   (  NS,  NS, Op_RegI, 30, r30->as_VMReg()->next());
 139 reg_def R31     (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()     ); // sp
 140 reg_def R31_H   (  NS,  NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
 141 
 142 // ----------------------------
 143 // Float/Double Registers
 144 // ----------------------------
 145 
 146 // Double Registers
 147 
 148 // The rules of ADL require that double registers be defined in pairs.
 149 // Each pair must be two 32-bit values, but not necessarily a pair of
 150 // single float registers. In each pair, ADLC-assigned register numbers
 151 // must be adjacent, with the lower number even. Finally, when the
 152 // CPU stores such a register pair to memory, the word associated with
 153 // the lower ADLC-assigned number must be stored to the lower address.
 154 
 155 // AArch64 has 32 floating-point registers. Each can store a vector of
 156 // single or double precision floating-point values up to 8 * 32
 157 // floats, 4 * 64 bit floats or 2 * 128 bit floats.  We currently only
 158 // use the first float or double element of the vector.
 159 
// For Java use, float registers v0-v15 are always save-on-call
// (whereas the platform ABI treats v8-v15 as callee save).  Float
// registers v16-v31 are SOC as per the platform spec.
 163 
 164   reg_def V0   ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()          );
 165   reg_def V0_H ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next()  );
 166   reg_def V0_J ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(2) );
 167   reg_def V0_K ( SOC, SOC, Op_RegF,  0, v0->as_VMReg()->next(3) );
 168 
 169   reg_def V1   ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()          );
 170   reg_def V1_H ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next()  );
 171   reg_def V1_J ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(2) );
 172   reg_def V1_K ( SOC, SOC, Op_RegF,  1, v1->as_VMReg()->next(3) );
 173 
 174   reg_def V2   ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()          );
 175   reg_def V2_H ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next()  );
 176   reg_def V2_J ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(2) );
 177   reg_def V2_K ( SOC, SOC, Op_RegF,  2, v2->as_VMReg()->next(3) );
 178 
 179   reg_def V3   ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()          );
 180   reg_def V3_H ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next()  );
 181   reg_def V3_J ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(2) );
 182   reg_def V3_K ( SOC, SOC, Op_RegF,  3, v3->as_VMReg()->next(3) );
 183 
 184   reg_def V4   ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()          );
 185   reg_def V4_H ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next()  );
 186   reg_def V4_J ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(2) );
 187   reg_def V4_K ( SOC, SOC, Op_RegF,  4, v4->as_VMReg()->next(3) );
 188 
 189   reg_def V5   ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()          );
 190   reg_def V5_H ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next()  );
 191   reg_def V5_J ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(2) );
 192   reg_def V5_K ( SOC, SOC, Op_RegF,  5, v5->as_VMReg()->next(3) );
 193 
 194   reg_def V6   ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()          );
 195   reg_def V6_H ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next()  );
 196   reg_def V6_J ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(2) );
 197   reg_def V6_K ( SOC, SOC, Op_RegF,  6, v6->as_VMReg()->next(3) );
 198 
 199   reg_def V7   ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()          );
 200   reg_def V7_H ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next()  );
 201   reg_def V7_J ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(2) );
 202   reg_def V7_K ( SOC, SOC, Op_RegF,  7, v7->as_VMReg()->next(3) );
 203 
 204   reg_def V8   ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()          );
 205   reg_def V8_H ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next()  );
 206   reg_def V8_J ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(2) );
 207   reg_def V8_K ( SOC, SOC, Op_RegF,  8, v8->as_VMReg()->next(3) );
 208 
 209   reg_def V9   ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()          );
 210   reg_def V9_H ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next()  );
 211   reg_def V9_J ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(2) );
 212   reg_def V9_K ( SOC, SOC, Op_RegF,  9, v9->as_VMReg()->next(3) );
 213 
 214   reg_def V10  ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()         );
 215   reg_def V10_H( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next() );
 216   reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2));
 217   reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3));
 218 
 219   reg_def V11  ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()         );
 220   reg_def V11_H( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next() );
 221   reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2));
 222   reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3));
 223 
 224   reg_def V12  ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()         );
 225   reg_def V12_H( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next() );
 226   reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2));
 227   reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3));
 228 
 229   reg_def V13  ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()         );
 230   reg_def V13_H( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next() );
 231   reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2));
 232   reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3));
 233 
 234   reg_def V14  ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()         );
 235   reg_def V14_H( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next() );
 236   reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2));
 237   reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3));
 238 
 239   reg_def V15  ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()         );
 240   reg_def V15_H( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next() );
 241   reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2));
 242   reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3));
 243 
 244   reg_def V16  ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()         );
 245   reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() );
 246   reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2));
 247   reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3));
 248 
 249   reg_def V17  ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()         );
 250   reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() );
 251   reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2));
 252   reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3));
 253 
 254   reg_def V18  ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()         );
 255   reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() );
 256   reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2));
 257   reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3));
 258 
 259   reg_def V19  ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()         );
 260   reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() );
 261   reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2));
 262   reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3));
 263 
 264   reg_def V20  ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()         );
 265   reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() );
 266   reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2));
 267   reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3));
 268 
 269   reg_def V21  ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()         );
 270   reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() );
 271   reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2));
 272   reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3));
 273 
 274   reg_def V22  ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()         );
 275   reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() );
 276   reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2));
 277   reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3));
 278 
 279   reg_def V23  ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()         );
 280   reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() );
 281   reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2));
 282   reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3));
 283 
 284   reg_def V24  ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()         );
 285   reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() );
 286   reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2));
 287   reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3));
 288 
 289   reg_def V25  ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()         );
 290   reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() );
 291   reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2));
 292   reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3));
 293 
 294   reg_def V26  ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()         );
 295   reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() );
 296   reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2));
 297   reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3));
 298 
 299   reg_def V27  ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()         );
 300   reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() );
 301   reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2));
 302   reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3));
 303 
 304   reg_def V28  ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()         );
 305   reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() );
 306   reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2));
 307   reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3));
 308 
 309   reg_def V29  ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()         );
 310   reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() );
 311   reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2));
 312   reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3));
 313 
 314   reg_def V30  ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()         );
 315   reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() );
 316   reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2));
 317   reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3));
 318 
 319   reg_def V31  ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()         );
 320   reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() );
 321   reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2));
 322   reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3));
 323 
 324 // ----------------------------
 325 // Special Registers
 326 // ----------------------------
 327 
// the AArch64 CSPR status flag register is not directly accessible as
// an instruction operand. the FPSR status flag register is a system
// register which can be written/read using MSR/MRS but again does not
// appear as an operand (a code identifying the FPSR occurs as an
// immediate value in the instruction).
 333 
// Flags register: ideal register type 0 (none) and an out-of-range
// encoding (32); VMRegImpl::Bad() because the flags never map to a
// VMReg slot (not accessible as an operand -- see comment above).
reg_def RFLAGS(SOC, SOC, 0, 32, VMRegImpl::Bad());
 335 
 336 
 337 // Specify priority of register selection within phases of register
 338 // allocation.  Highest priority is first.  A useful heuristic is to
 339 // give registers a low priority when they are required by machine
 340 // instructions, like EAX and EDX on I486, and choose no-save registers
 341 // before save-on-call, & save-on-call before save-on-entry.  Registers
 342 // which participate in fixed calling sequences should come last.
 343 // Registers which are used as pairs must fall on an even boundary.
 344 
alloc_class chunk0(
    // Order encodes allocation priority, highest first (see the
    // comment above): plain volatiles, then the argument registers
    // (used in fixed calling sequences, so listed later), then the
    // C-ABI callee-saved registers, then the non-allocatable
    // specials.

    // volatiles
    R10, R10_H,
    R11, R11_H,
    R12, R12_H,
    R13, R13_H,
    R14, R14_H,
    R15, R15_H,
    R16, R16_H,
    R17, R17_H,
    R18, R18_H,

    // arg registers
    R0, R0_H,
    R1, R1_H,
    R2, R2_H,
    R3, R3_H,
    R4, R4_H,
    R5, R5_H,
    R6, R6_H,
    R7, R7_H,

    // non-volatiles
    R19, R19_H,
    R20, R20_H,
    R21, R21_H,
    R22, R22_H,
    R23, R23_H,
    R24, R24_H,
    R25, R25_H,
    R26, R26_H,

    // non-allocatable registers

    R27, R27_H, // heapbase
    R28, R28_H, // thread
    R29, R29_H, // fp
    R30, R30_H, // lr
    R31, R31_H, // sp
);
 385 
alloc_class chunk1(
    // Same priority scheme as chunk0: the no-save registers v16-v31
    // first, then the FP argument registers v0-v7, then v8-v15
    // (callee-saved under the platform C ABI -- see the comment
    // above the V register definitions).

    // no save
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K,

    // arg registers
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,

    // non-volatiles
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
);
 426 
// The condition flags get a chunk of their own.
alloc_class chunk2(RFLAGS);
 428 
 429 //----------Architecture Description Register Classes--------------------------
 430 // Several register classes are automatically defined based upon information in
 431 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 436 //
 437 
 438 // Class for all 32 bit integer registers -- excludes SP which will
 439 // never be used as an integer register
 440 reg_class any_reg32(
 441     R0,
 442     R1,
 443     R2,
 444     R3,
 445     R4,
 446     R5,
 447     R6,
 448     R7,
 449     R10,
 450     R11,
 451     R12,
 452     R13,
 453     R14,
 454     R15,
 455     R16,
 456     R17,
 457     R18,
 458     R19,
 459     R20,
 460     R21,
 461     R22,
 462     R23,
 463     R24,
 464     R25,
 465     R26,
 466     R27,
 467     R28,
 468     R29,
 469     R30
 470 );
 471 
 472 // Singleton class for R0 int register
 473 reg_class int_r0_reg(R0);
 474 
 475 // Singleton class for R2 int register
 476 reg_class int_r2_reg(R2);
 477 
 478 // Singleton class for R3 int register
 479 reg_class int_r3_reg(R3);
 480 
 481 // Singleton class for R4 int register
 482 reg_class int_r4_reg(R4);
 483 
 484 // Class for all long integer registers (including RSP)
 485 reg_class any_reg(
 486     R0, R0_H,
 487     R1, R1_H,
 488     R2, R2_H,
 489     R3, R3_H,
 490     R4, R4_H,
 491     R5, R5_H,
 492     R6, R6_H,
 493     R7, R7_H,
 494     R10, R10_H,
 495     R11, R11_H,
 496     R12, R12_H,
 497     R13, R13_H,
 498     R14, R14_H,
 499     R15, R15_H,
 500     R16, R16_H,
 501     R17, R17_H,
 502     R18, R18_H,
 503     R19, R19_H,
 504     R20, R20_H,
 505     R21, R21_H,
 506     R22, R22_H,
 507     R23, R23_H,
 508     R24, R24_H,
 509     R25, R25_H,
 510     R26, R26_H,
 511     R27, R27_H,
 512     R28, R28_H,
 513     R29, R29_H,
 514     R30, R30_H,
 515     R31, R31_H
 516 );
 517 
 518 // Class for all non-special integer registers
 519 reg_class no_special_reg32_no_fp(
 520     R0,
 521     R1,
 522     R2,
 523     R3,
 524     R4,
 525     R5,
 526     R6,
 527     R7,
 528     R10,
 529     R11,
 530     R12,                        // rmethod
 531     R13,
 532     R14,
 533     R15,
 534     R16,
 535     R17,
 536     R18,
 537     R19,
 538     R20,
 539     R21,
 540     R22,
 541     R23,
 542     R24,
 543     R25,
 544     R26
 545  /* R27, */                     // heapbase
 546  /* R28, */                     // thread
 547  /* R29, */                     // fp
 548  /* R30, */                     // lr
 549  /* R31 */                      // sp
 550 );
 551 
 552 reg_class no_special_reg32_with_fp(
 553     R0,
 554     R1,
 555     R2,
 556     R3,
 557     R4,
 558     R5,
 559     R6,
 560     R7,
 561     R10,
 562     R11,
 563     R12,                        // rmethod
 564     R13,
 565     R14,
 566     R15,
 567     R16,
 568     R17,
 569     R18,
 570     R19,
 571     R20,
 572     R21,
 573     R22,
 574     R23,
 575     R24,
 576     R25,
 577     R26
 578  /* R27, */                     // heapbase
 579  /* R28, */                     // thread
 580     R29,                        // fp
 581  /* R30, */                     // lr
 582  /* R31 */                      // sp
 583 );
 584 
 585 reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
 586 
 587 // Class for all non-special long integer registers
 588 reg_class no_special_reg_no_fp(
 589     R0, R0_H,
 590     R1, R1_H,
 591     R2, R2_H,
 592     R3, R3_H,
 593     R4, R4_H,
 594     R5, R5_H,
 595     R6, R6_H,
 596     R7, R7_H,
 597     R10, R10_H,
 598     R11, R11_H,
 599     R12, R12_H,                 // rmethod
 600     R13, R13_H,
 601     R14, R14_H,
 602     R15, R15_H,
 603     R16, R16_H,
 604     R17, R17_H,
 605     R18, R18_H,
 606     R19, R19_H,
 607     R20, R20_H,
 608     R21, R21_H,
 609     R22, R22_H,
 610     R23, R23_H,
 611     R24, R24_H,
 612     R25, R25_H,
 613     R26, R26_H,
 614  /* R27, R27_H, */              // heapbase
 615  /* R28, R28_H, */              // thread
 616  /* R29, R29_H, */              // fp
 617  /* R30, R30_H, */              // lr
 618  /* R31, R31_H */               // sp
 619 );
 620 
 621 reg_class no_special_reg_with_fp(
 622     R0, R0_H,
 623     R1, R1_H,
 624     R2, R2_H,
 625     R3, R3_H,
 626     R4, R4_H,
 627     R5, R5_H,
 628     R6, R6_H,
 629     R7, R7_H,
 630     R10, R10_H,
 631     R11, R11_H,
 632     R12, R12_H,                 // rmethod
 633     R13, R13_H,
 634     R14, R14_H,
 635     R15, R15_H,
 636     R16, R16_H,
 637     R17, R17_H,
 638     R18, R18_H,
 639     R19, R19_H,
 640     R20, R20_H,
 641     R21, R21_H,
 642     R22, R22_H,
 643     R23, R23_H,
 644     R24, R24_H,
 645     R25, R25_H,
 646     R26, R26_H,
 647  /* R27, R27_H, */              // heapbase
 648  /* R28, R28_H, */              // thread
 649     R29, R29_H,                 // fp
 650  /* R30, R30_H, */              // lr
 651  /* R31, R31_H */               // sp
 652 );
 653 
 654 reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
 655 
 656 // Class for 64 bit register r0
 657 reg_class r0_reg(
 658     R0, R0_H
 659 );
 660 
 661 // Class for 64 bit register r1
 662 reg_class r1_reg(
 663     R1, R1_H
 664 );
 665 
 666 // Class for 64 bit register r2
 667 reg_class r2_reg(
 668     R2, R2_H
 669 );
 670 
 671 // Class for 64 bit register r3
 672 reg_class r3_reg(
 673     R3, R3_H
 674 );
 675 
 676 // Class for 64 bit register r4
 677 reg_class r4_reg(
 678     R4, R4_H
 679 );
 680 
 681 // Class for 64 bit register r5
 682 reg_class r5_reg(
 683     R5, R5_H
 684 );
 685 
 686 // Class for 64 bit register r10
 687 reg_class r10_reg(
 688     R10, R10_H
 689 );
 690 
 691 // Class for 64 bit register r11
 692 reg_class r11_reg(
 693     R11, R11_H
 694 );
 695 
 696 // Class for method register
 697 reg_class method_reg(
 698     R12, R12_H
 699 );
 700 
 701 // Class for heapbase register
 702 reg_class heapbase_reg(
 703     R27, R27_H
 704 );
 705 
 706 // Class for thread register
 707 reg_class thread_reg(
 708     R28, R28_H
 709 );
 710 
 711 // Class for frame pointer register
 712 reg_class fp_reg(
 713     R29, R29_H
 714 );
 715 
 716 // Class for link register
 717 reg_class lr_reg(
 718     R30, R30_H
 719 );
 720 
 721 // Class for long sp register
 722 reg_class sp_reg(
 723   R31, R31_H
 724 );
 725 
 726 // Class for all pointer registers
 727 reg_class ptr_reg(
 728     R0, R0_H,
 729     R1, R1_H,
 730     R2, R2_H,
 731     R3, R3_H,
 732     R4, R4_H,
 733     R5, R5_H,
 734     R6, R6_H,
 735     R7, R7_H,
 736     R10, R10_H,
 737     R11, R11_H,
 738     R12, R12_H,
 739     R13, R13_H,
 740     R14, R14_H,
 741     R15, R15_H,
 742     R16, R16_H,
 743     R17, R17_H,
 744     R18, R18_H,
 745     R19, R19_H,
 746     R20, R20_H,
 747     R21, R21_H,
 748     R22, R22_H,
 749     R23, R23_H,
 750     R24, R24_H,
 751     R25, R25_H,
 752     R26, R26_H,
 753     R27, R27_H,
 754     R28, R28_H,
 755     R29, R29_H,
 756     R30, R30_H,
 757     R31, R31_H
 758 );
 759 
 760 // Class for all non_special pointer registers
 761 reg_class no_special_ptr_reg(
 762     R0, R0_H,
 763     R1, R1_H,
 764     R2, R2_H,
 765     R3, R3_H,
 766     R4, R4_H,
 767     R5, R5_H,
 768     R6, R6_H,
 769     R7, R7_H,
 770     R10, R10_H,
 771     R11, R11_H,
 772     R12, R12_H,
 773     R13, R13_H,
 774     R14, R14_H,
 775     R15, R15_H,
 776     R16, R16_H,
 777     R17, R17_H,
 778     R18, R18_H,
 779     R19, R19_H,
 780     R20, R20_H,
 781     R21, R21_H,
 782     R22, R22_H,
 783     R23, R23_H,
 784     R24, R24_H,
 785     R25, R25_H,
 786     R26, R26_H,
 787  /* R27, R27_H, */              // heapbase
 788  /* R28, R28_H, */              // thread
 789  /* R29, R29_H, */              // fp
 790  /* R30, R30_H, */              // lr
 791  /* R31, R31_H */               // sp
 792 );
 793 
 794 // Class for all float registers
 795 reg_class float_reg(
 796     V0,
 797     V1,
 798     V2,
 799     V3,
 800     V4,
 801     V5,
 802     V6,
 803     V7,
 804     V8,
 805     V9,
 806     V10,
 807     V11,
 808     V12,
 809     V13,
 810     V14,
 811     V15,
 812     V16,
 813     V17,
 814     V18,
 815     V19,
 816     V20,
 817     V21,
 818     V22,
 819     V23,
 820     V24,
 821     V25,
 822     V26,
 823     V27,
 824     V28,
 825     V29,
 826     V30,
 827     V31
 828 );
 829 
 830 // Double precision float registers have virtual `high halves' that
 831 // are needed by the allocator.
 832 // Class for all double registers
 833 reg_class double_reg(
 834     V0, V0_H,
 835     V1, V1_H,
 836     V2, V2_H,
 837     V3, V3_H,
 838     V4, V4_H,
 839     V5, V5_H,
 840     V6, V6_H,
 841     V7, V7_H,
 842     V8, V8_H,
 843     V9, V9_H,
 844     V10, V10_H,
 845     V11, V11_H,
 846     V12, V12_H,
 847     V13, V13_H,
 848     V14, V14_H,
 849     V15, V15_H,
 850     V16, V16_H,
 851     V17, V17_H,
 852     V18, V18_H,
 853     V19, V19_H,
 854     V20, V20_H,
 855     V21, V21_H,
 856     V22, V22_H,
 857     V23, V23_H,
 858     V24, V24_H,
 859     V25, V25_H,
 860     V26, V26_H,
 861     V27, V27_H,
 862     V28, V28_H,
 863     V29, V29_H,
 864     V30, V30_H,
 865     V31, V31_H
 866 );
 867 
// Class for all 64bit vector registers
// (same two-slot view -- Vn plus Vn_H -- as double_reg above)
reg_class vectord_reg(
    V0, V0_H,
    V1, V1_H,
    V2, V2_H,
    V3, V3_H,
    V4, V4_H,
    V5, V5_H,
    V6, V6_H,
    V7, V7_H,
    V8, V8_H,
    V9, V9_H,
    V10, V10_H,
    V11, V11_H,
    V12, V12_H,
    V13, V13_H,
    V14, V14_H,
    V15, V15_H,
    V16, V16_H,
    V17, V17_H,
    V18, V18_H,
    V19, V19_H,
    V20, V20_H,
    V21, V21_H,
    V22, V22_H,
    V23, V23_H,
    V24, V24_H,
    V25, V25_H,
    V26, V26_H,
    V27, V27_H,
    V28, V28_H,
    V29, V29_H,
    V30, V30_H,
    V31, V31_H
);
 903 
// Class for all 128bit vector registers
// (four 32-bit slots per register: Vn, Vn_H, Vn_J and Vn_K together
// cover the full 128-bit SIMD register)
reg_class vectorx_reg(
    V0, V0_H, V0_J, V0_K,
    V1, V1_H, V1_J, V1_K,
    V2, V2_H, V2_J, V2_K,
    V3, V3_H, V3_J, V3_K,
    V4, V4_H, V4_J, V4_K,
    V5, V5_H, V5_J, V5_K,
    V6, V6_H, V6_J, V6_K,
    V7, V7_H, V7_J, V7_K,
    V8, V8_H, V8_J, V8_K,
    V9, V9_H, V9_J, V9_K,
    V10, V10_H, V10_J, V10_K,
    V11, V11_H, V11_J, V11_K,
    V12, V12_H, V12_J, V12_K,
    V13, V13_H, V13_J, V13_K,
    V14, V14_H, V14_J, V14_K,
    V15, V15_H, V15_J, V15_K,
    V16, V16_H, V16_J, V16_K,
    V17, V17_H, V17_J, V17_K,
    V18, V18_H, V18_J, V18_K,
    V19, V19_H, V19_J, V19_K,
    V20, V20_H, V20_J, V20_K,
    V21, V21_H, V21_J, V21_K,
    V22, V22_H, V22_J, V22_K,
    V23, V23_H, V23_J, V23_K,
    V24, V24_H, V24_J, V24_K,
    V25, V25_H, V25_J, V25_K,
    V26, V26_H, V26_J, V26_K,
    V27, V27_H, V27_J, V27_K,
    V28, V28_H, V28_J, V28_K,
    V29, V29_H, V29_J, V29_K,
    V30, V30_H, V30_J, V30_K,
    V31, V31_H, V31_J, V31_K
);
 939 
// Class for 128 bit register v0
// NOTE(review): only two slots (V0, V0_H) are listed, unlike the
// four-slot entries in vectorx_reg -- confirm this matches how users
// of v0_reg expect it to be spilled.
reg_class v0_reg(
    V0, V0_H
);
 944 
// Class for 128 bit register v1
// (two-slot view of V1; cf. the note on v0_reg's slot count)
reg_class v1_reg(
    V1, V1_H
);
 949 
// Class for 128 bit register v2
// (two-slot view of V2, pinning allocation to this single register)
reg_class v2_reg(
    V2, V2_H
);
 954 
// Class for 128 bit register v3
// (two-slot view of V3, pinning allocation to this single register)
reg_class v3_reg(
    V3, V3_H
);
 959 
// Singleton class for condition codes
// (allocates the single flags register RFLAGS)
reg_class int_flags(RFLAGS);
 962 
 963 %}
 964 
 965 //----------DEFINITION BLOCK---------------------------------------------------
 966 // Define name --> value mappings to inform the ADLC of an integer valued name
 967 // Current support includes integer values in the range [0, 0x7FFFFFFF]
 968 // Format:
 969 //        int_def  <name>         ( <int_value>, <expression>);
 970 // Generated Code in ad_<arch>.hpp
 971 //        #define  <name>   (<expression>)
 972 //        // value == <int_value>
 973 // Generated code in ad_<arch>.cpp adlc_verification()
 974 //        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
 975 //
 976 
 977 // we follow the ppc-aix port in using a simple cost model which ranks
 978 // register operations as cheap, memory ops as more expensive and
 979 // branches as most expensive. the first two have a low as well as a
 980 // normal cost. huge cost appears to be a way of saying don't do
 981 // something
 982 
definitions %{
  // The default cost (of a register move instruction).
  int_def INSN_COST            (    100,     100);
  // Branches and calls cost twice a register op.
  int_def BRANCH_COST          (    200,     2 * INSN_COST);
  int_def CALL_COST            (    200,     2 * INSN_COST);
  // Volatile references are an order of magnitude more expensive.
  int_def VOLATILE_REF_COST    (   1000,     10 * INSN_COST);
%}
 990 
 991 
 992 //----------SOURCE BLOCK-------------------------------------------------------
 993 // This is a block of C++ code which provides values, functions, and
 994 // definitions necessary in the rest of the architecture description
 995 
 996 source_hpp %{
 997 
 998 #include "asm/macroAssembler.hpp"
 999 #include "gc/shared/cardTable.hpp"
1000 #include "gc/shared/cardTableBarrierSet.hpp"
1001 #include "gc/shared/collectedHeap.hpp"
1002 #include "opto/addnode.hpp"
1003 
// Interface expected by the generic code; reports sizes for call
// trampoline stubs, which AArch64 does not use.
class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};
1021 
// Sizes and emitters for the per-method exception and deopt handlers.
class HandlerImpl {

 public:

  // Emitters are defined later in this file.
  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  // The exception handler is a single far branch to the shared blob.
  static uint size_exception_handler() {
    return MacroAssembler::far_branch_size();
  }

  static uint size_deopt_handler() {
    // count one adr and one far branch instruction
    // NOTE(review): reserves 4 instruction words although only an adr
    // and a far branch are mentioned; presumably the far branch may
    // expand to multiple instructions -- confirm 4 is the maximum.
    return 4 * NativeInstruction::instruction_size;
  }
};
1038 
1039  bool is_CAS(int opcode, bool maybe_volatile);
1040 
1041   // predicates controlling emit of ldr<x>/ldar<x> and associated dmb
1042 
1043   bool unnecessary_acquire(const Node *barrier);
1044   bool needs_acquiring_load(const Node *load);
1045 
1046   // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1047 
1048   bool unnecessary_release(const Node *barrier);
1049   bool unnecessary_volatile(const Node *barrier);
1050   bool needs_releasing_store(const Node *store);
1051 
1052   // predicate controlling translation of CompareAndSwapX
1053   bool needs_acquiring_load_exclusive(const Node *load);
1054 
1055   // predicate controlling translation of StoreCM
1056   bool unnecessary_storestore(const Node *storecm);
1057 
1058   // predicate controlling addressing modes
1059   bool size_fits_all_mem_uses(AddPNode* addp, int shift);
1060 %}
1061 
1062 source %{
1063 
  // Optimization of volatile gets and puts
1065   // -------------------------------------
1066   //
1067   // AArch64 has ldar<x> and stlr<x> instructions which we can safely
1068   // use to implement volatile reads and writes. For a volatile read
1069   // we simply need
1070   //
1071   //   ldar<x>
1072   //
1073   // and for a volatile write we need
1074   //
1075   //   stlr<x>
1076   //
1077   // Alternatively, we can implement them by pairing a normal
1078   // load/store with a memory barrier. For a volatile read we need
1079   //
1080   //   ldr<x>
1081   //   dmb ishld
1082   //
1083   // for a volatile write
1084   //
1085   //   dmb ish
1086   //   str<x>
1087   //   dmb ish
1088   //
1089   // We can also use ldaxr and stlxr to implement compare and swap CAS
1090   // sequences. These are normally translated to an instruction
1091   // sequence like the following
1092   //
1093   //   dmb      ish
1094   // retry:
1095   //   ldxr<x>   rval raddr
1096   //   cmp       rval rold
1097   //   b.ne done
1098   //   stlxr<x>  rval, rnew, rold
1099   //   cbnz      rval retry
1100   // done:
1101   //   cset      r0, eq
1102   //   dmb ishld
1103   //
1104   // Note that the exclusive store is already using an stlxr
1105   // instruction. That is required to ensure visibility to other
1106   // threads of the exclusive write (assuming it succeeds) before that
1107   // of any subsequent writes.
1108   //
1109   // The following instruction sequence is an improvement on the above
1110   //
1111   // retry:
1112   //   ldaxr<x>  rval raddr
1113   //   cmp       rval rold
1114   //   b.ne done
1115   //   stlxr<x>  rval, rnew, rold
1116   //   cbnz      rval retry
1117   // done:
1118   //   cset      r0, eq
1119   //
1120   // We don't need the leading dmb ish since the stlxr guarantees
1121   // visibility of prior writes in the case that the swap is
1122   // successful. Crucially we don't have to worry about the case where
1123   // the swap is not successful since no valid program should be
1124   // relying on visibility of prior changes by the attempting thread
1125   // in the case where the CAS fails.
1126   //
1127   // Similarly, we don't need the trailing dmb ishld if we substitute
1128   // an ldaxr instruction since that will provide all the guarantees we
1129   // require regarding observation of changes made by other threads
1130   // before any change to the CAS address observed by the load.
1131   //
1132   // In order to generate the desired instruction sequence we need to
1133   // be able to identify specific 'signature' ideal graph node
1134   // sequences which i) occur as a translation of a volatile reads or
1135   // writes or CAS operations and ii) do not occur through any other
1136   // translation or graph transformation. We can then provide
  // alternative adlc matching rules which translate these node
1138   // sequences to the desired machine code sequences. Selection of the
1139   // alternative rules can be implemented by predicates which identify
1140   // the relevant node sequences.
1141   //
1142   // The ideal graph generator translates a volatile read to the node
1143   // sequence
1144   //
1145   //   LoadX[mo_acquire]
1146   //   MemBarAcquire
1147   //
1148   // As a special case when using the compressed oops optimization we
1149   // may also see this variant
1150   //
1151   //   LoadN[mo_acquire]
1152   //   DecodeN
1153   //   MemBarAcquire
1154   //
1155   // A volatile write is translated to the node sequence
1156   //
1157   //   MemBarRelease
1158   //   StoreX[mo_release] {CardMark}-optional
1159   //   MemBarVolatile
1160   //
1161   // n.b. the above node patterns are generated with a strict
1162   // 'signature' configuration of input and output dependencies (see
1163   // the predicates below for exact details). The card mark may be as
1164   // simple as a few extra nodes or, in a few GC configurations, may
1165   // include more complex control flow between the leading and
1166   // trailing memory barriers. However, whatever the card mark
1167   // configuration these signatures are unique to translated volatile
1168   // reads/stores -- they will not appear as a result of any other
1169   // bytecode translation or inlining nor as a consequence of
1170   // optimizing transforms.
1171   //
1172   // We also want to catch inlined unsafe volatile gets and puts and
1173   // be able to implement them using either ldar<x>/stlr<x> or some
1174   // combination of ldr<x>/stlr<x> and dmb instructions.
1175   //
1176   // Inlined unsafe volatiles puts manifest as a minor variant of the
1177   // normal volatile put node sequence containing an extra cpuorder
1178   // membar
1179   //
1180   //   MemBarRelease
1181   //   MemBarCPUOrder
1182   //   StoreX[mo_release] {CardMark}-optional
1183   //   MemBarCPUOrder
1184   //   MemBarVolatile
1185   //
1186   // n.b. as an aside, a cpuorder membar is not itself subject to
1187   // matching and translation by adlc rules.  However, the rule
1188   // predicates need to detect its presence in order to correctly
1189   // select the desired adlc rules.
1190   //
1191   // Inlined unsafe volatile gets manifest as a slightly different
1192   // node sequence to a normal volatile get because of the
1193   // introduction of some CPUOrder memory barriers to bracket the
  // Load. However, the same basic skeleton of a LoadX feeding a
  // MemBarAcquire, possibly through an optional DecodeN, is still
1196   // present
1197   //
1198   //   MemBarCPUOrder
1199   //        ||       \\
1200   //   MemBarCPUOrder LoadX[mo_acquire]
1201   //        ||            |
1202   //        ||       {DecodeN} optional
1203   //        ||       /
1204   //     MemBarAcquire
1205   //
1206   // In this case the acquire membar does not directly depend on the
1207   // load. However, we can be sure that the load is generated from an
1208   // inlined unsafe volatile get if we see it dependent on this unique
1209   // sequence of membar nodes. Similarly, given an acquire membar we
1210   // can know that it was added because of an inlined unsafe volatile
1211   // get if it is fed and feeds a cpuorder membar and if its feed
1212   // membar also feeds an acquiring load.
1213   //
1214   // Finally an inlined (Unsafe) CAS operation is translated to the
1215   // following ideal graph
1216   //
1217   //   MemBarRelease
1218   //   MemBarCPUOrder
1219   //   CompareAndSwapX {CardMark}-optional
1220   //   MemBarCPUOrder
1221   //   MemBarAcquire
1222   //
1223   // So, where we can identify these volatile read and write
1224   // signatures we can choose to plant either of the above two code
1225   // sequences. For a volatile read we can simply plant a normal
1226   // ldr<x> and translate the MemBarAcquire to a dmb. However, we can
1227   // also choose to inhibit translation of the MemBarAcquire and
1228   // inhibit planting of the ldr<x>, instead planting an ldar<x>.
1229   //
1230   // When we recognise a volatile store signature we can choose to
1231   // plant at a dmb ish as a translation for the MemBarRelease, a
1232   // normal str<x> and then a dmb ish for the MemBarVolatile.
1233   // Alternatively, we can inhibit translation of the MemBarRelease
1234   // and MemBarVolatile and instead plant a simple stlr<x>
1235   // instruction.
1236   //
1237   // when we recognise a CAS signature we can choose to plant a dmb
1238   // ish as a translation for the MemBarRelease, the conventional
1239   // macro-instruction sequence for the CompareAndSwap node (which
1240   // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
1241   // Alternatively, we can elide generation of the dmb instructions
1242   // and plant the alternative CompareAndSwap macro-instruction
1243   // sequence (which uses ldaxr<x>).
1244   //
1245   // Of course, the above only applies when we see these signature
1246   // configurations. We still want to plant dmb instructions in any
1247   // other cases where we may see a MemBarAcquire, MemBarRelease or
1248   // MemBarVolatile. For example, at the end of a constructor which
1249   // writes final/volatile fields we will see a MemBarRelease
1250   // instruction and this needs a 'dmb ish' lest we risk the
1251   // constructed object being visible without making the
1252   // final/volatile field writes visible.
1253   //
1254   // n.b. the translation rules below which rely on detection of the
1255   // volatile signatures and insert ldar<x> or stlr<x> are failsafe.
1256   // If we see anything other than the signature configurations we
1257   // always just translate the loads and stores to ldr<x> and str<x>
1258   // and translate acquire, release and volatile membars to the
1259   // relevant dmb instructions.
1260   //
1261 
  // is_CAS(int opcode, bool maybe_volatile)
  //
  // return true if opcode is one of the possible CompareAndSwapX
  // values otherwise false.
  //
  // When maybe_volatile is true, the CompareAndExchange and weak CAS
  // variants are also accepted; when false, only the unconditional
  // group below counts.

  bool is_CAS(int opcode, bool maybe_volatile)
  {
    switch(opcode) {
      // We handle these
      // (n.b. GetAndSet/GetAndAdd are grouped with the CAS opcodes
      // here -- presumably because they share the same exclusive
      // load/store translation; confirm against the matching rules)
    case Op_CompareAndSwapI:
    case Op_CompareAndSwapL:
    case Op_CompareAndSwapP:
    case Op_CompareAndSwapN:
    case Op_CompareAndSwapB:
    case Op_CompareAndSwapS:
    case Op_GetAndSetI:
    case Op_GetAndSetL:
    case Op_GetAndSetP:
    case Op_GetAndSetN:
    case Op_GetAndAddI:
    case Op_GetAndAddL:
      return true;
      // exchange/weak forms count only when volatile semantics are
      // allowed by the caller
    case Op_CompareAndExchangeI:
    case Op_CompareAndExchangeN:
    case Op_CompareAndExchangeB:
    case Op_CompareAndExchangeS:
    case Op_CompareAndExchangeL:
    case Op_CompareAndExchangeP:
    case Op_WeakCompareAndSwapB:
    case Op_WeakCompareAndSwapS:
    case Op_WeakCompareAndSwapI:
    case Op_WeakCompareAndSwapL:
    case Op_WeakCompareAndSwapP:
    case Op_WeakCompareAndSwapN:
      return maybe_volatile;
    default:
      return false;
    }
  }
1301 
1302   // helper to determine the maximum number of Phi nodes we may need to
1303   // traverse when searching from a card mark membar for the merge mem
1304   // feeding a trailing membar or vice versa
1305 
// predicates controlling emit of ldr<x>/ldar<x> and associated dmb

// Returns true when the acquire membar can be elided because the
// preceding load (or CAS) will itself be emitted with acquire
// semantics (ldar<x>/ldaxr<x>).
bool unnecessary_acquire(const Node *barrier)
{
  assert(barrier->is_MemBar(), "expecting a membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode* mb = barrier->as_MemBar();

  // membar trailing a volatile load: the ldar<x> supplies the acquire
  if (mb->trailing_load()) {
    return true;
  }

  // membar trailing a load-store: elide only for the CAS opcodes,
  // whose translation uses ldaxr<x> (see the comment essay above)
  if (mb->trailing_load_store()) {
    Node* load_store = mb->in(MemBarNode::Precedent);
    assert(load_store->is_LoadStore(), "unexpected graph shape");
    return is_CAS(load_store->Opcode(), true);
  }

  return false;
}
1331 
1332 bool needs_acquiring_load(const Node *n)
1333 {
1334   assert(n->is_Load(), "expecting a load");
1335   if (UseBarriersForVolatile) {
1336     // we use a normal load and a dmb
1337     return false;
1338   }
1339 
1340   LoadNode *ld = n->as_Load();
1341 
1342   return ld->is_acquire();
1343 }
1344 
// Returns true when the leading release membar can be elided because
// the paired store/CAS will be emitted with release semantics
// (stlr<x>/stlxr<x>).
bool unnecessary_release(const Node *n)
{
  assert((n->is_MemBar() &&
          n->Opcode() == Op_MemBarRelease),
         "expecting a release membar");

  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *barrier = n->as_MemBar();
  if (!barrier->leading()) {
    // not part of a recognised leading/trailing signature
    return false;
  } else {
    Node* trailing = barrier->trailing_membar();
    MemBarNode* trailing_mb = trailing->as_MemBar();
    assert(trailing_mb->trailing(), "Not a trailing membar?");
    assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars");

    Node* mem = trailing_mb->in(MemBarNode::Precedent);
    if (mem->is_Store()) {
      // volatile store: the stlr<x> supplies the release
      assert(mem->as_Store()->is_release(), "");
      assert(trailing_mb->Opcode() == Op_MemBarVolatile, "");
      return true;
    } else {
      // CAS: elide only for the opcodes whose translation uses stlxr
      assert(mem->is_LoadStore(), "");
      assert(trailing_mb->Opcode() == Op_MemBarAcquire, "");
      return is_CAS(mem->Opcode(), true);
    }
  }
  // not reached: both branches above return
  return false;
}
1378 
// Returns true when the trailing MemBarVolatile of a volatile store
// can be elided because the store will be emitted as stlr<x>.
bool unnecessary_volatile(const Node *n)
{
  // assert n->is_MemBar();
  if (UseBarriersForVolatile) {
    // we need to plant a dmb
    return false;
  }

  MemBarNode *mbvol = n->as_MemBar();

  // elide only when this membar trails a releasing store
  bool release = mbvol->trailing_store();
  assert(!release || (mbvol->in(MemBarNode::Precedent)->is_Store() && mbvol->in(MemBarNode::Precedent)->as_Store()->is_release()), "");
#ifdef ASSERT
  if (release) {
    // check the leading/trailing membar pairing is consistent
    Node* leading = mbvol->leading_membar();
    assert(leading->Opcode() == Op_MemBarRelease, "");
    assert(leading->as_MemBar()->leading_store(), "");
    assert(leading->as_MemBar()->trailing_membar() == mbvol, "");
  }
#endif

  return release;
}
1402 
1403 // predicates controlling emit of str<x>/stlr<x> and associated dmbs
1404 
1405 bool needs_releasing_store(const Node *n)
1406 {
1407   // assert n->is_Store();
1408   if (UseBarriersForVolatile) {
1409     // we use a normal store and dmb combination
1410     return false;
1411   }
1412 
1413   StoreNode *st = n->as_Store();
1414 
1415   return st->trailing_membar() != NULL;
1416 }
1417 
// predicate controlling translation of CAS
//
// returns true if CAS needs to use an acquiring load otherwise false

bool needs_acquiring_load_exclusive(const Node *n)
{
  assert(is_CAS(n->Opcode(), true), "expecting a compare and swap");
  if (UseBarriersForVolatile) {
    // explicit dmb instructions are planted instead, so a plain
    // ldxr<x> suffices
    return false;
  }

  LoadStoreNode* ldst = n->as_LoadStore();
  if (is_CAS(n->Opcode(), false)) {
    // an unconditional CAS node must always be paired with a
    // trailing membar
    assert(ldst->trailing_membar() != NULL, "expected trailing membar");
  } else {
    // exchange/weak forms only need ldaxr when generated as part of
    // a volatile access, which is exactly when they carry a trailing
    // membar
    return ldst->trailing_membar() != NULL;
  }

  // so we can just return true here
  return true;
}
1439 
1440 // predicate controlling translation of StoreCM
1441 //
1442 // returns true if a StoreStore must precede the card write otherwise
1443 // false
1444 
1445 bool unnecessary_storestore(const Node *storecm)
1446 {
1447   assert(storecm->Opcode()  == Op_StoreCM, "expecting a StoreCM");
1448 
1449   // we need to generate a dmb ishst between an object put and the
1450   // associated card mark when we are using CMS without conditional
1451   // card marking
1452 
1453   if (UseConcMarkSweepGC && !UseCondCardMark) {
1454     return false;
1455   }
1456 
1457   // a storestore is unnecesary in all other cases
1458 
1459   return true;
1460 }
1461 
1462 
1463 #define __ _masm.
1464 
1465 // advance declarations for helper functions to convert register
1466 // indices to register objects
1467 
1468 // the ad file has to provide implementations of certain methods
1469 // expected by the generic code
1470 //
1471 // REQUIRED FUNCTIONALITY
1472 
1473 //=============================================================================
1474 
1475 // !!!!! Special hack to get all types of calls to specify the byte offset
1476 //       from the start of the call to the point where the return address
1477 //       will point.
1478 
1479 int MachCallStaticJavaNode::ret_addr_offset()
1480 {
1481   // call should be a simple bl
1482   int off = 4;
1483   return off;
1484 }
1485 
// A dynamic call materializes its inline-cache value with a
// movz/movk/movk sequence before the bl: four 4-byte instructions.
int MachCallDynamicJavaNode::ret_addr_offset()
{
  return 16; // movz, movk, movk, bl
}
1490 
1491 int MachCallRuntimeNode::ret_addr_offset() {
1492   // for generated stubs the call will be
1493   //   far_call(addr)
1494   // for real runtime callouts it will be six instructions
1495   // see aarch64_enc_java_to_runtime
1496   //   adr(rscratch2, retaddr)
1497   //   lea(rscratch1, RuntimeAddress(addr)
1498   //   stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)))
1499   //   blr(rscratch1)
1500   CodeBlob *cb = CodeCache::find_blob(_entry_point);
1501   if (cb) {
1502     return MacroAssembler::far_branch_size();
1503   } else {
1504     return 6 * NativeInstruction::instruction_size;
1505   }
1506 }
1507 
1508 // Indicate if the safepoint node needs the polling page as an input
1509 
1510 // the shared code plants the oop data at the start of the generated
// code for the safepoint node and that needs to be at the load
1512 // instruction itself. so we cannot plant a mov of the safepoint poll
1513 // address followed by a load. setting this to true means the mov is
1514 // scheduled as a prior instruction. that's better for scheduling
1515 // anyway.
1516 
// n.b. see the extended comment above for why this must be true on
// AArch64: the poll address mov must be scheduled ahead of the load.
bool SafePointNode::needs_polling_address_input()
{
  return true;
}
1521 
1522 //=============================================================================
1523 
1524 #ifndef PRODUCT
// Print a textual representation of the breakpoint node.
void MachBreakpointNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  st->print("BREAKPOINT");
}
1528 #endif
1529 
// Emit a brk #0 (AArch64 breakpoint) instruction.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  __ brk(0);
}
1534 
// Let the generic code compute the size from the emitted bytes.
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1538 
1539 //=============================================================================
1540 
1541 #ifndef PRODUCT
  // Print the nop padding node together with its byte count.
  void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
    st->print("nop \t# %d bytes pad for loops and calls", _count);
  }
1545 #endif
1546 
  // Emit _count nop instructions as padding.
  void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
    MacroAssembler _masm(&cbuf);
    for (int i = 0; i < _count; i++) {
      __ nop();
    }
  }
1553 
  // Size is exactly _count machine instructions.
  uint MachNopNode::size(PhaseRegAlloc*) const {
    return _count * NativeInstruction::instruction_size;
  }
1557 
1558 //=============================================================================
1559 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1560 
// Constants are addressed absolutely, so the table base needs no bias.
int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}
1564 
// No post-register-allocation expansion is needed for the constant
// base node, so postalloc_expand must never be called.
bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}
1569 
// The constant base node emits no code and so occupies no bytes.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}
1577 
1578 #ifndef PRODUCT
// Print a placeholder for the (empty) constant base node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("-- \t// MachConstantBaseNode (empty encoding)");
}
1582 #endif
1583 
1584 #ifndef PRODUCT
// Print the instruction sequence emitted by MachPrologNode::emit
// (via MacroAssembler::build_frame) in assembly-like form.
void MachPrologNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_slots() << LogBytesPerInt;

  if (C->need_stack_bang(framesize))
    st->print("# stack bang size=%d\n\t", framesize);

  // small frames: single immediate-offset sub/stp; larger frames
  // must materialize the adjustment in rscratch1 first
  if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("sub  sp, sp, #%d\n\t", framesize);
    st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
  } else {
    st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("sub  sp, sp, rscratch1");
  }
}
1604 #endif
1605 
// Emit the method prolog: patchable nop, optional stack bang, frame
// build, and constant-table base setup.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  // n.b. frame size includes space for return pc and rfp
  const long framesize = C->frame_size_in_bytes();
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // insert a nop at the start of the prolog so we can patch in a
  // branch if we need to invalidate the method later
  __ nop();

  int bangsize = C->bang_size_in_bytes();
  if (C->need_stack_bang(bangsize) && UseStackBanging)
    __ generate_stack_overflow_check(bangsize);

  __ build_frame(framesize);

  if (VerifyStackAtCalls) {
    Unimplemented();
  }

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}
1637 
// Prolog size varies with frame size and flags; measure the emitted
// bytes instead of predicting them.
uint MachPrologNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_); // too many variables; just compute it
                              // the hard way
}
1643 
// The prolog contains no relocatable values.
int MachPrologNode::reloc() const
{
  return 0;
}
1648 
1649 //=============================================================================
1650 
1651 #ifndef PRODUCT
// Print the instruction sequence emitted by MachEpilogNode::emit
// (frame teardown plus the return-polling touch).
void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  Compile* C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;

  st->print("# pop frame %d\n\t",framesize);

  // mirror the three frame-size cases used when building the frame
  if (framesize == 0) {
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
    st->print("ldp  lr, rfp, [sp,#%d]\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, #%d\n\t", framesize);
  } else {
    st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
    st->print("add  sp, sp, rscratch1\n\t");
    st->print("ldp  lr, rfp, [sp],#%d\n\t", (2 * wordSize));
  }

  if (do_polling() && C->is_method_compilation()) {
    st->print("# touch polling page\n\t");
    st->print("mov  rscratch1, #0x%lx\n\t", p2i(os::get_polling_page()));
    st->print("ldr zr, [rscratch1]");
  }
}
1675 #endif
1676 
// Emit the method epilog: frame removal, reserved-stack check, and
// the return safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);
  int framesize = C->frame_slots() << LogBytesPerInt;

  __ remove_frame(framesize);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  // touch the polling page so a safepoint can trap returning methods
  if (do_polling() && C->is_method_compilation()) {
    __ read_polling_page(rscratch1, os::get_polling_page(), relocInfo::poll_return_type);
  }
}
1692 
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  // Variable size. Determine dynamically.
  return MachNode::size(ra_);
}
1697 
int MachEpilogNode::reloc() const {
  // Return number of relocatable values contained in this instruction.
  return 1; // 1 for polling page.
}
1702 
// Use the generic pipeline class for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
1706 
1707 // This method seems to be obsolete. It is declared in machnode.hpp
1708 // and defined in all *.ad files, but it is never called. Should we
1709 // get rid of it?
// Offset of the safepoint poll within the epilog; see the
// obsolescence note above -- this appears to be dead code.
int MachEpilogNode::safepoint_offset() const {
  assert(do_polling(), "no return for this epilog node");
  return 4;
}
1714 
1715 //=============================================================================
1716 
// Figure out which register class each belongs in: rc_int, rc_float or
// rc_stack.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Map an allocator register number onto its spill-copy class.
static enum RC rc_class(OptoReg::Name reg) {

  if (reg == OptoReg::Bad) {
    return rc_bad;
  }

  // we have 30 int registers * 2 halves
  // (rscratch1 and rscratch2 are omitted)

  if (reg < 60) {
    return rc_int;
  }

  // we have 32 float register * 2 halves
  // NOTE(review): with four slots per V register (see vectorx_reg),
  // 32 * 4 = 128 slots matches the bound below; the "* 2 halves"
  // wording looks stale -- confirm against the register definitions.
  if (reg < 60 + 128) {
    return rc_float;
  }

  // Between float regs & stack is the flags regs.
  assert(OptoReg::is_stack(reg), "blow up if spilling flags");

  return rc_stack;
}
1744 
1745 uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream *st) const {
1746   Compile* C = ra_->C;
1747 
1748   // Get registers to move.
1749   OptoReg::Name src_hi = ra_->get_reg_second(in(1));
1750   OptoReg::Name src_lo = ra_->get_reg_first(in(1));
1751   OptoReg::Name dst_hi = ra_->get_reg_second(this);
1752   OptoReg::Name dst_lo = ra_->get_reg_first(this);
1753 
1754   enum RC src_hi_rc = rc_class(src_hi);
1755   enum RC src_lo_rc = rc_class(src_lo);
1756   enum RC dst_hi_rc = rc_class(dst_hi);
1757   enum RC dst_lo_rc = rc_class(dst_lo);
1758 
1759   assert(src_lo != OptoReg::Bad && dst_lo != OptoReg::Bad, "must move at least 1 register");
1760 
1761   if (src_hi != OptoReg::Bad) {
1762     assert((src_lo&1)==0 && src_lo+1==src_hi &&
1763            (dst_lo&1)==0 && dst_lo+1==dst_hi,
1764            "expected aligned-adjacent pairs");
1765   }
1766 
1767   if (src_lo == dst_lo && src_hi == dst_hi) {
1768     return 0;            // Self copy, no move.
1769   }
1770 
1771   bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
1772               (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
1773   int src_offset = ra_->reg2offset(src_lo);
1774   int dst_offset = ra_->reg2offset(dst_lo);
1775 
1776   if (bottom_type()->isa_vect() != NULL) {
1777     uint ireg = ideal_reg();
1778     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
1779     if (cbuf) {
1780       MacroAssembler _masm(cbuf);
1781       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
1782       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
1783         // stack->stack
1784         assert((src_offset & 7) == 0 && (dst_offset & 7) == 0, "unaligned stack offset");
1785         if (ireg == Op_VecD) {
1786           __ unspill(rscratch1, true, src_offset);
1787           __ spill(rscratch1, true, dst_offset);
1788         } else {
1789           __ spill_copy128(src_offset, dst_offset);
1790         }
1791       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
1792         __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1793                ireg == Op_VecD ? __ T8B : __ T16B,
1794                as_FloatRegister(Matcher::_regEncode[src_lo]));
1795       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
1796         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1797                        ireg == Op_VecD ? __ D : __ Q,
1798                        ra_->reg2offset(dst_lo));
1799       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
1800         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1801                        ireg == Op_VecD ? __ D : __ Q,
1802                        ra_->reg2offset(src_lo));
1803       } else {
1804         ShouldNotReachHere();
1805       }
1806     }
1807   } else if (cbuf) {
1808     MacroAssembler _masm(cbuf);
1809     switch (src_lo_rc) {
1810     case rc_int:
1811       if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
1812         if (is64) {
1813             __ mov(as_Register(Matcher::_regEncode[dst_lo]),
1814                    as_Register(Matcher::_regEncode[src_lo]));
1815         } else {
1816             MacroAssembler _masm(cbuf);
1817             __ movw(as_Register(Matcher::_regEncode[dst_lo]),
1818                     as_Register(Matcher::_regEncode[src_lo]));
1819         }
1820       } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
1821         if (is64) {
1822             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1823                      as_Register(Matcher::_regEncode[src_lo]));
1824         } else {
1825             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1826                      as_Register(Matcher::_regEncode[src_lo]));
1827         }
1828       } else {                    // gpr --> stack spill
1829         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1830         __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
1831       }
1832       break;
1833     case rc_float:
1834       if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
1835         if (is64) {
1836             __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
1837                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1838         } else {
1839             __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
1840                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1841         }
1842       } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
1843           if (cbuf) {
1844             __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1845                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1846         } else {
1847             __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1848                      as_FloatRegister(Matcher::_regEncode[src_lo]));
1849         }
1850       } else {                    // fpr --> stack spill
1851         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1852         __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
1853                  is64 ? __ D : __ S, dst_offset);
1854       }
1855       break;
1856     case rc_stack:
1857       if (dst_lo_rc == rc_int) {  // stack --> gpr load
1858         __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
1859       } else if (dst_lo_rc == rc_float) { // stack --> fpr load
1860         __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
1861                    is64 ? __ D : __ S, src_offset);
1862       } else {                    // stack --> stack copy
1863         assert(dst_lo_rc == rc_stack, "spill to bad register class");
1864         __ unspill(rscratch1, is64, src_offset);
1865         __ spill(rscratch1, is64, dst_offset);
1866       }
1867       break;
1868     default:
1869       assert(false, "bad rc_class for spill");
1870       ShouldNotReachHere();
1871     }
1872   }
1873 
1874   if (st) {
1875     st->print("spill ");
1876     if (src_lo_rc == rc_stack) {
1877       st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
1878     } else {
1879       st->print("%s -> ", Matcher::regName[src_lo]);
1880     }
1881     if (dst_lo_rc == rc_stack) {
1882       st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
1883     } else {
1884       st->print("%s", Matcher::regName[dst_lo]);
1885     }
1886     if (bottom_type()->isa_vect() != NULL) {
1887       st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
1888     } else {
1889       st->print("\t# spill size = %d", is64 ? 64:32);
1890     }
1891   }
1892 
1893   return 0;
1894 
1895 }
1896 
1897 #ifndef PRODUCT
1898 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
1899   if (!ra_)
1900     st->print("N%d = SpillCopy(N%d)", _idx, in(1)->_idx);
1901   else
1902     implementation(NULL, ra_, false, st);
1903 }
1904 #endif
1905 
// Emit the spill/copy instructions into cbuf (no printing).
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation(&cbuf, ra_, false, NULL);
}

// Code size of this node; uses the generic MachNode measurement.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}
1913 
1914 //=============================================================================
1915 
1916 #ifndef PRODUCT
// Debug printing: a BoxLock materializes the address of its on-stack
// lock slot into a register.
void BoxLockNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("add %s, rsp, #%d]\t# box lock",
            Matcher::regName[reg], offset);
}
#endif

// Emit "add <reg>, sp, #offset" computing the stack address of the lock
// slot.  The frame offset is expected to be encodable as an add/sub
// immediate; anything else is a fatal error.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);

  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg    = ra_->get_encode(this);

  if (Assembler::operand_valid_for_add_sub_immediate(offset)) {
    __ add(as_Register(reg), sp, offset);
  } else {
    ShouldNotReachHere();
  }
}

// Always one 4-byte AArch64 instruction (the add emitted above).
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // BoxLockNode is not a MachNode, so we can't just call MachNode::size(ra_).
  return 4;
}
1942 
1943 //=============================================================================
1944 
1945 #ifndef PRODUCT
// Debug printing of the unverified entry point (inline-cache check).
void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{
  st->print_cr("# MachUEPNode");
  if (UseCompressedClassPointers) {
    st->print_cr("\tldrw rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
    if (Universe::narrow_klass_shift() != 0) {
      st->print_cr("\tdecode_klass_not_null rscratch1, rscratch1");
    }
  } else {
   st->print_cr("\tldr rscratch1, j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
  }
  st->print_cr("\tcmp r0, rscratch1\t # Inline cache check");
  st->print_cr("\tbne, SharedRuntime::_ic_miss_stub");
}
#endif

// Unverified entry point: compare the receiver's klass (loaded from
// j_rarg0) against the inline-cache klass in rscratch2; on mismatch,
// jump to the IC-miss stub.
void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
{
  // This is the unverified entry point.
  MacroAssembler _masm(&cbuf);

  __ cmp_klass(j_rarg0, rscratch2, rscratch1);
  Label skip;
  // TODO
  // can we avoid this skip and still use a reloc?
  __ br(Assembler::EQ, skip);
  __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  __ bind(skip);
}

// Size of the UEP code; measured generically from the emitted bytes.
uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{
  return MachNode::size(ra_);
}
1980 
1981 // REQUIRED EMIT CODE
1982 
1983 //=============================================================================
1984 
1985 // Emit exception handler code.
// Emit the exception handler stub: a far jump to the opto exception
// blob.  Returns the offset of the handler within the stub section, or
// 0 if the code cache is full.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf)
{
  // mov rscratch1 #exception_blob_entry_point
  // br rscratch1
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ far_jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The stub size was declared up front; verify we stayed within it.
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2004 
2005 // Emit deopt handler code.
// Emit the deoptimization handler stub: capture the return address in lr
// then far-jump to the deopt blob's unpack entry.  Returns the handler's
// offset, or 0 if the code cache is full.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf)
{
  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

  // lr := address of this adr instruction, consumed by the unpacker.
  __ adr(lr, __ pc());
  __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));

  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
2025 
2026 // REQUIRED MATCHER CODE
2027 
2028 //=============================================================================
2029 
2030 const bool Matcher::match_rule_supported(int opcode) {
2031 
2032   switch (opcode) {
2033   default:
2034     break;
2035   }
2036 
2037   if (!has_match_rule(opcode)) {
2038     return false;
2039   }
2040 
2041   return true;  // Per default match rules are supported.
2042 }
2043 
2044 const bool Matcher::match_rule_supported_vector(int opcode, int vlen) {
2045 
2046   // TODO
2047   // identify extra cases that we might want to provide match rules for
2048   // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
2049   bool ret_value = match_rule_supported(opcode);
2050   // Add rules here.
2051 
2052   return ret_value;  // Per default match rules are supported.
2053 }
2054 
// No predicated (masked) vector support on this port.
const bool Matcher::has_predicated_vectors(void) {
  return false;
}

// Use the default register-pressure threshold for floats.
const int Matcher::float_pressure(int default_pressure_threshold) {
  return default_pressure_threshold;
}

// Not used on aarch64; no x87-style FPU stack exists here.
int Matcher::regnum_to_fpu_offset(int regnum)
{
  Unimplemented();
  return 0;
}
2068 
2069 // Is this branch offset short enough that a short branch can be used?
2070 //
2071 // NOTE: If the platform does not provide any short branch variants, then
2072 //       this method should return false for offset 0.
2073 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
2074   // The passed offset is relative to address of the branch.
2075 
2076   return (-32768 <= offset && offset < 32768);
2077 }
2078 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // Probably always true, even if a temp register is required.
  return true;
}

// true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
2089 
2090 // Vector width in bytes.
2091 const int Matcher::vector_width_in_bytes(BasicType bt) {
2092   int size = MIN2(16,(int)MaxVectorSize);
2093   // Minimum 2 values in vector
2094   if (size < 2*type2aelembytes(bt)) size = 0;
2095   // But never < 4
2096   if (size < 4) size = 0;
2097   return size;
2098 }
2099 
// Limits on vector size (number of elements) loaded into vector.
const int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
2104 const int Matcher::min_vector_size(const BasicType bt) {
2105 //  For the moment limit the vector size to 8 bytes
2106     int size = 8 / type2aelembytes(bt);
2107     if (size < 2) size = 2;
2108     return size;
2109 }
2110 
2111 // Vector ideal reg.
2112 const uint Matcher::vector_ideal_reg(int len) {
2113   switch(len) {
2114     case  8: return Op_VecD;
2115     case 16: return Op_VecX;
2116   }
2117   ShouldNotReachHere();
2118   return 0;
2119 }
2120 
2121 const uint Matcher::vector_shift_count_ideal_reg(int size) {
2122   switch(size) {
2123     case  8: return Op_VecD;
2124     case 16: return Op_VecX;
2125   }
2126   ShouldNotReachHere();
2127   return 0;
2128 }
2129 
// AES support not yet implemented
const bool Matcher::pass_original_key_for_aes() {
  return false;
}

// aarch64 supports misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
  return true;
}

// false => size gets scaled to BytesPerLong, ok.
const bool Matcher::init_array_count_is_in_bytes = false;

// Use conditional move (CMOVL)
const int Matcher::long_cmove_cost() {
  // long cmoves are no more expensive than int cmoves
  return 0;
}

const int Matcher::float_cmove_cost() {
  // float cmoves are no more expensive than int cmoves
  return 0;
}
2153 
// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// This affects two different things:
//  - how Decode nodes are matched
//  - how ImplicitNullCheck opportunities are recognized
// If true, the matcher will try to remove all Decodes and match them
// (as operands) into nodes. NullChecks are not prepared to deal with
// Decodes by final_graph_reshaping().
// If false, final_graph_reshaping() forces the decode behind the Cmp
// for a NullCheck. The matcher matches the Decode node into a register.
// Implicit_null_check optimization moves the Decode along with the
// memory operation back up before the NullCheck.
bool Matcher::narrow_oop_use_complex_address() {
  // Complex addressing only works when no shift is needed to decode.
  return Universe::narrow_oop_shift() == 0;
}

bool Matcher::narrow_klass_use_complex_address() {
// TODO
// decide whether we need to set this to true
  return false;
}

bool Matcher::const_oop_prefer_decode() {
  // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
  return Universe::narrow_oop_base() == NULL;
}

bool Matcher::const_klass_prefer_decode() {
  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
  return Universe::narrow_klass_base() == NULL;
}
2190 
// Is it better to copy float constants, or load them directly from
// memory?  Intel can load a float constant from a direct address,
// requiring no extra registers.  Most RISCs will have to materialize
// an address into a register first, so they would do better to copy
// the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no
// fixup is needed.  Else we split the double into 2 integer pieces
// and move it piece-by-piece.  Only happens when passing doubles into
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;

// Not used on aarch64 -- no implicit-null fixup is required here, so any
// call is a bug.  (The old "No-op on amd64" comment was stale copy-paste.)
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  Unimplemented();
}

// Advertise here if the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during
// deoptimization?
bool Matcher::float_in_double() { return false; }

// Do ints take an entire long register or just half?
// The relevant question is how the int is callee-saved:
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits.
const bool Matcher::int_in_long = true;
2222 
// Return whether or not this register is ever used as an argument.
// This function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM
// call in the trampoline, and arguments in those registers not be
// available to the callee.
bool Matcher::can_be_java_arg(int reg)
{
  // r0-r7 and v0-v7 (both halves) are the Java argument registers.
  return
    reg ==  R0_num || reg == R0_H_num ||
    reg ==  R1_num || reg == R1_H_num ||
    reg ==  R2_num || reg == R2_H_num ||
    reg ==  R3_num || reg == R3_H_num ||
    reg ==  R4_num || reg == R4_H_num ||
    reg ==  R5_num || reg == R5_H_num ||
    reg ==  R6_num || reg == R6_H_num ||
    reg ==  R7_num || reg == R7_H_num ||
    reg ==  V0_num || reg == V0_H_num ||
    reg ==  V1_num || reg == V1_H_num ||
    reg ==  V2_num || reg == V2_H_num ||
    reg ==  V3_num || reg == V3_H_num ||
    reg ==  V4_num || reg == V4_H_num ||
    reg ==  V5_num || reg == V5_H_num ||
    reg ==  V6_num || reg == V6_H_num ||
    reg ==  V7_num || reg == V7_H_num;
}

// A register may be spilled around a call iff it may carry an argument.
bool Matcher::is_spillable_arg(int reg)
{
  return can_be_java_arg(reg);
}

// No assembler fast-path for long division by constant on this port.
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {
  return false;
}
2257 
// Register for DIVI projection of divmodI.  aarch64 has no combined
// div/mod instruction, so none of these projections should be reached.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODI projection of divmodI.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for DIVL projection of divmodL.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// SP is saved in the frame pointer register across MH invokes.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return FP_REG_mask();
}
2284 
2285 bool size_fits_all_mem_uses(AddPNode* addp, int shift) {
2286   for (DUIterator_Fast imax, i = addp->fast_outs(imax); i < imax; i++) {
2287     Node* u = addp->fast_out(i);
2288     if (u->is_Mem()) {
2289       int opsize = u->as_Mem()->memory_size();
2290       assert(opsize > 0, "unexpected memory operand size");
2291       if (u->as_Mem()->memory_size() != (1<<shift)) {
2292         return false;
2293       }
2294     }
2295   }
2296   return true;
2297 }
2298 
const bool Matcher::convi2l_type_required = false;

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  // Simple base+offset addresses are handled generically.
  if (clone_base_plus_offset_address(m, mstack, address_visited)) {
    return true;
  }

  // (AddP base (LShiftL (ConvI2L x) con)): fold the shift (and optional
  // i2l conversion) into the address, provided the shift amount matches
  // every memory user's operand size and the subexpressions have no
  // non-address uses.
  Node *off = m->in(AddPNode::Offset);
  if (off->Opcode() == Op_LShiftL && off->in(2)->is_Con() &&
      size_fits_all_mem_uses(m, off->in(2)->get_int()) &&
      // Are there other uses besides address expressions?
      !is_visited(off)) {
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(2), Visit);
    Node *conv = off->in(1);
    if (conv->Opcode() == Op_ConvI2L &&
        // Are there other uses besides address expressions?
        !is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Pre_Visit);
    } else {
      mstack.push(conv, Pre_Visit);
    }
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (off->Opcode() == Op_ConvI2L &&
             // Are there other uses besides address expressions?
             !is_visited(off)) {
    // (AddP base (ConvI2L x)): fold just the conversion into the address.
    address_visited.test_set(m->_idx); // Flag as address_visited
    address_visited.set(off->_idx); // Flag as address_visited
    mstack.push(off->in(1), Pre_Visit);
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

// No platform-specific address reshaping is needed on aarch64.
void Compile::reshape_address(AddPNode* addp) {
}
2344 
2345 
// Emit a volatile (acquire/release) load or store via INSN.  Volatile
// accesses only support a plain [base] address: index, scale and
// displacement must all be absent.
#define MOV_VOLATILE(REG, BASE, INDEX, SCALE, DISP, SCRATCH, INSN)      \
  MacroAssembler _masm(&cbuf);                                          \
  {                                                                     \
    guarantee(INDEX == -1, "mode not permitted for volatile");          \
    guarantee(DISP == 0, "mode not permitted for volatile");            \
    guarantee(SCALE == 0, "mode not permitted for volatile");           \
    __ INSN(REG, as_Register(BASE));                                    \
  }

// Member-function-pointer types for the loadStore() helpers below:
// integer, float and vector load/store emitters respectively.
typedef void (MacroAssembler::* mem_insn)(Register Rt, const Address &adr);
typedef void (MacroAssembler::* mem_float_insn)(FloatRegister Rt, const Address &adr);
typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
                                  MacroAssembler::SIMD_RegVariant T, const Address &adr);
2359 
  // Used for all non-volatile memory accesses.  The use of
  // $mem->opcode() to discover whether this pattern uses sign-extended
  // offsets is something of a kludge.
  static void loadStore(MacroAssembler masm, mem_insn insn,
                         Register reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    // Hooboy, this is fugly.  We need a way to communicate to the
    // encoder that the index needs to be sign extended, so we have to
    // enumerate all the cases.
    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
    case INDINDEXI2L:
    case INDINDEXI2LN:
      // 32-bit index: sign-extend (sxtw) before scaling.
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

    if (index == -1) {
      // No index register: simple base + displacement.
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2390 
  // Float-register variant of loadStore() above.
  // NOTE(review): unlike the Register variant, INDINDEXI2L/INDINDEXI2LN
  // are not listed as sign-extended cases here -- presumably FP accesses
  // never match those patterns; confirm against the memory operands.
  static void loadStore(MacroAssembler masm, mem_float_insn insn,
                         FloatRegister reg, int opcode,
                         Register base, int index, int size, int disp)
  {
    Address::extend scale;

    switch (opcode) {
    case INDINDEXSCALEDI2L:
    case INDINDEXSCALEDI2LN:
      scale = Address::sxtw(size);
      break;
    default:
      scale = Address::lsl(size);
    }

     if (index == -1) {
      (masm.*insn)(reg, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode: disp = %d", disp);
      (masm.*insn)(reg, Address(base, as_Register(index), scale));
    }
  }
2413 
  // Vector-register variant of loadStore(); T selects the SIMD width.
  // Vector accesses use only lsl scaling (no sign-extended index forms).
  static void loadStore(MacroAssembler masm, mem_vector_insn insn,
                         FloatRegister reg, MacroAssembler::SIMD_RegVariant T,
                         int opcode, Register base, int index, int size, int disp)
  {
    if (index == -1) {
      (masm.*insn)(reg, T, Address(base, disp));
    } else {
      assert(disp == 0, "unsupported address mode");
      (masm.*insn)(reg, T, Address(base, as_Register(index), Address::lsl(size)));
    }
  }
2425 
2426 %}
2427 
2428 
2429 
2430 //----------ENCODING BLOCK-----------------------------------------------------
2431 // This block specifies the encoding classes used by the compiler to
2432 // output byte streams.  Encoding classes are parameterized macros
2433 // used by Machine Instruction Nodes in order to generate the bit
2434 // encoding of the instruction.  Operands specify their base encoding
2435 // interface with the interface keyword.  There are currently
2436 // supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, &
2437 // COND_INTER.  REG_INTER causes an operand to generate a function
2438 // which returns its register number when queried.  CONST_INTER causes
2439 // an operand to generate a function which returns the value of the
2440 // constant when queried.  MEMORY_INTER causes an operand to generate
2441 // four functions which return the Base Register, the Index Register,
2442 // the Scale Value, and the Offset Value of the operand when queried.
2443 // COND_INTER causes an operand to generate six functions which return
2444 // the encoding code (ie - encoding bits for the instruction)
2445 // associated with each basic boolean condition for a conditional
2446 // instruction.
2447 //
2448 // Instructions specify two basic values for encoding.  Again, a
2449 // function is available to check if the constant displacement is an
2450 // oop. They use the ins_encode keyword to specify their encoding
2451 // classes (which must be a sequence of enc_class names, and their
2452 // parameters, specified in the encoding block), and they use the
2453 // opcode keyword to specify, in order, their primary, secondary, and
2454 // tertiary opcode.  Only the opcode sections which a particular
2455 // instruction needs for encoding need to be specified.
2456 encode %{
2457   // Build emit functions for each basic byte or larger field in the
2458   // intel encoding scheme (opcode, rm, sib, immediate), and call them
2459   // from C++ code in the enc_class source block.  Emit functions will
2460   // live in the main source block for now.  In future, we can
2461   // generalize this by adding a syntax that specifies the sizes of
2462   // fields in an order, so that the adlc can build the emit functions
2463   // automagically
2464 
  // catch all for unimplemented encodings: emits a trap so unfinished
  // rules fail loudly at runtime instead of producing garbage code.
  enc_class enc_unimplemented %{
    MacroAssembler _masm(&cbuf);
    __ unimplemented("C2 catch all");
  %}
2470 
  // BEGIN Non-volatile memory access
  //
  // Scalar load encodings.  Each class decodes the destination register
  // and forwards to loadStore(), passing $mem->opcode() so the helper
  // can pick sign-extended vs shifted index scaling.  The iRegL
  // overloads of the sub-word loads zero/sign-extend into a long.

  enc_class aarch64_enc_ldrsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsbw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrb, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrshw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrh, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrsw(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrsw, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldr(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Float/double loads into FP registers.

  enc_class aarch64_enc_ldrs(vRegF dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrd(vRegD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, dst_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2556 
  // Vector loads: the SIMD_RegVariant (S/D/Q) selects 32/64/128 bits.
  enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  enc_class aarch64_enc_ldrvQ(vecX dst, memory mem) %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2574 
  // Emit a strb: store the low 8 bits of an int register.
  enc_class aarch64_enc_strb(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strb, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte by using the zero register as the source.
  enc_class aarch64_enc_strb0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero byte, preceded by a StoreStore barrier so the store
  // cannot be reordered before earlier stores.
  enc_class aarch64_enc_strb0_ordered(memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ membar(Assembler::StoreStore);
    loadStore(_masm, &MacroAssembler::strb, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Emit a strh: store the low 16 bits of an int register.
  enc_class aarch64_enc_strh(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strh, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero halfword using the zero register.
  enc_class aarch64_enc_strh0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strh, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2605 
  // Emit a strw: store a 32-bit word from an int register.
  enc_class aarch64_enc_strw(iRegI src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strw, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero word using the zero register.
  enc_class aarch64_enc_strw0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::strw, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2617 
  // Emit a str: 64-bit store from a general register.
  enc_class aarch64_enc_str(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not a valid source operand for str), so stage the value
    // through rscratch2 instead.
    if (src_reg == r31_sp) {
      MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Store a zero doubleword using the zero register.
  enc_class aarch64_enc_str0(memory mem) %{
    MacroAssembler _masm(&cbuf);
    loadStore(_masm, &MacroAssembler::str, zr, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2637 
  // Emit a strs: store a 32-bit float from an FP/SIMD register.
  enc_class aarch64_enc_strs(vRegF src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strs, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // Emit a strd: store a 64-bit double from an FP/SIMD register.
  enc_class aarch64_enc_strd(vRegD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::strd, src_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2649 
  // Vector stores from a SIMD register; MacroAssembler::S/D/Q selects
  // the transfer size (32/64/128 bits).
  enc_class aarch64_enc_strvS(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 64-bit vector store.
  enc_class aarch64_enc_strvD(vecD src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}

  // 128-bit vector store.
  enc_class aarch64_enc_strvQ(vecX src, memory mem) %{
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::Q,
       $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
  %}
2667 
2668   // END Non-volatile memory access
2669 
2670   // volatile loads and stores
2671 
  // Store-release byte.  MOV_VOLATILE materializes the effective
  // address (rscratch1 is the scratch register) and emits the named
  // store-release instruction.
  enc_class aarch64_enc_stlrb(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrb);
  %}

  // Store-release halfword.
  enc_class aarch64_enc_stlrh(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrh);
  %}

  // Store-release word.
  enc_class aarch64_enc_stlrw(iRegI src, memory mem) %{
    MOV_VOLATILE(as_Register($src$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}
2686 
2687 
  // Load-acquire encodings.  There is no sign-extending form of
  // ldarb/ldarh, so the signed sub-word variants load zero-extended
  // and then sign-extend the destination explicitly.

  // Load-acquire byte, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarsbw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtbw(dst_reg, dst_reg);
  %}

  // Load-acquire byte, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsb(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
    __ sxtb(dst_reg, dst_reg);
  %}

  // Load-acquire byte, zero-extended (int destination).
  enc_class aarch64_enc_ldarbw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire byte, zero-extended (long destination).
  enc_class aarch64_enc_ldarb(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarb);
  %}

  // Load-acquire halfword, sign-extended to 32 bits.
  enc_class aarch64_enc_ldarshw(iRegI dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxthw(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, sign-extended to 64 bits.
  enc_class aarch64_enc_ldarsh(iRegL dst, memory mem) %{
    Register dst_reg = as_Register($dst$$reg);
    MOV_VOLATILE(dst_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
    __ sxth(dst_reg, dst_reg);
  %}

  // Load-acquire halfword, zero-extended (int destination).
  enc_class aarch64_enc_ldarhw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire halfword, zero-extended (long destination).
  enc_class aarch64_enc_ldarh(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarh);
  %}

  // Load-acquire word (int destination).
  enc_class aarch64_enc_ldarw(iRegI dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Load-acquire word (long destination).
  // NOTE(review): same enc_class name as the iRegI variant above with
  // an identical body -- confirm the duplicated name is intentional
  // and accepted by adlc.
  enc_class aarch64_enc_ldarw(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
  %}

  // Load-acquire doubleword.
  enc_class aarch64_enc_ldar(iRegL dst, memory mem) %{
    MOV_VOLATILE(as_Register($dst$$reg), $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
  %}
2750 
  // Load-acquire float: there is no FP form of ldar, so acquire into
  // rscratch1 and then move the bits into the FP register with fmov.
  enc_class aarch64_enc_fldars(vRegF dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldarw);
    __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
  %}

  // Load-acquire double, via rscratch1 as for fldars.
  enc_class aarch64_enc_fldard(vRegD dst, memory mem) %{
    MOV_VOLATILE(rscratch1, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
             rscratch1, ldar);
    __ fmovd(as_FloatRegister($dst$$reg), rscratch1);
  %}
2762 
  // Store-release doubleword.
  enc_class aarch64_enc_stlr(iRegL src, memory mem) %{
    Register src_reg = as_Register($src$$reg);
    // we sometimes get asked to store the stack pointer into the
    // current thread -- we cannot do that directly on AArch64
    // (sp is not a valid source for stlr), so stage through rscratch2.
    if (src_reg == r31_sp) {
        MacroAssembler _masm(&cbuf);
      assert(as_Register($mem$$base) == rthread, "unexpected store for sp");
      __ mov(rscratch2, sp);
      src_reg = rscratch2;
    }
    MOV_VOLATILE(src_reg, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2776 
  // Store-release float: no FP form of stlr exists, so move the bits
  // into rscratch2 first and store-release from there.  The inner
  // braces scope the local _masm so it does not clash with the one
  // MOV_VOLATILE creates.
  enc_class aarch64_enc_fstlrs(vRegF src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovs(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlrw);
  %}

  // Store-release double, via rscratch2 as for fstlrs.
  enc_class aarch64_enc_fstlrd(vRegD src, memory mem) %{
    {
      MacroAssembler _masm(&cbuf);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ fmovd(rscratch2, src_reg);
    }
    MOV_VOLATILE(rscratch2, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp,
                 rscratch1, stlr);
  %}
2796 
2797   // synchronized read/update encodings
2798 
  // Load-acquire-exclusive.  ldaxr only takes a base register, so any
  // displacement and/or index is first folded into rscratch1 with lea.
  enc_class aarch64_enc_ldaxr(iRegL dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch1, Address(base, disp));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ ldaxr(dst_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch1, Address(base, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      } else {
        // base + disp first, then add the scaled index.
        __ lea(rscratch1, Address(base, disp));
        __ lea(rscratch1, Address(rscratch1, index_reg, Address::lsl(scale)));
        __ ldaxr(dst_reg, rscratch1);
      }
    }
  %}
2827 
  // Store-release-exclusive.  The address is formed in rscratch2 (same
  // scheme as ldaxr above); the stlxr status word lands in rscratch1
  // and is compared against zero so the flags report success (EQ).
  enc_class aarch64_enc_stlxr(iRegLNoSp src, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register src_reg = as_Register($src$$reg);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
       if (disp != 0) {
        __ lea(rscratch2, Address(base, disp));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        // TODO
        // should we ever get anything other than this case?
        __ stlxr(rscratch1, src_reg, base);
      }
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ lea(rscratch2, Address(base, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      } else {
        __ lea(rscratch2, Address(base, disp));
        __ lea(rscratch2, Address(rscratch2, index_reg, Address::lsl(scale)));
        __ stlxr(rscratch1, src_reg, rscratch2);
      }
    }
    // stlxr writes 0 on success; make the flags reflect that.
    __ cmpw(rscratch1, zr);
  %}
2857 
  // Compare-and-swap encodings (release semantics only; see the _acq
  // variants below for acquire+release).  The memory operand must be a
  // plain base register (no index, no displacement).  Success is
  // reported in the condition flags (see aarch64_enc_cset_eq).

  // 64-bit CAS.
  enc_class aarch64_enc_cmpxchg(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS.
  enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS.
  enc_class aarch64_enc_cmpxchgs(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS.
  enc_class aarch64_enc_cmpxchgb(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2889 
2890 
  // The only difference between aarch64_enc_cmpxchg and
  // aarch64_enc_cmpxchg_acq is that we use load-acquire in the
  // CompareAndSwap sequence to serve as a barrier on acquiring a
  // lock.

  // 64-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchg_acq(memory mem, iRegLNoSp oldval, iRegLNoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 32-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 16-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchgs_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}

  // 8-bit CAS with acquire semantics.
  enc_class aarch64_enc_cmpxchgb_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
    MacroAssembler _masm(&cbuf);
    guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
    __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, noreg);
  %}
2926 
  // auxiliary used for CompareAndSwapX to set result register:
  // materializes the EQ flag produced by the preceding CAS as 0/1.
  enc_class aarch64_enc_cset_eq(iRegINoSp res) %{
    MacroAssembler _masm(&cbuf);
    Register res_reg = as_Register($res$$reg);
    __ cset(res_reg, Assembler::EQ);
  %}
2933 
2934   // prefetch encodings
2935 
  // Prefetch for write: emits prfm with the PSTL1KEEP hint
  // (prefetch for store, L1, temporal).  Address forms that prfm
  // cannot encode directly are split through rscratch1 with lea.
  enc_class aarch64_enc_prefetchw(memory mem) %{
    MacroAssembler _masm(&cbuf);
    Register base = as_Register($mem$$base);
    int index = $mem$$index;
    int scale = $mem$$scale;
    int disp = $mem$$disp;
    if (index == -1) {
      __ prfm(Address(base, disp), PSTL1KEEP);
    } else {
      Register index_reg = as_Register(index);
      if (disp == 0) {
        __ prfm(Address(base, index_reg, Address::lsl(scale)), PSTL1KEEP);
      } else {
        __ lea(rscratch1, Address(base, disp));
        __ prfm(Address(rscratch1, index_reg, Address::lsl(scale)), PSTL1KEEP);
      }
    }
  %}
2954 
  // mov encodings
2956 
  // Move a 32-bit immediate into an int register; zero gets the
  // cheaper move-from-zr form.
  enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
    MacroAssembler _masm(&cbuf);
    u_int32_t con = (u_int32_t)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    if (con == 0) {
      __ movw(dst_reg, zr);
    } else {
      __ movw(dst_reg, con);
    }
  %}

  // Move a 64-bit immediate into a long register; zero uses zr.
  enc_class aarch64_enc_mov_imm(iRegL dst, immL src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    u_int64_t con = (u_int64_t)$src$$constant;
    if (con == 0) {
      __ mov(dst_reg, zr);
    } else {
      __ mov(dst_reg, con);
    }
  %}
2978 
  // Move a pointer constant into a register, dispatching on its
  // relocation type: oop and metadata constants need relocation info;
  // plain addresses are loaded either as an immediate (small values
  // below the VM page size) or via adrp+add.  NULL and 1 are handled
  // by the dedicated immP0/immP_1 encodings below, hence the
  // ShouldNotReachHere.
  enc_class aarch64_enc_mov_p(iRegP dst, immP src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL || con == (address)1) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      if (rtype == relocInfo::oop_type) {
        __ movoop(dst_reg, (jobject)con, /*immediate*/true);
      } else if (rtype == relocInfo::metadata_type) {
        __ mov_metadata(dst_reg, (Metadata*)con);
      } else {
        assert(rtype == relocInfo::none, "unexpected reloc type");
        if (con < (address)(uintptr_t)os::vm_page_size()) {
          __ mov(dst_reg, con);
        } else {
          unsigned long offset;
          __ adrp(dst_reg, con, offset);
          __ add(dst_reg, dst_reg, offset);
        }
      }
    }
  %}
3003 
  // Move the NULL pointer constant (0) into a register.
  enc_class aarch64_enc_mov_p0(iRegP dst, immP0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move the pointer constant 1 into a register.
  enc_class aarch64_enc_mov_p1(iRegP dst, immP_1 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, (u_int64_t)1);
  %}
3015 
  // Load the address of the safepoint polling page with a single
  // adrp carrying poll_type relocation; the page is expected to be
  // page-aligned, so the low-order offset must come out zero.
  enc_class aarch64_enc_mov_poll_page(iRegP dst, immPollPage src) %{
    MacroAssembler _masm(&cbuf);
    address page = (address)$src$$constant;
    Register dst_reg = as_Register($dst$$reg);
    unsigned long off;
    __ adrp(dst_reg, Address(page, relocInfo::poll_type), off);
    assert(off == 0, "assumed offset == 0");
  %}

  // Load the card table byte map base (GC barrier constant).
  enc_class aarch64_enc_mov_byte_map_base(iRegP dst, immByteMapBase src) %{
    MacroAssembler _masm(&cbuf);
    __ load_byte_map_base($dst$$Register);
  %}
3029 
  // Move a narrow (compressed) oop constant; NULL is handled by the
  // immN0 encoding below.
  enc_class aarch64_enc_mov_n(iRegN dst, immN src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::oop_type, "unexpected reloc type");
      __ set_narrow_oop(dst_reg, (jobject)con);
    }
  %}

  // Move the narrow-oop NULL constant.
  enc_class aarch64_enc_mov_n0(iRegN dst, immN0 src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    __ mov(dst_reg, zr);
  %}

  // Move a narrow (compressed) klass constant.
  enc_class aarch64_enc_mov_nk(iRegN dst, immNKlass src) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    address con = (address)$src$$constant;
    if (con == NULL) {
      ShouldNotReachHere();
    } else {
      relocInfo::relocType rtype = $src->constant_reloc();
      assert(rtype == relocInfo::metadata_type, "unexpected reloc type");
      __ set_narrow_klass(dst_reg, (Klass *)con);
    }
  %}
3061 
3062   // arithmetic encodings
3063 
  // 32-bit add/subtract of an immediate.  A single encoding serves
  // both instructions: the rule's $primary selects subtract by
  // negating the constant, and a negative constant is then emitted as
  // the opposite operation so the assembler always sees a
  // non-negative immediate.
  enc_class aarch64_enc_addsubw_imm(iRegI dst, iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ subw(dst_reg, src_reg, -con);
    } else {
      __ addw(dst_reg, src_reg, con);
    }
  %}

  // 64-bit add/subtract of an immediate; same $primary scheme as the
  // 32-bit version above.
  enc_class aarch64_enc_addsub_imm(iRegL dst, iRegL src1, immLAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register dst_reg = as_Register($dst$$reg);
    Register src_reg = as_Register($src1$$reg);
    int32_t con = (int32_t)$src2$$constant;
    // add has primary == 0, subtract has primary == 1
    if ($primary) { con = -con; }
    if (con < 0) {
      __ sub(dst_reg, src_reg, -con);
    } else {
      __ add(dst_reg, src_reg, con);
    }
  %}
3091 
  // Integer division/remainder via corrected_idivl/q, which handle
  // the Java-mandated special cases (e.g. MIN_VALUE / -1).  The
  // boolean argument selects remainder (true) vs quotient (false),
  // as seen from the div/mod pairs below.

  // 32-bit quotient.
  enc_class aarch64_enc_divw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 64-bit quotient.
  enc_class aarch64_enc_div(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, false, rscratch1);
  %}

  // 32-bit remainder.
  enc_class aarch64_enc_modw(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivl(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}

  // 64-bit remainder.
  enc_class aarch64_enc_mod(iRegI dst, iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
   Register dst_reg = as_Register($dst$$reg);
   Register src1_reg = as_Register($src1$$reg);
   Register src2_reg = as_Register($src2$$reg);
    __ corrected_idivq(dst_reg, src1_reg, src2_reg, true, rscratch1);
  %}
3123 
3124   // compare instruction encodings
3125 
  // 32-bit register-register compare.
  enc_class aarch64_enc_cmpw(iRegI src1, iRegI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // 32-bit compare against an add/sub-encodable immediate: compare is
  // expressed as a flags-setting subtract from zr, flipping to an add
  // for negative constants so the immediate stays non-negative.
  enc_class aarch64_enc_cmpw_imm_addsub(iRegI src1, immIAddSub src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int32_t val = $src2$$constant;
    if (val >= 0) {
      __ subsw(zr, reg, val);
    } else {
      __ addsw(zr, reg, -val);
    }
  %}

  // 32-bit compare against an arbitrary immediate: materialize it in
  // rscratch1 first.
  enc_class aarch64_enc_cmpw_imm(iRegI src1, immI src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int32_t val = (u_int32_t)$src2$$constant;
    __ movw(rscratch1, val);
    __ cmpw(reg1, rscratch1);
  %}

  // 64-bit register-register compare.
  enc_class aarch64_enc_cmp(iRegL src1, iRegL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}
3158 
  // 64-bit compare against a 12-bit add/sub-encodable immediate, as a
  // flags-setting subtract (or add, for negative values).  val == -val
  // detects Long.MIN_VALUE, which cannot be negated and must be
  // materialized in rscratch1 instead.
  enc_class aarch64_enc_cmp_imm_addsub(iRegL src1, immL12 src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src1$$reg);
    int64_t val = $src2$$constant;
    if (val >= 0) {
      __ subs(zr, reg, val);
    } else if (val != -val) {
      __ adds(zr, reg, -val);
    } else {
    // aargh, Long.MIN_VALUE is a special case
      __ orr(rscratch1, zr, (u_int64_t)val);
      __ subs(zr, reg, rscratch1);
    }
  %}

  // 64-bit compare against an arbitrary immediate, via rscratch1.
  enc_class aarch64_enc_cmp_imm(iRegL src1, immL src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    u_int64_t val = (u_int64_t)$src2$$constant;
    __ mov(rscratch1, val);
    __ cmp(reg1, rscratch1);
  %}
3181 
  // Pointer compare (64-bit).
  enc_class aarch64_enc_cmpp(iRegP src1, iRegP src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmp(reg1, reg2);
  %}

  // Narrow-oop compare (32-bit).
  enc_class aarch64_enc_cmpn(iRegN src1, iRegN src2) %{
    MacroAssembler _masm(&cbuf);
    Register reg1 = as_Register($src1$$reg);
    Register reg2 = as_Register($src2$$reg);
    __ cmpw(reg1, reg2);
  %}

  // Pointer null test: compare against zr.
  enc_class aarch64_enc_testp(iRegP src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmp(reg, zr);
  %}

  // Narrow-oop null test: 32-bit compare against zr.
  enc_class aarch64_enc_testn(iRegN src) %{
    MacroAssembler _masm(&cbuf);
    Register reg = as_Register($src$$reg);
    __ cmpw(reg, zr);
  %}
3207 
  // Unconditional branch to a label.
  enc_class aarch64_enc_b(label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ b(*L);
  %}

  // Conditional branch; the condition code comes from the cmpOp
  // operand's $cmpcode.
  enc_class aarch64_enc_br_con(cmpOp cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}

  // Unsigned-comparison variant; identical emission, the cmpOpU
  // operand supplies unsigned condition codes.
  enc_class aarch64_enc_br_conU(cmpOpU cmp, label lbl) %{
    MacroAssembler _masm(&cbuf);
    Label *L = $lbl$$label;
    __ br ((Assembler::Condition)$cmp$$cmpcode, *L);
  %}
3225 
  // Slow-path subtype check (secondary-supers scan).  On a miss,
  // control reaches the `miss` label with condition codes set; when
  // $primary is set the result register is additionally zeroed before
  // the miss label -- presumably to distinguish hit/miss encodings of
  // the same rule (TODO confirm against the matching instruct rules).
  enc_class aarch64_enc_partial_subtype_check(iRegP sub, iRegP super, iRegP temp, iRegP result)
  %{
     Register sub_reg = as_Register($sub$$reg);
     Register super_reg = as_Register($super$$reg);
     Register temp_reg = as_Register($temp$$reg);
     Register result_reg = as_Register($result$$reg);

     Label miss;
     MacroAssembler _masm(&cbuf);
     __ check_klass_subtype_slow_path(sub_reg, super_reg, temp_reg, result_reg,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
     if ($primary) {
       __ mov(result_reg, zr);
     }
     __ bind(miss);
  %}
3243 
  // Java static call.  Calls without a resolved _method are runtime
  // wrappers; real static calls get a static/opt-virtual relocation
  // plus a to-interpreter stub.  Either the trampoline or the stub can
  // fail when the code cache is full, in which case compilation is
  // bailed out.
  enc_class aarch64_enc_java_static_call(method meth) %{
    MacroAssembler _masm(&cbuf);

    address addr = (address)$meth$$method;
    address call;
    if (!_method) {
      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
      call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      call = __ trampoline_call(Address(addr, rspec), &cbuf);

      // Emit stub for static call
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}
3270 
  // Java dynamic (inline-cache) call; bails out the compile if the
  // code cache is full.
  enc_class aarch64_enc_java_dynamic_call(method meth) %{
    MacroAssembler _masm(&cbuf);
    int method_index = resolved_method_index(cbuf);
    address call = __ ic_call((address)$meth$$method, method_index);
    if (call == NULL) {
      ciEnv::current()->record_failure("CodeCache is full");
      return;
    }
  %}

  // Post-call epilog; the stack-depth verification is unimplemented.
  enc_class aarch64_enc_call_epilog() %{
    MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find magic cookie on stack
      __ call_Unimplemented();
    }
  %}
3288 
  // Call from compiled Java code into the runtime.
  enc_class aarch64_enc_java_to_runtime(method meth) %{
    MacroAssembler _masm(&cbuf);

    // some calls to generated routines (arraycopy code) are scheduled
    // by C2 as runtime calls. if so we can call them using a br (they
    // will be in a reachable segment) otherwise we have to use a blr
    // which loads the absolute address into a register.
    address entry = (address)$meth$$method;
    CodeBlob *cb = CodeCache::find_blob(entry);
    if (cb) {
      address call = __ trampoline_call(Address(entry, relocInfo::runtime_call_type));
      if (call == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    } else {
      Label retaddr;
      __ adr(rscratch2, retaddr);
      __ lea(rscratch1, RuntimeAddress(entry));
      // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc():
      // push the return address (paired with a zero slot to keep the
      // stack 16-byte aligned), then pop it after the call returns.
      __ stp(zr, rscratch2, Address(__ pre(sp, -2 * wordSize)));
      __ blr(rscratch1);
      __ bind(retaddr);
      __ add(sp, sp, 2 * wordSize);
    }
  %}
3315 
  // Jump to the exception-rethrow stub.
  enc_class aarch64_enc_rethrow() %{
    MacroAssembler _masm(&cbuf);
    __ far_jump(RuntimeAddress(OptoRuntime::rethrow_stub()));
  %}

  // Method return.
  enc_class aarch64_enc_ret() %{
    MacroAssembler _masm(&cbuf);
    __ ret(lr);
  %}

  // Tail call: indirect jump to the target method.
  enc_class aarch64_enc_tail_call(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    __ br(target_reg);
  %}

  // Tail jump used for exception forwarding.
  enc_class aarch64_enc_tail_jmp(iRegP jump_target) %{
    MacroAssembler _masm(&cbuf);
    Register target_reg = as_Register($jump_target$$reg);
    // exception oop should be in r0
    // ret addr has been popped into lr
    // callee expects it in r3
    __ mov(r3, lr);
    __ br(target_reg);
  %}
3341 
3342   enc_class aarch64_enc_fast_lock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
3343     MacroAssembler _masm(&cbuf);
3344     Register oop = as_Register($object$$reg);
3345     Register box = as_Register($box$$reg);
3346     Register disp_hdr = as_Register($tmp$$reg);
3347     Register tmp = as_Register($tmp2$$reg);
3348     Label cont;
3349     Label object_has_monitor;
3350     Label cas_failed;
3351 
3352     assert_different_registers(oop, box, tmp, disp_hdr);
3353 
3354     // Load markOop from object into displaced_header.
3355     __ ldr(disp_hdr, Address(oop, oopDesc::mark_offset_in_bytes()));
3356 
3357     // Always do locking in runtime.
3358     if (EmitSync & 0x01) {
3359       __ cmp(oop, zr);
3360       return;
3361     }
3362 
3363     if (UseBiasedLocking && !UseOptoBiasInlining) {
3364       __ biased_locking_enter(box, oop, disp_hdr, tmp, true, cont);
3365     }
3366 
3367     // Check for existing monitor
3368     if ((EmitSync & 0x02) == 0) {
3369       __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
3370     }
3371 
3372     // Set tmp to be (markOop of object | UNLOCK_VALUE).
3373     __ orr(tmp, disp_hdr, markOopDesc::unlocked_value);
3374 
3375     // Initialize the box. (Must happen before we update the object mark!)
3376     __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3377 
3378     // Compare object markOop with an unlocked value (tmp) and if
3379     // equal exchange the stack address of our box with object markOop.
3380     // On failure disp_hdr contains the possibly locked markOop.
3381     __ cmpxchg(oop, tmp, box, Assembler::xword, /*acquire*/ true,
3382                /*release*/ true, /*weak*/ false, disp_hdr);
3383     __ br(Assembler::EQ, cont);
3384 
3385     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
3386 
3387     // If the compare-and-exchange succeeded, then we found an unlocked
3388     // object, will have now locked it will continue at label cont
3389 
3390     __ bind(cas_failed);
3391     // We did not see an unlocked object so try the fast recursive case.
3392 
3393     // Check if the owner is self by comparing the value in the
3394     // markOop of object (disp_hdr) with the stack pointer.
3395     __ mov(rscratch1, sp);
3396     __ sub(disp_hdr, disp_hdr, rscratch1);
3397     __ mov(tmp, (address) (~(os::vm_page_size()-1) | (uintptr_t)markOopDesc::lock_mask_in_place));
3398     // If condition is true we are cont and hence we can store 0 as the
3399     // displaced header in the box, which indicates that it is a recursive lock.
3400     __ ands(tmp/*==0?*/, disp_hdr, tmp);   // Sets flags for result
3401     __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3402 
3403     if ((EmitSync & 0x02) == 0) {
3404       __ b(cont);
3405 
3406       // Handle existing monitor.
3407       __ bind(object_has_monitor);
3408       // The object's monitor m is unlocked iff m->owner == NULL,
3409       // otherwise m->owner may contain a thread or a stack address.
3410       //
3411       // Try to CAS m->owner from NULL to current thread.
3412       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
3413     __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
3414                /*release*/ true, /*weak*/ false, noreg); // Sets flags for result
3415 
3416       // Store a non-null value into the box to avoid looking like a re-entrant
3417       // lock. The fast-path monitor unlock code checks for
3418       // markOopDesc::monitor_value so use markOopDesc::unused_mark which has the
3419       // relevant bit set, and also matches ObjectSynchronizer::slow_enter.
3420       __ mov(tmp, (address)markOopDesc::unused_mark());
3421       __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
3422     }
3423 
3424     __ bind(cont);
3425     // flag == EQ indicates success
3426     // flag == NE indicates failure
3427   %}
3428 
  enc_class aarch64_enc_fast_unlock(iRegP object, iRegP box, iRegP tmp, iRegP tmp2) %{
    MacroAssembler _masm(&cbuf);
    // Inline fast-path monitor exit. On exit, flags == EQ means the lock
    // was released; flags == NE sends the caller to the slow path.
    Register oop = as_Register($object$$reg);
    Register box = as_Register($box$$reg);
    Register disp_hdr = as_Register($tmp$$reg);
    Register tmp = as_Register($tmp2$$reg);
    Label cont;
    Label object_has_monitor;

    assert_different_registers(oop, box, tmp, disp_hdr);

    // Always do locking in runtime.
    if (EmitSync & 0x01) {
      __ cmp(oop, zr); // Oop can't be 0 here => always false.
      return;
    }

    if (UseBiasedLocking && !UseOptoBiasInlining) {
      __ biased_locking_exit(oop, tmp, cont);
    }

    // Find the lock address and load the displaced header from the stack.
    __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));

    // If the displaced header is 0, we have a recursive unlock.
    __ cmp(disp_hdr, zr);
    __ br(Assembler::EQ, cont);

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
      __ tbnz(disp_hdr, exact_log2(markOopDesc::monitor_value), object_has_monitor);
    }

    // Check if it is still a light weight lock, this is true if we
    // see the stack address of the basicLock in the markOop of the
    // object.

    __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
               /*release*/ true, /*weak*/ false, tmp);
    __ b(cont);

    assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");

    // Handle existing monitor.
    if ((EmitSync & 0x02) == 0) {
      __ bind(object_has_monitor);
      __ add(tmp, tmp, -markOopDesc::monitor_value); // monitor
      __ ldr(rscratch1, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
      __ eor(rscratch1, rscratch1, rthread); // Will be 0 if we are the owner.
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if there are 0 recursions
      __ cmp(rscratch1, zr); // Sets flags for result
      __ br(Assembler::NE, cont);

      __ ldr(rscratch1, Address(tmp, ObjectMonitor::EntryList_offset_in_bytes()));
      __ ldr(disp_hdr, Address(tmp, ObjectMonitor::cxq_offset_in_bytes()));
      __ orr(rscratch1, rscratch1, disp_hdr); // Will be 0 if both are 0.
      __ cmp(rscratch1, zr); // Sets flags for result
      __ cbnz(rscratch1, cont);
      // need a release store here
      __ lea(tmp, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
      __ stlr(zr, tmp); // set unowned
    }

    __ bind(cont);
    // flag == EQ indicates success
    // flag == NE indicates failure
  %}
3498 
3499 %}
3500 
3501 //----------FRAME--------------------------------------------------------------
3502 // Definition of frame structure and management information.
3503 //
3504 //  S T A C K   L A Y O U T    Allocators stack-slot number
3505 //                             |   (to get allocators register number
3506 //  G  Owned by    |        |  v    add OptoReg::stack0())
3507 //  r   CALLER     |        |
3508 //  o     |        +--------+      pad to even-align allocators stack-slot
3509 //  w     V        |  pad0  |        numbers; owned by CALLER
3510 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3511 //  h     ^        |   in   |  5
3512 //        |        |  args  |  4   Holes in incoming args owned by SELF
3513 //  |     |        |        |  3
3514 //  |     |        +--------+
3515 //  V     |        | old out|      Empty on Intel, window on Sparc
3516 //        |    old |preserve|      Must be even aligned.
3517 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3518 //        |        |   in   |  3   area for Intel ret address
3519 //     Owned by    |preserve|      Empty on Sparc.
3520 //       SELF      +--------+
3521 //        |        |  pad2  |  2   pad to align old SP
3522 //        |        +--------+  1
3523 //        |        | locks  |  0
3524 //        |        +--------+----> OptoReg::stack0(), even aligned
3525 //        |        |  pad1  | 11   pad to align new SP
3526 //        |        +--------+
3527 //        |        |        | 10
3528 //        |        | spills |  9   spills
3529 //        V        |        |  8   (pad0 slot for callee)
3530 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3531 //        ^        |  out   |  7
3532 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3533 //     Owned by    +--------+
3534 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3535 //        |    new |preserve|      Must be even-aligned.
3536 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3537 //        |        |        |
3538 //
3539 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3540 //         known from SELF's arguments and the Java calling convention.
3541 //         Region 6-7 is determined per call site.
3542 // Note 2: If the calling convention leaves holes in the incoming argument
3543 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3545 //         incoming area, as the Java calling convention is completely under
3546 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3548 //         varargs C calling conventions.
3549 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3550 //         even aligned with pad0 as needed.
3551 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3552 //           (the latter is true on Intel but is it false on AArch64?)
3553 //         region 6-11 is even aligned; it may be padded out more so that
3554 //         the region from SP to FP meets the minimum stack alignment.
3555 // Note 4: For I2C adapters, the incoming FP may not meet the minimum stack
3556 //         alignment.  Region 11, pad1, may be dynamically extended so that
3557 //         SP meets the minimum alignment.
3558 
frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.

  // Inline Cache Register or methodOop for I2C.
  inline_cache_reg(R12);

  // Method Oop Register when calling interpreter.
  interpreter_method_oop_reg(R12);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(2);

  // Compiled code's Frame Pointer
  frame_pointer(R31);

  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(R29);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots. aarch64 needs two slots for
  // return address and fp.
  // TODO think this is correct but check
  in_preserve_stack_slots(4);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // TODO this may well be correct but need to check why that - 2 is there
  // ppc port uses 0 but we definitely need to allow for fixed_slots
  // which folds in the space used for monitors
  return_addr(STACK - 2 +
              align_up((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.

  calling_convention
  %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}

  c_calling_convention
  %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, NULL, length);
  %}

  // Location of compiled Java return values.  Same as C for now.
  return_value
  %{
    // TODO do we allow ideal_reg == Op_RegN???
    assert(ideal_reg >= Op_RegI && ideal_reg <= Op_RegL,
           "only return normal values");

    // lo[] holds the first (or only) allocator register for each ideal
    // return type; hi[] holds the second slot of a two-slot value, or
    // OptoReg::Bad when the value occupies a single slot.
    static const int lo[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      R0_num,                            // Op_RegN
      R0_num,                            // Op_RegI
      R0_num,                            // Op_RegP
      V0_num,                            // Op_RegF
      V0_num,                            // Op_RegD
      R0_num                             // Op_RegL
    };

    static const int hi[Op_RegL + 1] = { // enum name
      0,                                 // Op_Node
      0,                                 // Op_Set
      OptoReg::Bad,                       // Op_RegN
      OptoReg::Bad,                      // Op_RegI
      R0_H_num,                          // Op_RegP
      OptoReg::Bad,                      // Op_RegF
      V0_H_num,                          // Op_RegD
      R0_H_num                           // Op_RegL
    };

    return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
  %}
%}
3662 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);        // Required cost attribute (default per-operand cost)

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(INSN_COST); // Required cost attribute (default per-instruction cost)
ins_attrib ins_size(32);        // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction
                                // a non-matching short branch variant
                                // of some long branch?
ins_attrib ins_alignment(4);    // Required alignment attribute (must
                                // be a power of 2) specifies the
                                // alignment that some part of the
                                // instruction (not necessarily the
                                // start) requires.  If > 1, a
                                // compute_padding() function must be
                                // provided for the instruction
3680 
3681 //----------OPERANDS-----------------------------------------------------------
3682 // Operand definitions must precede instruction definitions for correct parsing
3683 // in the ADLC because operands constitute user defined types which are used in
3684 // instruction definitions.
3685 
3686 //----------Simple Operands----------------------------------------------------
3687 
// Integer operands 32 bit
// 32 bit immediate (any ConI constant)
operand immI()
%{
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit zero
operand immI0()
%{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit increment
operand immI_1()
%{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unit decrement
operand immI_M1()
%{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift values for add/sub extension shift
operand immIExt()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant no greater than 4 (note: no lower bound is enforced)
operand immI_le_4()
%{
  predicate(n->get_int() <= 4);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 31
operand immI_31()
%{
  predicate(n->get_int() == 31);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 8
operand immI_8()
%{
  predicate(n->get_int() == 8);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 16
operand immI_16()
%{
  predicate(n->get_int() == 16);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 24
operand immI_24()
%{
  predicate(n->get_int() == 24);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 32
operand immI_32()
%{
  predicate(n->get_int() == 32);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 48
operand immI_48()
%{
  predicate(n->get_int() == 48);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 56
operand immI_56()
%{
  predicate(n->get_int() == 56);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 63
operand immI_63()
%{
  predicate(n->get_int() == 63);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 64
operand immI_64()
%{
  predicate(n->get_int() == 64);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 255 (0xFF, a one-byte mask)
operand immI_255()
%{
  predicate(n->get_int() == 255);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit constant 65535 (0xFFFF, a two-byte mask)
operand immI_65535()
%{
  predicate(n->get_int() == 65535);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3862 
// 64 bit constant 255 (0xFF, a one-byte mask)
operand immL_255()
%{
  predicate(n->get_long() == 255L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 65535 (0xFFFF, a two-byte mask)
operand immL_65535()
%{
  predicate(n->get_long() == 65535L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit constant 4294967295 (0xFFFFFFFF, a four-byte mask)
operand immL_4294967295()
%{
  predicate(n->get_long() == 4294967295L);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit mask of the form 2^k - 1 with the top two bits clear
operand immL_bitmask()
%{
  predicate(((n->get_long() & 0xc000000000000000l) == 0)
            && is_power_of_2(n->get_long() + 1));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit mask of the form 2^k - 1 with the top two bits clear
operand immI_bitmask()
%{
  predicate(((n->get_int() & 0xc0000000) == 0)
            && is_power_of_2(n->get_int() + 1));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
3914 
// Scale values for scaled offset addressing modes (up to long but not quad)
operand immIScale()
%{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 26 bit signed offset -- for pc-relative branches
operand immI26()
%{
  predicate(((-(1 << 25)) <= n->get_int()) && (n->get_int() < (1 << 25)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 19 bit signed offset -- for pc-relative loads
operand immI19()
%{
  predicate(((-(1 << 18)) <= n->get_int()) && (n->get_int() < (1 << 18)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset -- for base plus immediate loads
operand immIU12()
%{
  predicate((0 <= n->get_int()) && (n->get_int() < (1 << 12)));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 12 bit unsigned offset, as a 64 bit constant
operand immLU12()
%{
  predicate((0 <= n->get_long()) && (n->get_long() < (1 << 12)));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset for scaled or unscaled immediate loads and stores
operand immIOffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 4-byte (shift 2) scaled access
operand immIOffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 2));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for an 8-byte (shift 3) scaled access
operand immIOffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 3));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Offset valid for a 16-byte (shift 4) scaled access
operand immIOffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_int(), 4));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit variants of the offset operands above
operand immLoffset()
%{
  predicate(Address::offset_ok_for_immed(n->get_long()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset4()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 2));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset8()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 3));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immLoffset16()
%{
  predicate(Address::offset_ok_for_immed(n->get_long(), 4));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4049 
// 32 bit integer valid for add sub immediate
operand immIAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate((long)n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit unsigned integer valid for logical immediate
// TODO -- check this is right when e.g the mask is 0x80000000
operand immILog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/true, (unsigned long)n->get_int()));
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer operands 64 bit
// 64 bit immediate (any ConL constant)
operand immL()
%{
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit zero
operand immL0()
%{
  predicate(n->get_long() == 0);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit increment
operand immL_1()
%{
  predicate(n->get_long() == 1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit unit decrement
operand immL_M1()
%{
  predicate(n->get_long() == -1);
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 32 bit offset of pc in thread anchor

operand immL_pc_off()
%{
  predicate(n->get_long() == in_bytes(JavaThread::frame_anchor_offset()) +
                             in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
  match(ConL);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for add sub immediate
operand immLAddSub()
%{
  predicate(Assembler::operand_valid_for_add_sub_immediate(n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// 64 bit integer valid for logical immediate
operand immLLog()
%{
  predicate(Assembler::operand_valid_for_logical_immediate(/*is32*/false, (unsigned long)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits()
%{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4158 
// Pointer operands
// Pointer Immediate (any ConP constant)
operand immP()
%{
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0()
%{
  predicate(n->get_ptr() == 0);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate One
// this is used in object initialization (initial object header)
operand immP_1()
%{
  predicate(n->get_ptr() == 1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Polling Page Pointer Immediate
operand immPollPage()
%{
  predicate((address)n->get_ptr() == os::get_polling_page());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Card Table Byte Map Base
operand immByteMapBase()
%{
  // Get base of card map
  predicate(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
            (jbyte*)n->get_ptr() == ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base());
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus One
// this is used when we want to write the current PC to the thread anchor
operand immP_M1()
%{
  predicate(n->get_ptr() == -1);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate Minus Two
// this is used when we want to write the current PC to the thread anchor
operand immP_M2()
%{
  predicate(n->get_ptr() == -2);
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float and Double operands
// Double Immediate (any ConD constant)
operand immD()
%{
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: +0.0d
operand immD0()
%{
  predicate(jlong_cast(n->getd()) == 0);
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: values encodable as a packed FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immDPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate(n->getd()));
  match(ConD);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (any ConF constant)
operand immF()
%{
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: +0.0f.
operand immF0()
%{
  predicate(jint_cast(n->getf()) == 0);
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: values encodable as a packed FP immediate
// (see Assembler::operand_valid_for_float_immediate)
operand immFPacked()
%{
  predicate(Assembler::operand_valid_for_float_immediate((double)n->getf()));
  match(ConF);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow pointer operands
// Narrow Pointer Immediate (any ConN constant)
operand immN()
%{
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow NULL Pointer Immediate
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Klass Immediate
operand immNKlass()
%{
  match(ConNKlass);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4332 
4333 // Integer 32 bit Register Operands
4334 // Integer 32 bitRegister (excludes SP)
4335 operand iRegI()
4336 %{
4337   constraint(ALLOC_IN_RC(any_reg32));
4338   match(RegI);
4339   match(iRegINoSp);
4340   op_cost(0);
4341   format %{ %}
4342   interface(REG_INTER);
4343 %}
4344 
4345 // Integer 32 bit Register not Special
4346 operand iRegINoSp()
4347 %{
4348   constraint(ALLOC_IN_RC(no_special_reg32));
4349   match(RegI);
4350   op_cost(0);
4351   format %{ %}
4352   interface(REG_INTER);
4353 %}
4354 
4355 // Integer 64 bit Register Operands
4356 // Integer 64 bit Register (includes SP)
4357 operand iRegL()
4358 %{
4359   constraint(ALLOC_IN_RC(any_reg));
4360   match(RegL);
4361   match(iRegLNoSp);
4362   op_cost(0);
4363   format %{ %}
4364   interface(REG_INTER);
4365 %}
4366 
4367 // Integer 64 bit Register not Special
4368 operand iRegLNoSp()
4369 %{
4370   constraint(ALLOC_IN_RC(no_special_reg));
4371   match(RegL);
4372   match(iRegL_R0);
4373   format %{ %}
4374   interface(REG_INTER);
4375 %}
4376 
// Pointer Register Operands
// Pointer Register
operand iRegP()
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);
  // the more restrictive pointer operands below are also acceptable
  // wherever an iRegP is expected
  match(iRegPNoSp);
  match(iRegP_R0);
  //match(iRegP_R2);
  //match(iRegP_R4);
  //match(iRegP_R5);
  match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register not Special
operand iRegPNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_ptr_reg));
  match(RegP);
  // match(iRegP);
  // match(iRegP_R0);
  // match(iRegP_R2);
  // match(iRegP_R4);
  // match(iRegP_R5);
  // match(thread_RegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R0 only
// Fixed-register operands like the following pin a value into one
// specific register, e.g. for calling conventions and runtime stubs.
operand iRegP_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R1 only
operand iRegP_R1()
%{
  constraint(ALLOC_IN_RC(r1_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R2 only
operand iRegP_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R3 only
operand iRegP_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R4 only
operand iRegP_R4()
%{
  constraint(ALLOC_IN_RC(r4_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R5 only
operand iRegP_R5()
%{
  constraint(ALLOC_IN_RC(r5_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register R10 only
operand iRegP_R10()
%{
  constraint(ALLOC_IN_RC(r10_reg));
  match(RegP);
  // match(iRegP);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4493 
// Long 64 bit Register R0 only
operand iRegL_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R2 only
operand iRegL_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R3 only
operand iRegL_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Long 64 bit Register R11 only
operand iRegL_R11()
%{
  constraint(ALLOC_IN_RC(r11_reg));
  match(RegL);
  match(iRegLNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer 64 bit Register FP only
operand iRegP_FP()
%{
  constraint(ALLOC_IN_RC(fp_reg));
  match(RegP);
  // match(iRegP);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R0 only
operand iRegI_R0()
%{
  constraint(ALLOC_IN_RC(int_r0_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R2 only
operand iRegI_R2()
%{
  constraint(ALLOC_IN_RC(int_r2_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Integer 32 bit Register R3 only
operand iRegI_R3()
%{
  constraint(ALLOC_IN_RC(int_r3_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Integer 32 bit Register R4 only
operand iRegI_R4()
%{
  constraint(ALLOC_IN_RC(int_r4_reg));
  match(RegI);
  match(iRegINoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}


// Narrow Pointer Register Operands
// Narrow Pointer Register (compressed oop / compressed klass pointer)
operand iRegN()
%{
  constraint(ALLOC_IN_RC(any_reg32));
  match(RegN);
  match(iRegNNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R0 only
operand iRegN_R0()
%{
  constraint(ALLOC_IN_RC(r0_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R2 only
operand iRegN_R2()
%{
  constraint(ALLOC_IN_RC(r2_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register R3 only
operand iRegN_R3()
%{
  constraint(ALLOC_IN_RC(r3_reg));
  match(iRegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register not Special
// (NOTE: the comment here previously read "Integer 64 bit Register not
// Special", which was wrong -- this is the 32-bit narrow-oop operand.)
operand iRegNNoSp()
%{
  constraint(ALLOC_IN_RC(no_special_reg32));
  match(RegN);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// heap base register -- used for encoding immN0
// (pinned to the register holding the compressed-oop heap base)

operand iRegIHeapbase()
%{
  constraint(ALLOC_IN_RC(heapbase_reg));
  match(RegI);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4654 
// Float Register
// Float register operands
operand vRegF()
%{
  constraint(ALLOC_IN_RC(float_reg));
  match(RegF);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Double Register
// Double register operands
operand vRegD()
%{
  constraint(ALLOC_IN_RC(double_reg));
  match(RegD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 64 bit vector operand (ideal type VecD, 8 bytes)
operand vecD()
%{
  constraint(ALLOC_IN_RC(vectord_reg));
  match(VecD);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// 128 bit vector operand (ideal type VecX, 16 bytes)
operand vecX()
%{
  constraint(ALLOC_IN_RC(vectorx_reg));
  match(VecX);

  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Fixed FP/SIMD register operands (pin a double into v0..v3, e.g. for
// runtime-stub calling conventions)
operand vRegD_V0()
%{
  constraint(ALLOC_IN_RC(v0_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V1()
%{
  constraint(ALLOC_IN_RC(v1_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V2()
%{
  constraint(ALLOC_IN_RC(v2_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand vRegD_V3()
%{
  constraint(ALLOC_IN_RC(v3_reg));
  match(RegD);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4734 
// Flags register, used as output of signed compare instructions

// note that on AArch64 we also use this register as the output for
// floating point compare instructions (CmpF CmpD). this ensures
// that ordered inequality tests use GT, GE, LT or LE none of which
// pass through cases where the result is unordered i.e. one or both
// inputs to the compare is a NaN. this means that the ideal code can
// replace e.g. a GT with an LE and not end up capturing the NaN case
// (where the comparison should always fail). EQ and NE tests are
// always generated in ideal code so that unordered folds into the NE
// case, matching the behaviour of AArch64 NE.
//
// This differs from x86 where the outputs of FP compares use a
// special FP flags registers and where compares based on this
// register are distinguished into ordered inequalities (cmpOpUCF) and
// EQ/NEQ tests (cmpOpUCF2). x86 has to special case the latter tests
// to explicitly handle the unordered case in branches. x86 also has
// to include extra CMoveX rules to accept a cmpOpUCF input.

operand rFlagsReg()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of unsigned compare instructions
operand rFlagsRegU()
%{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  op_cost(0);
  format %{ "RFLAGSU" %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
operand inline_cache_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // inline_cache_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

operand interpreter_method_oop_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_oop_reg
  match(reg);
  match(iRegPNoSp);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Thread Register
operand thread_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(thread_reg)); // thread_reg (previous comment said "link_reg", which was wrong)
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}

// Link Register
operand lr_RegP(iRegP reg)
%{
  constraint(ALLOC_IN_RC(lr_reg)); // link_reg
  match(reg);
  op_cost(0);
  format %{ %}
  interface(REG_INTER);
%}
4816 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER descriptions below, index(0xffffffff) is the
// ADLC sentinel meaning "no index register".

// [base] -- plain register-indirect addressing
operand indirect(iRegP reg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(reg);
  op_cost(0);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, wN sxtw #scale] -- base plus sign-extended 32-bit index,
// shifted. Guarded by size_fits_all_mem_uses (defined elsewhere in
// this file) so the scaled form is only used when every memory access
// sharing this address can encode it.
operand indIndexScaledI2L(iRegP reg, iRegI ireg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, xN lsl #scale] -- base plus shifted 64-bit index
operand indIndexScaled(iRegP reg, iRegL lreg, immIScale scale)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  predicate(size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  match(AddP reg (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

// [base, wN sxtw] -- base plus sign-extended 32-bit index, unscaled
operand indIndexI2L(iRegP reg, iRegI ireg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, xN] -- base plus 64-bit index, unscaled
operand indIndex(iRegP reg, iRegL lreg)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg lreg);
  op_cost(0);
  format %{ "$reg, $lreg" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

// [base, #imm] -- base plus 32-bit immediate offset
operand indOffI(iRegP reg, immIOffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// The indOffI4/8/16 (and indOffL4/8/16) variants below restrict the
// immediate to the immIOffset4/8/16 operand types -- presumably the
// offsets encodable by 4/8/16-byte scaled-immediate loads/stores;
// confirm against the immIOffset* definitions earlier in the file.
operand indOffI4(iRegP reg, immIOffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI8(iRegP reg, immIOffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffI16(iRegP reg, immIOffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

// [base, #imm] -- base plus 64-bit immediate offset
operand indOffL(iRegP reg, immLoffset off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL4(iRegP reg, immLoffset4 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL8(iRegP reg, immLoffset8 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffL16(iRegP reg, immLoffset16 off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5002 
// Narrow-oop variants of the memory operands above. All are guarded
// by narrow_oop_shift() == 0: only when compressed oops are stored
// unshifted can the 32-bit narrow value serve directly as an address
// base after DecodeN.
operand indirectN(iRegN reg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(DecodeN reg);
  op_cost(0);
  format %{ "[$reg]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexScaledI2LN(iRegN reg, iRegI ireg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL (ConvI2L ireg) scale));
  op_cost(0);
  format %{ "$reg, $ireg sxtw($scale), 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexScaledN(iRegN reg, iRegL lreg, immIScale scale)
%{
  predicate(Universe::narrow_oop_shift() == 0 && size_fits_all_mem_uses(n->as_AddP(), n->in(AddPNode::Offset)->in(2)->get_int()));
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (LShiftL lreg scale));
  op_cost(0);
  format %{ "$reg, $lreg lsl($scale)\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale($scale);
    disp(0x0);
  %}
%}

operand indIndexI2LN(iRegN reg, iRegI ireg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) (ConvI2L ireg));
  op_cost(0);
  format %{ "$reg, $ireg, 0, I2L\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indIndexN(iRegN reg, iRegL lreg)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) lreg);
  op_cost(0);
  format %{ "$reg, $lreg\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($lreg);
    scale(0x0);
    disp(0x0);
  %}
%}

operand indOffIN(iRegN reg, immIOffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}

operand indOffLN(iRegN reg, immLoffset off)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP (DecodeN reg) off);
  op_cost(0);
  format %{ "[$reg, $off]\t# narrow" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}



// AArch64 opto stubs need to write to the pc slot in the thread anchor
// (thread register base plus the fixed immL_pc_off displacement)
operand thread_anchor_pc(thread_RegP reg, immL_pc_off off)
%{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP reg off);
  op_cost(0);
  format %{ "[$reg, $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0xffffffff);
    scale(0x0);
    disp($off);
  %}
%}
5124 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// NOTE(review): the "// RSP" comments below are inherited from the x86
// AD file; 0x1e is the matcher encoding used for the stack pointer here.
operand stackSlotP(sRegP reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  // No match rule because this operand is only generated in matching
  // match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg)
%{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  // match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x1e);  // RSP
    index(0x0);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5199 
// Operands for expressing Control Flow
// NOTE: Label is a predefined operand which should not be redefined in
//       the AD file. It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.
//
// The hex values in the COND_INTER entries are the AArch64 condition
// code encodings (EQ=0x0, NE=0x1, HS=0x2, LO=0x3, VS=0x6, VC=0x7,
// HI=0x8, LS=0x9, GE=0xa, LT=0xb, GT=0xc, LE=0xd).

// used for signed integral comparisons and fp comparisons

operand cmpOp()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for unsigned integral comparisons

operand cmpOpU()
%{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0x3, "lo");
    greater_equal(0x2, "hs");
    less_equal(0x9, "ls");
    greater(0x8, "hi");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (the predicate restricts matching to eq/ne; the full COND_INTER
// table is still required by the ADLC, but only those entries are
// reachable)

operand cmpOpEqNe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);
  predicate(n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::eq);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate restricts matching to lt/ge)

operand cmpOpLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}

// used for certain unsigned integral comparisons which can be
// converted to cbxx or tbxx instructions
// (predicate restricts matching to eq/ne/lt/ge)

operand cmpOpUEqNeLtGe()
%{
  match(Bool);
  match(CmpOp);
  op_cost(0);

  predicate(n->as_Bool()->_test._test == BoolTest::eq
            || n->as_Bool()->_test._test == BoolTest::ne
            || n->as_Bool()->_test._test == BoolTest::lt
            || n->as_Bool()->_test._test == BoolTest::ge);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x0, "eq");
    not_equal(0x1, "ne");
    less(0xb, "lt");
    greater_equal(0xa, "ge");
    less_equal(0xd, "le");
    greater(0xc, "gt");
    overflow(0x6, "vs");
    no_overflow(0x7, "vc");
  %}
%}
5331 
// Special operand allowing long args to int ops to be truncated for free
// Matches (ConvL2I long-reg) so 32-bit instructions can consume the low
// half of a long register directly, eliding the explicit l2i (movw).

operand iRegL2I(iRegL reg) %{

  op_cost(0);

  match(ConvL2I reg);

  format %{ "l2i($reg)" %}

  // terminating ';' added for consistency with every other REG_INTER
  // operand in this file
  interface(REG_INTER);
%}
5344 
// Address forms usable by vector loads/stores of 4, 8 and 16 bytes:
// the offset operand is restricted to match the access size.
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
opclass vmem16(indirect, indIndex, indOffI16, indOffL16);

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify
// separate instructions for every form of operand when the
// instruction accepts multiple operand types with the same basic
// encoding and format. The classic case of this is memory operands.

// memory is used to define read/write location for load/store
// instruction defs. we can turn a memory op into an Address

opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI, indOffL,
               indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);

// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
// operations. it allows the src to be either an iRegI or a (ConvL2I
// iRegL). in the latter case the l2i normally planted for a ConvL2I
// can be elided because the 32-bit instruction will just employ the
// lower 32 bits anyway.
//
// n.b. this does not elide all L2I conversions. if the truncated
// value is consumed by more than one operation then the ConvL2I
// cannot be bundled into the consuming nodes so an l2i gets planted
// (actually a movw $dst $src) and the downstream instructions consume
// the result of the l2i as an iRegI input. That's a shame since the
// movw is actually redundant but its not too costly.

opclass iRegIorL2I(iRegI, iRegL2I);
5376 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.

// For specific pipelines, eg A53, define the stages of that pipeline
//pipe_desc(ISS, EX1, EX2, WR);
// Aliases mapping the A53-style stage names onto the generic S0..S5
// stages declared in pipe_desc below.
#define ISS S0
#define EX1 S1
#define EX2 S2
#define WR  S3
5386 
5387 // Integer ALU reg operation
5388 pipeline %{
5389 
attributes %{
  // ARM instructions are of fixed length
  fixed_size_instructions;        // All instructions are the same size
  max_instructions_per_bundle = 2;   // A53 = 2, A57 = 4
  // ARM instructions come in 32-bit word units
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 64;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 64 bytes

  // List of nop instructions
  nops( MachNop );
%}
5402 
// We don't use an actual pipeline model so don't care about resources
// or description. we do use pipeline classes to introduce fixed
// latencies

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// INS01 / ALU are compound resources: an instruction needing INS01
// may issue on either issue port, one needing INS0 only on port 0.
resources( INS0, INS1, INS01 = INS0 | INS1,
           ALU0, ALU1, ALU = ALU0 | ALU1,
           MAC,
           DIV,
           BRANCH,
           LDST,
           NEON_FP);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Define the pipeline as a generic 6 stage pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);
5423 
5424 //----------PIPELINE CLASSES---------------------------------------------------
5425 // Pipeline Classes describe the stages in which input and output are
5426 // referenced by the hardware pipeline.
5427 
5428 pipe_class fp_dop_reg_reg_s(vRegF dst, vRegF src1, vRegF src2)
5429 %{
5430   single_instruction;
5431   src1   : S1(read);
5432   src2   : S2(read);
5433   dst    : S5(write);
5434   INS01  : ISS;
5435   NEON_FP : S5;
5436 %}
5437 
5438 pipe_class fp_dop_reg_reg_d(vRegD dst, vRegD src1, vRegD src2)
5439 %{
5440   single_instruction;
5441   src1   : S1(read);
5442   src2   : S2(read);
5443   dst    : S5(write);
5444   INS01  : ISS;
5445   NEON_FP : S5;
5446 %}
5447 
5448 pipe_class fp_uop_s(vRegF dst, vRegF src)
5449 %{
5450   single_instruction;
5451   src    : S1(read);
5452   dst    : S5(write);
5453   INS01  : ISS;
5454   NEON_FP : S5;
5455 %}
5456 
5457 pipe_class fp_uop_d(vRegD dst, vRegD src)
5458 %{
5459   single_instruction;
5460   src    : S1(read);
5461   dst    : S5(write);
5462   INS01  : ISS;
5463   NEON_FP : S5;
5464 %}
5465 
5466 pipe_class fp_d2f(vRegF dst, vRegD src)
5467 %{
5468   single_instruction;
5469   src    : S1(read);
5470   dst    : S5(write);
5471   INS01  : ISS;
5472   NEON_FP : S5;
5473 %}
5474 
5475 pipe_class fp_f2d(vRegD dst, vRegF src)
5476 %{
5477   single_instruction;
5478   src    : S1(read);
5479   dst    : S5(write);
5480   INS01  : ISS;
5481   NEON_FP : S5;
5482 %}
5483 
5484 pipe_class fp_f2i(iRegINoSp dst, vRegF src)
5485 %{
5486   single_instruction;
5487   src    : S1(read);
5488   dst    : S5(write);
5489   INS01  : ISS;
5490   NEON_FP : S5;
5491 %}
5492 
5493 pipe_class fp_f2l(iRegLNoSp dst, vRegF src)
5494 %{
5495   single_instruction;
5496   src    : S1(read);
5497   dst    : S5(write);
5498   INS01  : ISS;
5499   NEON_FP : S5;
5500 %}
5501 
5502 pipe_class fp_i2f(vRegF dst, iRegIorL2I src)
5503 %{
5504   single_instruction;
5505   src    : S1(read);
5506   dst    : S5(write);
5507   INS01  : ISS;
5508   NEON_FP : S5;
5509 %}
5510 
5511 pipe_class fp_l2f(vRegF dst, iRegL src)
5512 %{
5513   single_instruction;
5514   src    : S1(read);
5515   dst    : S5(write);
5516   INS01  : ISS;
5517   NEON_FP : S5;
5518 %}
5519 
5520 pipe_class fp_d2i(iRegINoSp dst, vRegD src)
5521 %{
5522   single_instruction;
5523   src    : S1(read);
5524   dst    : S5(write);
5525   INS01  : ISS;
5526   NEON_FP : S5;
5527 %}
5528 
5529 pipe_class fp_d2l(iRegLNoSp dst, vRegD src)
5530 %{
5531   single_instruction;
5532   src    : S1(read);
5533   dst    : S5(write);
5534   INS01  : ISS;
5535   NEON_FP : S5;
5536 %}
5537 
5538 pipe_class fp_i2d(vRegD dst, iRegIorL2I src)
5539 %{
5540   single_instruction;
5541   src    : S1(read);
5542   dst    : S5(write);
5543   INS01  : ISS;
5544   NEON_FP : S5;
5545 %}
5546 
5547 pipe_class fp_l2d(vRegD dst, iRegIorL2I src)
5548 %{
5549   single_instruction;
5550   src    : S1(read);
5551   dst    : S5(write);
5552   INS01  : ISS;
5553   NEON_FP : S5;
5554 %}
5555 
5556 pipe_class fp_div_s(vRegF dst, vRegF src1, vRegF src2)
5557 %{
5558   single_instruction;
5559   src1   : S1(read);
5560   src2   : S2(read);
5561   dst    : S5(write);
5562   INS0   : ISS;
5563   NEON_FP : S5;
5564 %}
5565 
5566 pipe_class fp_div_d(vRegD dst, vRegD src1, vRegD src2)
5567 %{
5568   single_instruction;
5569   src1   : S1(read);
5570   src2   : S2(read);
5571   dst    : S5(write);
5572   INS0   : ISS;
5573   NEON_FP : S5;
5574 %}
5575 
5576 pipe_class fp_cond_reg_reg_s(vRegF dst, vRegF src1, vRegF src2, rFlagsReg cr)
5577 %{
5578   single_instruction;
5579   cr     : S1(read);
5580   src1   : S1(read);
5581   src2   : S1(read);
5582   dst    : S3(write);
5583   INS01  : ISS;
5584   NEON_FP : S3;
5585 %}
5586 
5587 pipe_class fp_cond_reg_reg_d(vRegD dst, vRegD src1, vRegD src2, rFlagsReg cr)
5588 %{
5589   single_instruction;
5590   cr     : S1(read);
5591   src1   : S1(read);
5592   src2   : S1(read);
5593   dst    : S3(write);
5594   INS01  : ISS;
5595   NEON_FP : S3;
5596 %}
5597 
5598 pipe_class fp_imm_s(vRegF dst)
5599 %{
5600   single_instruction;
5601   dst    : S3(write);
5602   INS01  : ISS;
5603   NEON_FP : S3;
5604 %}
5605 
5606 pipe_class fp_imm_d(vRegD dst)
5607 %{
5608   single_instruction;
5609   dst    : S3(write);
5610   INS01  : ISS;
5611   NEON_FP : S3;
5612 %}
5613 
5614 pipe_class fp_load_constant_s(vRegF dst)
5615 %{
5616   single_instruction;
5617   dst    : S4(write);
5618   INS01  : ISS;
5619   NEON_FP : S4;
5620 %}
5621 
5622 pipe_class fp_load_constant_d(vRegD dst)
5623 %{
5624   single_instruction;
5625   dst    : S4(write);
5626   INS01  : ISS;
5627   NEON_FP : S4;
5628 %}
5629 
5630 pipe_class vmul64(vecD dst, vecD src1, vecD src2)
5631 %{
5632   single_instruction;
5633   dst    : S5(write);
5634   src1   : S1(read);
5635   src2   : S1(read);
5636   INS01  : ISS;
5637   NEON_FP : S5;
5638 %}
5639 
5640 pipe_class vmul128(vecX dst, vecX src1, vecX src2)
5641 %{
5642   single_instruction;
5643   dst    : S5(write);
5644   src1   : S1(read);
5645   src2   : S1(read);
5646   INS0   : ISS;
5647   NEON_FP : S5;
5648 %}
5649 
// Vector multiply-accumulate, 64-bit operands.  Unlike plain vmul64,
// dst appears twice: it is the accumulator input (must be available by
// S1) as well as the result (written at S5).
pipe_class vmla64(vecD dst, vecD src1, vecD src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);  // accumulator value read early, at S1
  INS01  : ISS;
  NEON_FP : S5;
%}
5660 
// Vector multiply-accumulate, 128-bit operands.  dst is both the
// accumulator input (read at S1) and the result (written at S5).
// Can only dual issue as instruction 0 (INS0).
pipe_class vmla128(vecX dst, vecX src1, vecX src2)
%{
  single_instruction;
  dst    : S5(write);
  src1   : S1(read);
  src2   : S1(read);
  dst    : S1(read);  // accumulator value read early, at S1
  INS0   : ISS;
  NEON_FP : S5;
%}
5671 
5672 pipe_class vdop64(vecD dst, vecD src1, vecD src2)
5673 %{
5674   single_instruction;
5675   dst    : S4(write);
5676   src1   : S2(read);
5677   src2   : S2(read);
5678   INS01  : ISS;
5679   NEON_FP : S4;
5680 %}
5681 
5682 pipe_class vdop128(vecX dst, vecX src1, vecX src2)
5683 %{
5684   single_instruction;
5685   dst    : S4(write);
5686   src1   : S2(read);
5687   src2   : S2(read);
5688   INS0   : ISS;
5689   NEON_FP : S4;
5690 %}
5691 
5692 pipe_class vlogical64(vecD dst, vecD src1, vecD src2)
5693 %{
5694   single_instruction;
5695   dst    : S3(write);
5696   src1   : S2(read);
5697   src2   : S2(read);
5698   INS01  : ISS;
5699   NEON_FP : S3;
5700 %}
5701 
5702 pipe_class vlogical128(vecX dst, vecX src1, vecX src2)
5703 %{
5704   single_instruction;
5705   dst    : S3(write);
5706   src1   : S2(read);
5707   src2   : S2(read);
5708   INS0   : ISS;
5709   NEON_FP : S3;
5710 %}
5711 
5712 pipe_class vshift64(vecD dst, vecD src, vecX shift)
5713 %{
5714   single_instruction;
5715   dst    : S3(write);
5716   src    : S1(read);
5717   shift  : S1(read);
5718   INS01  : ISS;
5719   NEON_FP : S3;
5720 %}
5721 
5722 pipe_class vshift128(vecX dst, vecX src, vecX shift)
5723 %{
5724   single_instruction;
5725   dst    : S3(write);
5726   src    : S1(read);
5727   shift  : S1(read);
5728   INS0   : ISS;
5729   NEON_FP : S3;
5730 %}
5731 
5732 pipe_class vshift64_imm(vecD dst, vecD src, immI shift)
5733 %{
5734   single_instruction;
5735   dst    : S3(write);
5736   src    : S1(read);
5737   INS01  : ISS;
5738   NEON_FP : S3;
5739 %}
5740 
5741 pipe_class vshift128_imm(vecX dst, vecX src, immI shift)
5742 %{
5743   single_instruction;
5744   dst    : S3(write);
5745   src    : S1(read);
5746   INS0   : ISS;
5747   NEON_FP : S3;
5748 %}
5749 
5750 pipe_class vdop_fp64(vecD dst, vecD src1, vecD src2)
5751 %{
5752   single_instruction;
5753   dst    : S5(write);
5754   src1   : S1(read);
5755   src2   : S1(read);
5756   INS01  : ISS;
5757   NEON_FP : S5;
5758 %}
5759 
5760 pipe_class vdop_fp128(vecX dst, vecX src1, vecX src2)
5761 %{
5762   single_instruction;
5763   dst    : S5(write);
5764   src1   : S1(read);
5765   src2   : S1(read);
5766   INS0   : ISS;
5767   NEON_FP : S5;
5768 %}
5769 
5770 pipe_class vmuldiv_fp64(vecD dst, vecD src1, vecD src2)
5771 %{
5772   single_instruction;
5773   dst    : S5(write);
5774   src1   : S1(read);
5775   src2   : S1(read);
5776   INS0   : ISS;
5777   NEON_FP : S5;
5778 %}
5779 
5780 pipe_class vmuldiv_fp128(vecX dst, vecX src1, vecX src2)
5781 %{
5782   single_instruction;
5783   dst    : S5(write);
5784   src1   : S1(read);
5785   src2   : S1(read);
5786   INS0   : ISS;
5787   NEON_FP : S5;
5788 %}
5789 
5790 pipe_class vsqrt_fp128(vecX dst, vecX src)
5791 %{
5792   single_instruction;
5793   dst    : S5(write);
5794   src    : S1(read);
5795   INS0   : ISS;
5796   NEON_FP : S5;
5797 %}
5798 
5799 pipe_class vunop_fp64(vecD dst, vecD src)
5800 %{
5801   single_instruction;
5802   dst    : S5(write);
5803   src    : S1(read);
5804   INS01  : ISS;
5805   NEON_FP : S5;
5806 %}
5807 
5808 pipe_class vunop_fp128(vecX dst, vecX src)
5809 %{
5810   single_instruction;
5811   dst    : S5(write);
5812   src    : S1(read);
5813   INS0   : ISS;
5814   NEON_FP : S5;
5815 %}
5816 
5817 pipe_class vdup_reg_reg64(vecD dst, iRegI src)
5818 %{
5819   single_instruction;
5820   dst    : S3(write);
5821   src    : S1(read);
5822   INS01  : ISS;
5823   NEON_FP : S3;
5824 %}
5825 
5826 pipe_class vdup_reg_reg128(vecX dst, iRegI src)
5827 %{
5828   single_instruction;
5829   dst    : S3(write);
5830   src    : S1(read);
5831   INS01  : ISS;
5832   NEON_FP : S3;
5833 %}
5834 
5835 pipe_class vdup_reg_freg64(vecD dst, vRegF src)
5836 %{
5837   single_instruction;
5838   dst    : S3(write);
5839   src    : S1(read);
5840   INS01  : ISS;
5841   NEON_FP : S3;
5842 %}
5843 
5844 pipe_class vdup_reg_freg128(vecX dst, vRegF src)
5845 %{
5846   single_instruction;
5847   dst    : S3(write);
5848   src    : S1(read);
5849   INS01  : ISS;
5850   NEON_FP : S3;
5851 %}
5852 
5853 pipe_class vdup_reg_dreg128(vecX dst, vRegD src)
5854 %{
5855   single_instruction;
5856   dst    : S3(write);
5857   src    : S1(read);
5858   INS01  : ISS;
5859   NEON_FP : S3;
5860 %}
5861 
5862 pipe_class vmovi_reg_imm64(vecD dst)
5863 %{
5864   single_instruction;
5865   dst    : S3(write);
5866   INS01  : ISS;
5867   NEON_FP : S3;
5868 %}
5869 
5870 pipe_class vmovi_reg_imm128(vecX dst)
5871 %{
5872   single_instruction;
5873   dst    : S3(write);
5874   INS0   : ISS;
5875   NEON_FP : S3;
5876 %}
5877 
// Vector load, 64-bit.  Address operand consumed at issue (ISS);
// result available at S5.
// NOTE(review): NEON_FP is held at S3 while the write is at S5 --
// this differs from the arithmetic classes (which use the same stage
// for both); confirm against the pipeline model.
pipe_class vload_reg_mem64(vecD dst, vmem8 mem)
%{
  single_instruction;
  dst    : S5(write);
  mem    : ISS(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5886 
5887 pipe_class vload_reg_mem128(vecX dst, vmem16 mem)
5888 %{
5889   single_instruction;
5890   dst    : S5(write);
5891   mem    : ISS(read);
5892   INS01  : ISS;
5893   NEON_FP : S3;
5894 %}
5895 
5896 pipe_class vstore_reg_mem64(vecD src, vmem8 mem)
5897 %{
5898   single_instruction;
5899   mem    : ISS(read);
5900   src    : S2(read);
5901   INS01  : ISS;
5902   NEON_FP : S3;
5903 %}
5904 
// Vector store, 128-bit.  Source operand is a full 128-bit vector
// register (vecX) -- the original declared vecD, a copy-paste from
// vstore_reg_mem64; the sibling vload_reg_mem128 correctly uses vecX.
pipe_class vstore_reg_mem128(vecX src, vmem16 mem)
%{
  single_instruction;
  mem    : ISS(read);
  src    : S2(read);
  INS01  : ISS;
  NEON_FP : S3;
%}
5913 
5914 //------- Integer ALU operations --------------------------
5915 
5916 // Integer ALU reg-reg operation
5917 // Operands needed in EX1, result generated in EX2
5918 // Eg.  ADD     x0, x1, x2
5919 pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2)
5920 %{
5921   single_instruction;
5922   dst    : EX2(write);
5923   src1   : EX1(read);
5924   src2   : EX1(read);
5925   INS01  : ISS; // Dual issue as instruction 0 or 1
5926   ALU    : EX2;
5927 %}
5928 
5929 // Integer ALU reg-reg operation with constant shift
5930 // Shifted register must be available in LATE_ISS instead of EX1
5931 // Eg.  ADD     x0, x1, x2, LSL #2
5932 pipe_class ialu_reg_reg_shift(iRegI dst, iRegI src1, iRegI src2, immI shift)
5933 %{
5934   single_instruction;
5935   dst    : EX2(write);
5936   src1   : EX1(read);
5937   src2   : ISS(read);
5938   INS01  : ISS;
5939   ALU    : EX2;
5940 %}
5941 
5942 // Integer ALU reg operation with constant shift
5943 // Eg.  LSL     x0, x1, #shift
5944 pipe_class ialu_reg_shift(iRegI dst, iRegI src1)
5945 %{
5946   single_instruction;
5947   dst    : EX2(write);
5948   src1   : ISS(read);
5949   INS01  : ISS;
5950   ALU    : EX2;
5951 %}
5952 
5953 // Integer ALU reg-reg operation with variable shift
5954 // Both operands must be available in LATE_ISS instead of EX1
5955 // Result is available in EX1 instead of EX2
5956 // Eg.  LSLV    x0, x1, x2
5957 pipe_class ialu_reg_reg_vshift(iRegI dst, iRegI src1, iRegI src2)
5958 %{
5959   single_instruction;
5960   dst    : EX1(write);
5961   src1   : ISS(read);
5962   src2   : ISS(read);
5963   INS01  : ISS;
5964   ALU    : EX1;
5965 %}
5966 
5967 // Integer ALU reg-reg operation with extract
5968 // As for _vshift above, but result generated in EX2
5969 // Eg.  EXTR    x0, x1, x2, #N
pipe_class ialu_reg_reg_extr(iRegI dst, iRegI src1, iRegI src2)
%{
  single_instruction;
  dst    : EX2(write);
  src1   : ISS(read);
  src2   : ISS(read);
  INS1   : ISS; // Can only dual issue as Instruction 1
  // NOTE(review): the header comment says the result is generated in
  // EX2 (and dst is written at EX2), yet the ALU resource is claimed
  // at EX1 -- confirm whether this should be ALU : EX2.
  ALU    : EX1;
%}
5979 
5980 // Integer ALU reg operation
5981 // Eg.  NEG     x0, x1
5982 pipe_class ialu_reg(iRegI dst, iRegI src)
5983 %{
5984   single_instruction;
5985   dst    : EX2(write);
5986   src    : EX1(read);
5987   INS01  : ISS;
5988   ALU    : EX2;
5989 %}
5990 
// Integer ALU reg immediate operation
5992 // Eg.  ADD     x0, x1, #N
5993 pipe_class ialu_reg_imm(iRegI dst, iRegI src1)
5994 %{
5995   single_instruction;
5996   dst    : EX2(write);
5997   src1   : EX1(read);
5998   INS01  : ISS;
5999   ALU    : EX2;
6000 %}
6001 
6002 // Integer ALU immediate operation (no source operands)
6003 // Eg.  MOV     x0, #N
6004 pipe_class ialu_imm(iRegI dst)
6005 %{
6006   single_instruction;
6007   dst    : EX1(write);
6008   INS01  : ISS;
6009   ALU    : EX1;
6010 %}
6011 
6012 //------- Compare operation -------------------------------
6013 
6014 // Compare reg-reg
6015 // Eg.  CMP     x0, x1
6016 pipe_class icmp_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
6017 %{
6018   single_instruction;
6019 //  fixed_latency(16);
6020   cr     : EX2(write);
6021   op1    : EX1(read);
6022   op2    : EX1(read);
6023   INS01  : ISS;
6024   ALU    : EX2;
6025 %}
6026 
6027 // Compare reg-reg
6028 // Eg.  CMP     x0, #N
6029 pipe_class icmp_reg_imm(rFlagsReg cr, iRegI op1)
6030 %{
6031   single_instruction;
6032 //  fixed_latency(16);
6033   cr     : EX2(write);
6034   op1    : EX1(read);
6035   INS01  : ISS;
6036   ALU    : EX2;
6037 %}
6038 
6039 //------- Conditional instructions ------------------------
6040 
6041 // Conditional no operands
6042 // Eg.  CSINC   x0, zr, zr, <cond>
6043 pipe_class icond_none(iRegI dst, rFlagsReg cr)
6044 %{
6045   single_instruction;
6046   cr     : EX1(read);
6047   dst    : EX2(write);
6048   INS01  : ISS;
6049   ALU    : EX2;
6050 %}
6051 
6052 // Conditional 2 operand
6053 // EG.  CSEL    X0, X1, X2, <cond>
6054 pipe_class icond_reg_reg(iRegI dst, iRegI src1, iRegI src2, rFlagsReg cr)
6055 %{
6056   single_instruction;
6057   cr     : EX1(read);
6058   src1   : EX1(read);
6059   src2   : EX1(read);
6060   dst    : EX2(write);
6061   INS01  : ISS;
6062   ALU    : EX2;
6063 %}
6064 
6065 // Conditional 2 operand
6066 // EG.  CSEL    X0, X1, X2, <cond>
6067 pipe_class icond_reg(iRegI dst, iRegI src, rFlagsReg cr)
6068 %{
6069   single_instruction;
6070   cr     : EX1(read);
6071   src    : EX1(read);
6072   dst    : EX2(write);
6073   INS01  : ISS;
6074   ALU    : EX2;
6075 %}
6076 
6077 //------- Multiply pipeline operations --------------------
6078 
6079 // Multiply reg-reg
6080 // Eg.  MUL     w0, w1, w2
6081 pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6082 %{
6083   single_instruction;
6084   dst    : WR(write);
6085   src1   : ISS(read);
6086   src2   : ISS(read);
6087   INS01  : ISS;
6088   MAC    : WR;
6089 %}
6090 
6091 // Multiply accumulate
6092 // Eg.  MADD    w0, w1, w2, w3
6093 pipe_class imac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6094 %{
6095   single_instruction;
6096   dst    : WR(write);
6097   src1   : ISS(read);
6098   src2   : ISS(read);
6099   src3   : ISS(read);
6100   INS01  : ISS;
6101   MAC    : WR;
6102 %}
6103 
6104 // Eg.  MUL     w0, w1, w2
6105 pipe_class lmul_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6106 %{
6107   single_instruction;
6108   fixed_latency(3); // Maximum latency for 64 bit mul
6109   dst    : WR(write);
6110   src1   : ISS(read);
6111   src2   : ISS(read);
6112   INS01  : ISS;
6113   MAC    : WR;
6114 %}
6115 
6116 // Multiply accumulate
6117 // Eg.  MADD    w0, w1, w2, w3
6118 pipe_class lmac_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI src3)
6119 %{
6120   single_instruction;
6121   fixed_latency(3); // Maximum latency for 64 bit mul
6122   dst    : WR(write);
6123   src1   : ISS(read);
6124   src2   : ISS(read);
6125   src3   : ISS(read);
6126   INS01  : ISS;
6127   MAC    : WR;
6128 %}
6129 
6130 //------- Divide pipeline operations --------------------
6131 
6132 // Eg.  SDIV    w0, w1, w2
6133 pipe_class idiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6134 %{
6135   single_instruction;
6136   fixed_latency(8); // Maximum latency for 32 bit divide
6137   dst    : WR(write);
6138   src1   : ISS(read);
6139   src2   : ISS(read);
6140   INS0   : ISS; // Can only dual issue as instruction 0
6141   DIV    : WR;
6142 %}
6143 
6144 // Eg.  SDIV    x0, x1, x2
6145 pipe_class ldiv_reg_reg(iRegI dst, iRegI src1, iRegI src2)
6146 %{
6147   single_instruction;
6148   fixed_latency(16); // Maximum latency for 64 bit divide
6149   dst    : WR(write);
6150   src1   : ISS(read);
6151   src2   : ISS(read);
6152   INS0   : ISS; // Can only dual issue as instruction 0
6153   DIV    : WR;
6154 %}
6155 
6156 //------- Load pipeline operations ------------------------
6157 
6158 // Load - prefetch
6159 // Eg.  PFRM    <mem>
6160 pipe_class iload_prefetch(memory mem)
6161 %{
6162   single_instruction;
6163   mem    : ISS(read);
6164   INS01  : ISS;
6165   LDST   : WR;
6166 %}
6167 
6168 // Load - reg, mem
6169 // Eg.  LDR     x0, <mem>
6170 pipe_class iload_reg_mem(iRegI dst, memory mem)
6171 %{
6172   single_instruction;
6173   dst    : WR(write);
6174   mem    : ISS(read);
6175   INS01  : ISS;
6176   LDST   : WR;
6177 %}
6178 
6179 // Load - reg, reg
6180 // Eg.  LDR     x0, [sp, x1]
6181 pipe_class iload_reg_reg(iRegI dst, iRegI src)
6182 %{
6183   single_instruction;
6184   dst    : WR(write);
6185   src    : ISS(read);
6186   INS01  : ISS;
6187   LDST   : WR;
6188 %}
6189 
6190 //------- Store pipeline operations -----------------------
6191 
6192 // Store - zr, mem
6193 // Eg.  STR     zr, <mem>
6194 pipe_class istore_mem(memory mem)
6195 %{
6196   single_instruction;
6197   mem    : ISS(read);
6198   INS01  : ISS;
6199   LDST   : WR;
6200 %}
6201 
6202 // Store - reg, mem
6203 // Eg.  STR     x0, <mem>
6204 pipe_class istore_reg_mem(iRegI src, memory mem)
6205 %{
6206   single_instruction;
6207   mem    : ISS(read);
6208   src    : EX2(read);
6209   INS01  : ISS;
6210   LDST   : WR;
6211 %}
6212 
6213 // Store - reg, reg
6214 // Eg. STR      x0, [sp, x1]
// Despite its name, "dst" here is the index register of the store
// address (e.g. x1 in STR x0, [sp, x1]); it is read at issue, never
// written.  "src" is the value being stored, read at EX2.
pipe_class istore_reg_reg(iRegI dst, iRegI src)
%{
  single_instruction;
  dst    : ISS(read);  // address index register
  src    : EX2(read);  // stored value
  INS01  : ISS;
  LDST   : WR;
%}
6223 
//------- Branch pipeline operations ----------------------
6225 
6226 // Branch
6227 pipe_class pipe_branch()
6228 %{
6229   single_instruction;
6230   INS01  : ISS;
6231   BRANCH : EX1;
6232 %}
6233 
6234 // Conditional branch
6235 pipe_class pipe_branch_cond(rFlagsReg cr)
6236 %{
6237   single_instruction;
6238   cr     : EX1(read);
6239   INS01  : ISS;
6240   BRANCH : EX1;
6241 %}
6242 
6243 // Compare & Branch
6244 // EG.  CBZ/CBNZ
6245 pipe_class pipe_cmp_branch(iRegI op1)
6246 %{
6247   single_instruction;
6248   op1    : EX1(read);
6249   INS01  : ISS;
6250   BRANCH : EX1;
6251 %}
6252 
6253 //------- Synchronisation operations ----------------------
6254 
6255 // Any operation requiring serialization.
6256 // EG.  DMB/Atomic Ops/Load Acquire/Str Release
6257 pipe_class pipe_serial()
6258 %{
6259   single_instruction;
6260   force_serialization;
6261   fixed_latency(16);
6262   INS01  : ISS(2); // Cannot dual issue with any other instruction
6263   LDST   : WR;
6264 %}
6265 
6266 // Generic big/slow expanded idiom - also serialized
6267 pipe_class pipe_slow()
6268 %{
6269   instruction_count(10);
6270   multiple_bundles;
6271   force_serialization;
6272   fixed_latency(16);
6273   INS01  : ISS(2); // Cannot dual issue with any other instruction
6274   LDST   : WR;
6275 %}
6276 
6277 // Empty pipeline class
6278 pipe_class pipe_class_empty()
6279 %{
6280   single_instruction;
6281   fixed_latency(0);
6282 %}
6283 
6284 // Default pipeline class.
6285 pipe_class pipe_class_default()
6286 %{
6287   single_instruction;
6288   fixed_latency(2);
6289 %}
6290 
6291 // Pipeline class for compares.
6292 pipe_class pipe_class_compare()
6293 %{
6294   single_instruction;
6295   fixed_latency(16);
6296 %}
6297 
6298 // Pipeline class for memory operations.
6299 pipe_class pipe_class_memory()
6300 %{
6301   single_instruction;
6302   fixed_latency(16);
6303 %}
6304 
6305 // Pipeline class for call.
6306 pipe_class pipe_class_call()
6307 %{
6308   single_instruction;
6309   fixed_latency(100);
6310 %}
6311 
6312 // Define the class for the Nop node.
6313 define %{
6314    MachNop = pipe_class_empty;
6315 %}
6316 
6317 %}
6318 //----------INSTRUCTIONS-------------------------------------------------------
6319 //
6320 // match      -- States which machine-independent subtree may be replaced
6321 //               by this instruction.
6322 // ins_cost   -- The estimated cost of this instruction is used by instruction
6323 //               selection to identify a minimum cost tree of machine
6324 //               instructions that matches a tree of machine-independent
6325 //               instructions.
6326 // format     -- A string providing the disassembly for this instruction.
6327 //               The value of an instruction's operand may be inserted
6328 //               by referring to it with a '$' prefix.
6329 // opcode     -- Three instruction opcodes may be provided.  These are referred
6330 //               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
6332 //               indicate the type of machine instruction, while secondary
6333 //               and tertiary are often used for prefix options or addressing
6334 //               modes.
6335 // ins_encode -- A list of encode classes with parameters. The encode class
6336 //               name must have been defined in an 'enc_class' specification
6337 //               in the encode section of the architecture description.
6338 
6339 // ============================================================================
6340 // Memory (Load/Store) Instructions
6341 
6342 // Load Instructions
6343 
6344 // Load Byte (8 bit signed)
6345 instruct loadB(iRegINoSp dst, memory mem)
6346 %{
6347   match(Set dst (LoadB mem));
6348   predicate(!needs_acquiring_load(n));
6349 
6350   ins_cost(4 * INSN_COST);
6351   format %{ "ldrsbw  $dst, $mem\t# byte" %}
6352 
6353   ins_encode(aarch64_enc_ldrsbw(dst, mem));
6354 
6355   ins_pipe(iload_reg_mem);
6356 %}
6357 
6358 // Load Byte (8 bit signed) into long
6359 instruct loadB2L(iRegLNoSp dst, memory mem)
6360 %{
6361   match(Set dst (ConvI2L (LoadB mem)));
6362   predicate(!needs_acquiring_load(n->in(1)));
6363 
6364   ins_cost(4 * INSN_COST);
6365   format %{ "ldrsb  $dst, $mem\t# byte" %}
6366 
6367   ins_encode(aarch64_enc_ldrsb(dst, mem));
6368 
6369   ins_pipe(iload_reg_mem);
6370 %}
6371 
6372 // Load Byte (8 bit unsigned)
6373 instruct loadUB(iRegINoSp dst, memory mem)
6374 %{
6375   match(Set dst (LoadUB mem));
6376   predicate(!needs_acquiring_load(n));
6377 
6378   ins_cost(4 * INSN_COST);
6379   format %{ "ldrbw  $dst, $mem\t# byte" %}
6380 
6381   ins_encode(aarch64_enc_ldrb(dst, mem));
6382 
6383   ins_pipe(iload_reg_mem);
6384 %}
6385 
6386 // Load Byte (8 bit unsigned) into long
6387 instruct loadUB2L(iRegLNoSp dst, memory mem)
6388 %{
6389   match(Set dst (ConvI2L (LoadUB mem)));
6390   predicate(!needs_acquiring_load(n->in(1)));
6391 
6392   ins_cost(4 * INSN_COST);
6393   format %{ "ldrb  $dst, $mem\t# byte" %}
6394 
6395   ins_encode(aarch64_enc_ldrb(dst, mem));
6396 
6397   ins_pipe(iload_reg_mem);
6398 %}
6399 
6400 // Load Short (16 bit signed)
6401 instruct loadS(iRegINoSp dst, memory mem)
6402 %{
6403   match(Set dst (LoadS mem));
6404   predicate(!needs_acquiring_load(n));
6405 
6406   ins_cost(4 * INSN_COST);
6407   format %{ "ldrshw  $dst, $mem\t# short" %}
6408 
6409   ins_encode(aarch64_enc_ldrshw(dst, mem));
6410 
6411   ins_pipe(iload_reg_mem);
6412 %}
6413 
6414 // Load Short (16 bit signed) into long
6415 instruct loadS2L(iRegLNoSp dst, memory mem)
6416 %{
6417   match(Set dst (ConvI2L (LoadS mem)));
6418   predicate(!needs_acquiring_load(n->in(1)));
6419 
6420   ins_cost(4 * INSN_COST);
6421   format %{ "ldrsh  $dst, $mem\t# short" %}
6422 
6423   ins_encode(aarch64_enc_ldrsh(dst, mem));
6424 
6425   ins_pipe(iload_reg_mem);
6426 %}
6427 
6428 // Load Char (16 bit unsigned)
6429 instruct loadUS(iRegINoSp dst, memory mem)
6430 %{
6431   match(Set dst (LoadUS mem));
6432   predicate(!needs_acquiring_load(n));
6433 
6434   ins_cost(4 * INSN_COST);
6435   format %{ "ldrh  $dst, $mem\t# short" %}
6436 
6437   ins_encode(aarch64_enc_ldrh(dst, mem));
6438 
6439   ins_pipe(iload_reg_mem);
6440 %}
6441 
6442 // Load Short/Char (16 bit unsigned) into long
6443 instruct loadUS2L(iRegLNoSp dst, memory mem)
6444 %{
6445   match(Set dst (ConvI2L (LoadUS mem)));
6446   predicate(!needs_acquiring_load(n->in(1)));
6447 
6448   ins_cost(4 * INSN_COST);
6449   format %{ "ldrh  $dst, $mem\t# short" %}
6450 
6451   ins_encode(aarch64_enc_ldrh(dst, mem));
6452 
6453   ins_pipe(iload_reg_mem);
6454 %}
6455 
6456 // Load Integer (32 bit signed)
6457 instruct loadI(iRegINoSp dst, memory mem)
6458 %{
6459   match(Set dst (LoadI mem));
6460   predicate(!needs_acquiring_load(n));
6461 
6462   ins_cost(4 * INSN_COST);
6463   format %{ "ldrw  $dst, $mem\t# int" %}
6464 
6465   ins_encode(aarch64_enc_ldrw(dst, mem));
6466 
6467   ins_pipe(iload_reg_mem);
6468 %}
6469 
6470 // Load Integer (32 bit signed) into long
6471 instruct loadI2L(iRegLNoSp dst, memory mem)
6472 %{
6473   match(Set dst (ConvI2L (LoadI mem)));
6474   predicate(!needs_acquiring_load(n->in(1)));
6475 
6476   ins_cost(4 * INSN_COST);
6477   format %{ "ldrsw  $dst, $mem\t# int" %}
6478 
6479   ins_encode(aarch64_enc_ldrsw(dst, mem));
6480 
6481   ins_pipe(iload_reg_mem);
6482 %}
6483 
6484 // Load Integer (32 bit unsigned) into long
// Matches (AndL (ConvI2L (LoadI mem)) 0xFFFFFFFF): a 32-bit LDRW
// zero-extends into the full 64-bit register, so the mask is obtained
// for free and no extra AND instruction is emitted.
instruct loadUI2L(iRegLNoSp dst, memory mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  predicate(!needs_acquiring_load(n->in(1)->in(1)->as_Load()));

  ins_cost(4 * INSN_COST);
  format %{ "ldrw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldrw(dst, mem));

  ins_pipe(iload_reg_mem);
%}
6497 
6498 // Load Long (64 bit signed)
6499 instruct loadL(iRegLNoSp dst, memory mem)
6500 %{
6501   match(Set dst (LoadL mem));
6502   predicate(!needs_acquiring_load(n));
6503 
6504   ins_cost(4 * INSN_COST);
6505   format %{ "ldr  $dst, $mem\t# int" %}
6506 
6507   ins_encode(aarch64_enc_ldr(dst, mem));
6508 
6509   ins_pipe(iload_reg_mem);
6510 %}
6511 
6512 // Load Range
6513 instruct loadRange(iRegINoSp dst, memory mem)
6514 %{
6515   match(Set dst (LoadRange mem));
6516 
6517   ins_cost(4 * INSN_COST);
6518   format %{ "ldrw  $dst, $mem\t# range" %}
6519 
6520   ins_encode(aarch64_enc_ldrw(dst, mem));
6521 
6522   ins_pipe(iload_reg_mem);
6523 %}
6524 
6525 // Load Pointer
6526 instruct loadP(iRegPNoSp dst, memory mem)
6527 %{
6528   match(Set dst (LoadP mem));
6529   predicate(!needs_acquiring_load(n));
6530 
6531   ins_cost(4 * INSN_COST);
6532   format %{ "ldr  $dst, $mem\t# ptr" %}
6533 
6534   ins_encode(aarch64_enc_ldr(dst, mem));
6535 
6536   ins_pipe(iload_reg_mem);
6537 %}
6538 
6539 // Load Compressed Pointer
6540 instruct loadN(iRegNNoSp dst, memory mem)
6541 %{
6542   match(Set dst (LoadN mem));
6543   predicate(!needs_acquiring_load(n));
6544 
6545   ins_cost(4 * INSN_COST);
6546   format %{ "ldrw  $dst, $mem\t# compressed ptr" %}
6547 
6548   ins_encode(aarch64_enc_ldrw(dst, mem));
6549 
6550   ins_pipe(iload_reg_mem);
6551 %}
6552 
6553 // Load Klass Pointer
6554 instruct loadKlass(iRegPNoSp dst, memory mem)
6555 %{
6556   match(Set dst (LoadKlass mem));
6557   predicate(!needs_acquiring_load(n));
6558 
6559   ins_cost(4 * INSN_COST);
6560   format %{ "ldr  $dst, $mem\t# class" %}
6561 
6562   ins_encode(aarch64_enc_ldr(dst, mem));
6563 
6564   ins_pipe(iload_reg_mem);
6565 %}
6566 
6567 // Load Narrow Klass Pointer
6568 instruct loadNKlass(iRegNNoSp dst, memory mem)
6569 %{
6570   match(Set dst (LoadNKlass mem));
6571   predicate(!needs_acquiring_load(n));
6572 
6573   ins_cost(4 * INSN_COST);
6574   format %{ "ldrw  $dst, $mem\t# compressed class ptr" %}
6575 
6576   ins_encode(aarch64_enc_ldrw(dst, mem));
6577 
6578   ins_pipe(iload_reg_mem);
6579 %}
6580 
6581 // Load Float
6582 instruct loadF(vRegF dst, memory mem)
6583 %{
6584   match(Set dst (LoadF mem));
6585   predicate(!needs_acquiring_load(n));
6586 
6587   ins_cost(4 * INSN_COST);
6588   format %{ "ldrs  $dst, $mem\t# float" %}
6589 
6590   ins_encode( aarch64_enc_ldrs(dst, mem) );
6591 
6592   ins_pipe(pipe_class_memory);
6593 %}
6594 
6595 // Load Double
6596 instruct loadD(vRegD dst, memory mem)
6597 %{
6598   match(Set dst (LoadD mem));
6599   predicate(!needs_acquiring_load(n));
6600 
6601   ins_cost(4 * INSN_COST);
6602   format %{ "ldrd  $dst, $mem\t# double" %}
6603 
6604   ins_encode( aarch64_enc_ldrd(dst, mem) );
6605 
6606   ins_pipe(pipe_class_memory);
6607 %}
6608 
6609 
6610 // Load Int Constant
6611 instruct loadConI(iRegINoSp dst, immI src)
6612 %{
6613   match(Set dst src);
6614 
6615   ins_cost(INSN_COST);
6616   format %{ "mov $dst, $src\t# int" %}
6617 
6618   ins_encode( aarch64_enc_movw_imm(dst, src) );
6619 
6620   ins_pipe(ialu_imm);
6621 %}
6622 
6623 // Load Long Constant
6624 instruct loadConL(iRegLNoSp dst, immL src)
6625 %{
6626   match(Set dst src);
6627 
6628   ins_cost(INSN_COST);
6629   format %{ "mov $dst, $src\t# long" %}
6630 
6631   ins_encode( aarch64_enc_mov_imm(dst, src) );
6632 
6633   ins_pipe(ialu_imm);
6634 %}
6635 
6636 // Load Pointer Constant
6637 
6638 instruct loadConP(iRegPNoSp dst, immP con)
6639 %{
6640   match(Set dst con);
6641 
6642   ins_cost(INSN_COST * 4);
6643   format %{
6644     "mov  $dst, $con\t# ptr\n\t"
6645   %}
6646 
6647   ins_encode(aarch64_enc_mov_p(dst, con));
6648 
6649   ins_pipe(ialu_imm);
6650 %}
6651 
6652 // Load Null Pointer Constant
6653 
6654 instruct loadConP0(iRegPNoSp dst, immP0 con)
6655 %{
6656   match(Set dst con);
6657 
6658   ins_cost(INSN_COST);
6659   format %{ "mov  $dst, $con\t# NULL ptr" %}
6660 
6661   ins_encode(aarch64_enc_mov_p0(dst, con));
6662 
6663   ins_pipe(ialu_imm);
6664 %}
6665 
6666 // Load Pointer Constant One
6667 
// Loads the pointer constant one (immP_1), e.g. as a marker value.
// The format string previously said "# NULL ptr" -- a copy-paste from
// loadConP0; corrected to describe the actual constant.
instruct loadConP1(iRegPNoSp dst, immP_1 con)
%{
  match(Set dst con);

  ins_cost(INSN_COST);
  format %{ "mov  $dst, $con\t# ptr 1" %}

  ins_encode(aarch64_enc_mov_p1(dst, con));

  ins_pipe(ialu_imm);
%}
6679 
6680 // Load Poll Page Constant
6681 
6682 instruct loadConPollPage(iRegPNoSp dst, immPollPage con)
6683 %{
6684   match(Set dst con);
6685 
6686   ins_cost(INSN_COST);
6687   format %{ "adr  $dst, $con\t# Poll Page Ptr" %}
6688 
6689   ins_encode(aarch64_enc_mov_poll_page(dst, con));
6690 
6691   ins_pipe(ialu_imm);
6692 %}
6693 
6694 // Load Byte Map Base Constant
6695 
6696 instruct loadByteMapBase(iRegPNoSp dst, immByteMapBase con)
6697 %{
6698   match(Set dst con);
6699 
6700   ins_cost(INSN_COST);
6701   format %{ "adr  $dst, $con\t# Byte Map Base" %}
6702 
6703   ins_encode(aarch64_enc_mov_byte_map_base(dst, con));
6704 
6705   ins_pipe(ialu_imm);
6706 %}
6707 
6708 // Load Narrow Pointer Constant
6709 
6710 instruct loadConN(iRegNNoSp dst, immN con)
6711 %{
6712   match(Set dst con);
6713 
6714   ins_cost(INSN_COST * 4);
6715   format %{ "mov  $dst, $con\t# compressed ptr" %}
6716 
6717   ins_encode(aarch64_enc_mov_n(dst, con));
6718 
6719   ins_pipe(ialu_imm);
6720 %}
6721 
6722 // Load Narrow Null Pointer Constant
6723 
6724 instruct loadConN0(iRegNNoSp dst, immN0 con)
6725 %{
6726   match(Set dst con);
6727 
6728   ins_cost(INSN_COST);
6729   format %{ "mov  $dst, $con\t# compressed NULL ptr" %}
6730 
6731   ins_encode(aarch64_enc_mov_n0(dst, con));
6732 
6733   ins_pipe(ialu_imm);
6734 %}
6735 
6736 // Load Narrow Klass Constant
6737 
6738 instruct loadConNKlass(iRegNNoSp dst, immNKlass con)
6739 %{
6740   match(Set dst con);
6741 
6742   ins_cost(INSN_COST);
6743   format %{ "mov  $dst, $con\t# compressed klass ptr" %}
6744 
6745   ins_encode(aarch64_enc_mov_nk(dst, con));
6746 
6747   ins_pipe(ialu_imm);
6748 %}
6749 
6750 // Load Packed Float Constant
6751 
6752 instruct loadConF_packed(vRegF dst, immFPacked con) %{
6753   match(Set dst con);
6754   ins_cost(INSN_COST * 4);
6755   format %{ "fmovs  $dst, $con"%}
6756   ins_encode %{
6757     __ fmovs(as_FloatRegister($dst$$reg), (double)$con$$constant);
6758   %}
6759 
6760   ins_pipe(fp_imm_s);
6761 %}
6762 
6763 // Load Float Constant
6764 
6765 instruct loadConF(vRegF dst, immF con) %{
6766   match(Set dst con);
6767 
6768   ins_cost(INSN_COST * 4);
6769 
6770   format %{
6771     "ldrs $dst, [$constantaddress]\t# load from constant table: float=$con\n\t"
6772   %}
6773 
6774   ins_encode %{
6775     __ ldrs(as_FloatRegister($dst$$reg), $constantaddress($con));
6776   %}
6777 
6778   ins_pipe(fp_load_constant_s);
6779 %}
6780 
6781 // Load Packed Double Constant
6782 
6783 instruct loadConD_packed(vRegD dst, immDPacked con) %{
6784   match(Set dst con);
6785   ins_cost(INSN_COST);
6786   format %{ "fmovd  $dst, $con"%}
6787   ins_encode %{
6788     __ fmovd(as_FloatRegister($dst$$reg), $con$$constant);
6789   %}
6790 
6791   ins_pipe(fp_imm_d);
6792 %}
6793 
6794 // Load Double Constant
6795 
// Load a double constant from the constant table.  The disassembly
// format previously said "float=$con" -- a copy-paste from loadConF;
// corrected to "double=$con".
instruct loadConD(vRegD dst, immD con) %{
  match(Set dst con);

  ins_cost(INSN_COST * 5);
  format %{
    "ldrd $dst, [$constantaddress]\t# load from constant table: double=$con\n\t"
  %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), $constantaddress($con));
  %}

  ins_pipe(fp_load_constant_d);
%}
6810 
6811 // Store Instructions
6812 
6813 // Store CMS card-mark Immediate
6814 instruct storeimmCM0(immI0 zero, memory mem)
6815 %{
6816   match(Set mem (StoreCM mem zero));
6817   predicate(unnecessary_storestore(n));
6818 
6819   ins_cost(INSN_COST);
6820   format %{ "storestore (elided)\n\t"
6821             "strb zr, $mem\t# byte" %}
6822 
6823   ins_encode(aarch64_enc_strb0(mem));
6824 
6825   ins_pipe(istore_mem);
6826 %}
6827 
6828 // Store CMS card-mark Immediate with intervening StoreStore
6829 // needed when using CMS with no conditional card marking
6830 instruct storeimmCM0_ordered(immI0 zero, memory mem)
6831 %{
6832   match(Set mem (StoreCM mem zero));
6833 
6834   ins_cost(INSN_COST * 2);
6835   format %{ "storestore\n\t"
6836             "dmb ishst"
6837             "\n\tstrb zr, $mem\t# byte" %}
6838 
6839   ins_encode(aarch64_enc_strb0_ordered(mem));
6840 
6841   ins_pipe(istore_mem);
6842 %}
6843 
6844 // Store Byte
6845 instruct storeB(iRegIorL2I src, memory mem)
6846 %{
6847   match(Set mem (StoreB mem src));
6848   predicate(!needs_releasing_store(n));
6849 
6850   ins_cost(INSN_COST);
6851   format %{ "strb  $src, $mem\t# byte" %}
6852 
6853   ins_encode(aarch64_enc_strb(src, mem));
6854 
6855   ins_pipe(istore_reg_mem);
6856 %}
6857 
6858 
// Store constant zero byte.  The encoding (aarch64_enc_strb0) stores
// the zero register; the format previously said "strb rscractch2"
// (misspelled and wrong register) -- corrected to "strb zr" to match
// the encoding and the sibling storeimmC0/storeimmI0 formats.
instruct storeimmB0(immI0 zero, memory mem)
%{
  match(Set mem (StoreB mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strb  zr, $mem\t# byte" %}

  ins_encode(aarch64_enc_strb0(mem));

  ins_pipe(istore_mem);
%}
6871 
// Store Char/Short
// Plain (non-volatile) 16-bit store from a register.
instruct storeC(iRegIorL2I src, memory mem)
%{
  match(Set mem (StoreC mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_strh(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Char/Short of immediate zero (encoder stores zr).
instruct storeimmC0(immI0 zero, memory mem)
%{
  match(Set mem (StoreC mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strh  zr, $mem\t# short" %}

  ins_encode(aarch64_enc_strh0(mem));

  ins_pipe(istore_mem);
%}
6898 
// Store Integer

// Plain (non-volatile) 32-bit store from a register.
instruct storeI(iRegIorL2I src, memory mem)
%{
  match(Set mem(StoreI mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Integer of immediate zero (encoder stores zr).
instruct storeimmI0(immI0 zero, memory mem)
%{
  match(Set mem(StoreI mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  zr, $mem\t# int" %}

  ins_encode(aarch64_enc_strw0(mem));

  ins_pipe(istore_mem);
%}
6926 
// Store Long (64 bit signed)
// Plain (non-volatile) 64-bit store. The format annotation previously said
// "# int", which mislabelled the width in debug disassembly; it is a long.
instruct storeL(iRegL src, memory mem)
%{
  match(Set mem (StoreL mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}
6940 
// Store Long (64 bit signed) of immediate zero (encoder stores zr).
// The format annotation previously said "# int"; corrected to "# long".
instruct storeimmL0(immL0 zero, memory mem)
%{
  match(Set mem (StoreL mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  zr, $mem\t# long" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6954 
// Store Pointer
// Plain (non-volatile) 64-bit pointer store from a register.
instruct storeP(iRegP src, memory mem)
%{
  match(Set mem (StoreP mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store Pointer
// Immediate-NULL pointer store (encoder stores zr).
instruct storeimmP0(immP0 zero, memory mem)
%{
  match(Set mem (StoreP mem zero));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "str zr, $mem\t# ptr" %}

  ins_encode(aarch64_enc_str0(mem));

  ins_pipe(istore_mem);
%}
6982 
// Store Compressed Pointer
// Plain (non-volatile) 32-bit narrow-oop store from a register.
instruct storeN(iRegN src, memory mem)
%{
  match(Set mem (StoreN mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}

// Store a narrow-oop NULL by reusing rheapbase: when both narrow oop and
// narrow klass bases are NULL, rheapbase is pinned to zero, so storing it
// writes a zero word without materializing a constant.
instruct storeImmN0(iRegIHeapbase heapbase, immN0 zero, memory mem)
%{
  match(Set mem (StoreN mem zero));
  predicate(Universe::narrow_oop_base() == NULL &&
            Universe::narrow_klass_base() == NULL &&
            (!needs_releasing_store(n)));

  ins_cost(INSN_COST);
  format %{ "strw  rheapbase, $mem\t# compressed ptr (rheapbase==0)" %}

  ins_encode(aarch64_enc_strw(heapbase, mem));

  ins_pipe(istore_reg_mem);
%}
7011 
// Store Float
// Plain (non-volatile) single-precision FP store.
instruct storeF(vRegF src, memory mem)
%{
  match(Set mem (StoreF mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_strs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
// Plain (non-volatile) double-precision FP store.
instruct storeD(vRegD src, memory mem)
%{
  match(Set mem (StoreD mem src));
  predicate(!needs_releasing_store(n));

  ins_cost(INSN_COST);
  format %{ "strd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_strd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7042 
// Store Compressed Klass Pointer
// 32-bit narrow-klass store; shares the strw encoder with storeN.
instruct storeNKlass(iRegN src, memory mem)
%{
  predicate(!needs_releasing_store(n));
  match(Set mem (StoreNKlass mem src));

  ins_cost(INSN_COST);
  format %{ "strw  $src, $mem\t# compressed klass ptr" %}

  ins_encode(aarch64_enc_strw(src, mem));

  ins_pipe(istore_reg_mem);
%}
7056 
7057 // TODO
7058 // implement storeImmD0 and storeDImmPacked
7059 
// prefetch instructions
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for allocation: PRFM with PSTL1KEEP hint (prepare for store,
// L1, temporal). PRFM never faults, satisfying the requirement above.
instruct prefetchalloc( memory mem ) %{
  match(PrefetchAllocation mem);

  ins_cost(INSN_COST);
  format %{ "prfm $mem, PSTL1KEEP\t# Prefetch into level 1 cache write keep" %}

  ins_encode( aarch64_enc_prefetchw(mem) );

  ins_pipe(iload_prefetch);
%}
7073 
7074 //  ---------------- volatile loads and stores ----------------
7075 
// Load Byte (8 bit signed)
// Volatile loads use load-acquire (ldar*) on an indirect (base-register
// only) address, hence the higher VOLATILE_REF_COST and serial pipe.
instruct loadB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit signed) into long
// The sign-extending ldarsb subsumes the ConvI2L, so no extra instruction.
instruct loadB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarsb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned)
instruct loadUB_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUB mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Byte (8 bit unsigned) into long
// ldarb zero-extends to 64 bits, subsuming the ConvI2L.
instruct loadUB2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUB mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarb  $dst, $mem\t# byte" %}

  ins_encode(aarch64_enc_ldarb(dst, mem));

  ins_pipe(pipe_serial);
%}
7127 
// Load Short (16 bit signed)
instruct loadS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarshw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarshw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned)
instruct loadUS_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadUS mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarhw  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarhw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Short/Char (16 bit unsigned) into long
// ldarh zero-extends to 64 bits, subsuming the ConvI2L.
instruct loadUS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadUS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarh(dst, mem));

  ins_pipe(pipe_serial);
%}
7165 
// Load Short/Char (16 bit signed) into long
// The sign-extending ldarsh subsumes the ConvI2L. The format previously
// printed "ldarh" (the unsigned form) while the encoder is
// aarch64_enc_ldarsh; the debug text now matches the emitted instruction.
instruct loadS2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (ConvI2L (LoadS mem)));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarsh  $dst, $mem\t# short" %}

  ins_encode(aarch64_enc_ldarsh(dst, mem));

  ins_pipe(pipe_serial);
%}
7178 
// Load Integer (32 bit signed)
instruct loadI_volatile(iRegINoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadI mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Integer (32 bit unsigned) into long
// Matches the (AndL (ConvI2L ...) 0xFFFFFFFF) idiom; ldarw zero-extends,
// so the mask needs no additional instruction.
instruct loadUI2L_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# int" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}
7204 
// Load Long (64 bit signed)
// Volatile 64-bit load-acquire. The format annotation previously said
// "# int", mislabelling the width in debug disassembly; it is a long.
instruct loadL_volatile(iRegLNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadL mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# long" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}
7217 
// Load Pointer
// Volatile 64-bit pointer load-acquire.
instruct loadP_volatile(iRegPNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadP mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldar  $dst, $mem\t# ptr" %}

  ins_encode(aarch64_enc_ldar(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Compressed Pointer
// Volatile 32-bit narrow-oop load-acquire.
instruct loadN_volatile(iRegNNoSp dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadN mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldarw  $dst, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_ldarw(dst, mem));

  ins_pipe(pipe_serial);
%}

// Load Float
// Volatile FP load; encoder presumably bounces through an integer
// register since ldar targets GPRs (see aarch64_enc_fldars).
instruct loadF_volatile(vRegF dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadF mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldars  $dst, $mem\t# float" %}

  ins_encode( aarch64_enc_fldars(dst, mem) );

  ins_pipe(pipe_serial);
%}

// Load Double
instruct loadD_volatile(vRegD dst, /* sync_memory*/indirect mem)
%{
  match(Set dst (LoadD mem));

  ins_cost(VOLATILE_REF_COST);
  format %{ "ldard  $dst, $mem\t# double" %}

  ins_encode( aarch64_enc_fldard(dst, mem) );

  ins_pipe(pipe_serial);
%}
7269 
// Store Byte
// Volatile stores use store-release (stlr*) on an indirect (base-register
// only) address.
instruct storeB_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreB mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrb  $src, $mem\t# byte" %}

  ins_encode(aarch64_enc_stlrb(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Char/Short
instruct storeC_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreC mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrh  $src, $mem\t# short" %}

  ins_encode(aarch64_enc_stlrh(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Integer

instruct storeI_volatile(iRegIorL2I src, /* sync_memory*/indirect mem)
%{
  match(Set mem(StoreI mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# int" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}
7309 
// Store Long (64 bit signed)
// Volatile 64-bit store-release. The format annotation previously said
// "# int", mislabelling the width in debug disassembly; it is a long.
instruct storeL_volatile(iRegL src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreL mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# long" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}
7322 
// Store Pointer
// Volatile 64-bit pointer store-release.
instruct storeP_volatile(iRegP src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreP mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlr  $src, $mem\t# ptr" %}

  ins_encode(aarch64_enc_stlr(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Compressed Pointer
// Volatile 32-bit narrow-oop store-release.
instruct storeN_volatile(iRegN src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreN mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrw  $src, $mem\t# compressed ptr" %}

  ins_encode(aarch64_enc_stlrw(src, mem));

  ins_pipe(pipe_class_memory);
%}

// Store Float
// Volatile FP store; encoder presumably bounces through an integer
// register since stlr sources GPRs (see aarch64_enc_fstlrs).
instruct storeF_volatile(vRegF src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreF mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrs  $src, $mem\t# float" %}

  ins_encode( aarch64_enc_fstlrs(src, mem) );

  ins_pipe(pipe_class_memory);
%}

// TODO
// implement storeImmF0 and storeFImmPacked

// Store Double
instruct storeD_volatile(vRegD src, /* sync_memory*/indirect mem)
%{
  match(Set mem (StoreD mem src));

  ins_cost(VOLATILE_REF_COST);
  format %{ "stlrd  $src, $mem\t# double" %}

  ins_encode( aarch64_enc_fstlrd(src, mem) );

  ins_pipe(pipe_class_memory);
%}
7377 
7378 //  ---------------- end of volatile loads and stores ----------------
7379 
7380 // ============================================================================
7381 // BSWAP Instructions
7382 
// Reverse the byte order of a 32-bit int (Integer.reverseBytes).
instruct bytes_reverse_int(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesI src));

  ins_cost(INSN_COST);
  format %{ "revw  $dst, $src" %}

  ins_encode %{
    __ revw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse the byte order of a 64-bit long (Long.reverseBytes).
instruct bytes_reverse_long(iRegLNoSp dst, iRegL src) %{
  match(Set dst (ReverseBytesL src));

  ins_cost(INSN_COST);
  format %{ "rev  $dst, $src" %}

  ins_encode %{
    __ rev(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Reverse bytes within each 16-bit halfword; result is zero-extended
// (unsigned short semantics), so rev16w alone suffices.
instruct bytes_reverse_unsigned_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesUS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Signed-short variant: rev16w swaps the bytes, then sbfmw sign-extends
// bits [15:0] to produce a proper int-typed short result.
instruct bytes_reverse_short(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (ReverseBytesS src));

  ins_cost(INSN_COST);
  format %{ "rev16w  $dst, $src\n\t"
            "sbfmw $dst, $dst, #0, #15" %}

  ins_encode %{
    __ rev16w(as_Register($dst$$reg), as_Register($src$$reg));
    __ sbfmw(as_Register($dst$$reg), as_Register($dst$$reg), 0U, 15U);
  %}

  ins_pipe(ialu_reg);
%}
7436 
7437 // ============================================================================
7438 // Zero Count Instructions
7439 
// Count leading zeros of a 32-bit int: single clzw.
instruct countLeadingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountLeadingZerosI src));

  ins_cost(INSN_COST);
  format %{ "clzw  $dst, $src" %}
  ins_encode %{
    __ clzw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count leading zeros of a 64-bit long: single clz.
instruct countLeadingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountLeadingZerosL src));

  ins_cost(INSN_COST);
  format %{ "clz   $dst, $src" %}
  ins_encode %{
    __ clz(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros (int): no direct instruction, so bit-reverse
// (rbitw) then count leading zeros.
instruct countTrailingZerosI(iRegINoSp dst, iRegIorL2I src) %{
  match(Set dst (CountTrailingZerosI src));

  ins_cost(INSN_COST * 2);
  format %{ "rbitw  $dst, $src\n\t"
            "clzw   $dst, $dst" %}
  ins_encode %{
    __ rbitw(as_Register($dst$$reg), as_Register($src$$reg));
    __ clzw(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

// Count trailing zeros (long): rbit then clz, as above.
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit   $dst, $src\n\t"
            "clz    $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}
7491 
7492 //---------- Population Count Instructions -------------------------------------
7493 //
7494 
// Population count (int) via the NEON CNT instruction: zero-extend src in
// place, move to a vector register, count bits per byte (cnt), sum the
// bytes (addv), move the result back.
// NOTE(review): the movw writes $src in place (clears its upper 32 bits;
// the int value is unchanged) — $src is modified without a TEMP/USE_KILL
// effect declaration; confirm this is safe for the register allocator.
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw   $src, $src\n\t"
            "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Population count (int) fused with the memory load: ldrs loads the 32-bit
// value directly into the vector register (upper bits zero), avoiding the
// GPR round trip of popCountI.
instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7538 
// Note: Long.bitCount(long) returns an int.
// Population count (long) via NEON cnt/addv; same scheme as popCountI but
// no zero-extension step is needed for a full 64-bit source.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov    $tmp, $src\t# vector (1D)\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Population count (long) fused with the memory load: ldrd loads the
// 64-bit value directly into the vector register.
instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd   $tmp, $mem\n\t"
            "cnt    $tmp, $tmp\t# vector (8B)\n\t"
            "addv   $tmp, $tmp\t# vector (8B)\n\t"
            "mov    $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
               as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0);
  %}

  ins_pipe(pipe_class_default);
%}
7581 
7582 // ============================================================================
7583 // MemBar Instruction
7584 
// LoadFence: orders prior loads before subsequent loads and stores
// (LoadLoad|LoadStore barrier).
instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}
  ins_pipe(pipe_serial);
%}

// Elided acquire barrier: matched (at zero cost, so preferred) when
// unnecessary_acquire(n) proves a preceding ldar already provides the
// acquire semantics; emits only a block comment.
instruct unnecessary_membar_acquire() %{
  predicate(unnecessary_acquire(n));
  match(MemBarAcquire);
  ins_cost(0);

  format %{ "membar_acquire (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire (elided)");
  %}

  ins_pipe(pipe_class_empty);
%}

// Full acquire barrier fallback: LoadLoad|LoadStore membar.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_acquire");
    __ membar(Assembler::LoadLoad|Assembler::LoadStore);
  %}

  ins_pipe(pipe_serial);
%}
7625 
7626 
// MemBarAcquireLock: always elided — the lock acquisition itself
// (CAS/ldaxr elsewhere) already provides the required ordering; emits only
// a block comment.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_acquire_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_acquire_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// StoreFence: orders prior loads and stores before subsequent stores
// (LoadStore|StoreStore barrier).
instruct store_fence() %{
  match(StoreFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "store_fence" %}

  ins_encode %{
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7651 
// Elided release barrier: matched (at zero cost) when
// unnecessary_release(n) proves a following stlr already provides the
// release semantics; emits only a block comment.
instruct unnecessary_membar_release() %{
  predicate(unnecessary_release(n));
  match(MemBarRelease);
  ins_cost(0);

  format %{ "membar_release (elided)" %}

  ins_encode %{
    __ block_comment("membar_release (elided)");
  %}
  ins_pipe(pipe_serial);
%}

// Full release barrier fallback: LoadStore|StoreStore membar.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release\n\t"
            "dmb ish" %}

  ins_encode %{
    __ block_comment("membar_release");
    __ membar(Assembler::LoadStore|Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}

// StoreStore-only barrier.
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(VOLATILE_REF_COST);

  format %{ "MEMBAR-store-store" %}

  ins_encode %{
    __ membar(Assembler::StoreStore);
  %}
  ins_pipe(pipe_serial);
%}
7690 
// MemBarReleaseLock: always elided — the lock release itself provides the
// required ordering; emits only a block comment.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(VOLATILE_REF_COST);

  format %{ "membar_release_lock (elided)" %}

  ins_encode %{
    __ block_comment("membar_release_lock (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Elided volatile (StoreLoad) barrier: matched at zero cost when
// unnecessary_volatile(n) proves the ldar/stlr pairing already orders the
// accesses; emits only a block comment.
instruct unnecessary_membar_volatile() %{
  predicate(unnecessary_volatile(n));
  match(MemBarVolatile);
  ins_cost(0);

  format %{ "membar_volatile (elided)" %}

  ins_encode %{
    __ block_comment("membar_volatile (elided)");
  %}

  ins_pipe(pipe_serial);
%}

// Full volatile barrier fallback: StoreLoad membar. Cost is inflated
// (x100) so the elided form above is always preferred when legal.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(VOLATILE_REF_COST*100);

  format %{ "membar_volatile\n\t"
             "dmb ish"%}

  ins_encode %{
    __ block_comment("membar_volatile");
    __ membar(Assembler::StoreLoad);
  %}

  ins_pipe(pipe_serial);
%}
7732 
7733 // ============================================================================
7734 // Cast/Convert Instructions
7735 
// CastX2P: reinterpret a long as a pointer; register move elided when
// source and destination coincide.
instruct castX2P(iRegPNoSp dst, iRegL src) %{
  match(Set dst (CastX2P src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# long -> ptr" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// CastP2X: reinterpret a pointer as a long; move elided when registers
// coincide.
instruct castP2X(iRegLNoSp dst, iRegP src) %{
  match(Set dst (CastP2X src));

  ins_cost(INSN_COST);
  format %{ "mov $dst, $src\t# ptr -> long" %}

  ins_encode %{
    if ($dst$$reg != $src$$reg) {
      __ mov(as_Register($dst$$reg), as_Register($src$$reg));
    }
  %}

  ins_pipe(ialu_reg);
%}

// Convert oop into int for vectors alignment masking
// Truncates the pointer bits to 32 via movw.
instruct convP2I(iRegINoSp dst, iRegP src) %{
  match(Set dst (ConvL2I (CastP2X src)));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7778 
// Convert compressed oop into int for vectors alignment masking
// in case of 32bit oops (heap < 4Gb).
// With a zero narrow-oop shift the compressed value IS the low 32 address
// bits, so a movw suffices. The format previously printed "mov dst, $src"
// — the missing '$' emitted the literal word "dst" and "mov" did not match
// the movw the encoder emits.
instruct convN2I(iRegINoSp dst, iRegN src)
%{
  predicate(Universe::narrow_oop_shift() == 0);
  match(Set dst (ConvL2I (CastP2X (DecodeN src))));

  ins_cost(INSN_COST);
  format %{ "movw $dst, $src\t# compressed ptr -> int" %}
  ins_encode %{
    __ movw($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}
7794 
7795 
// Convert oop pointer into compressed form
// General (maybe-null) case; the encoding helper may use a conditional
// path, hence KILL cr.
instruct encodeHeapOop(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  effect(KILL cr);
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ encode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Known-not-null variant: skips the null check, no flags effect declared.
instruct encodeHeapOop_not_null(iRegNNoSp dst, iRegP src, rFlagsReg cr) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  ins_cost(INSN_COST * 3);
  format %{ "encode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a narrow oop; general case (value may be null and is not a
// compile-time constant).
instruct decodeHeapOop(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop(d, s);
  %}
  ins_pipe(ialu_reg);
%}

// Decode a narrow oop known not-null (or constant): no null check needed.
instruct decodeHeapOop_not_null(iRegPNoSp dst, iRegN src, rFlagsReg cr) %{
  predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  ins_cost(INSN_COST * 3);
  format %{ "decode_heap_oop_not_null $dst, $src" %}
  ins_encode %{
    Register s = $src$$Register;
    Register d = $dst$$Register;
    __ decode_heap_oop_not_null(d, s);
  %}
  ins_pipe(ialu_reg);
%}
7849 
7850 // n.b. AArch64 implementations of encode_klass_not_null and
7851 // decode_klass_not_null do not modify the flags register so, unlike
7852 // Intel, we don't kill CR as a side effect here
7853 
// Compress a klass pointer (always non-null); per the note above, the
// AArch64 helper does not clobber flags, so no KILL cr effect.
instruct encodeKlass_not_null(iRegNNoSp dst, iRegP src) %{
  match(Set dst (EncodePKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "encode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    __ encode_klass_not_null(dst_reg, src_reg);
  %}

   ins_pipe(ialu_reg);
%}

// Decompress a narrow klass pointer; the one-register helper overload is
// used for the in-place (dst == src) case.
instruct decodeKlass_not_null(iRegPNoSp dst, iRegN src) %{
  match(Set dst (DecodeNKlass src));

  ins_cost(INSN_COST * 3);
  format %{ "decode_klass_not_null $dst,$src" %}

  ins_encode %{
    Register src_reg = as_Register($src$$reg);
    Register dst_reg = as_Register($dst$$reg);
    if (dst_reg != src_reg) {
      __ decode_klass_not_null(dst_reg, src_reg);
    } else {
      __ decode_klass_not_null(dst_reg);
    }
  %}

   ins_pipe(ialu_reg);
%}
7887 
// CheckCastPP: type-system-only node; emits no code (size 0).
instruct checkCastPP(iRegPNoSp dst)
%{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastPP: type-system-only node; emits no code.
instruct castPP(iRegPNoSp dst)
%{
  match(Set dst (CastPP dst));

  size(0);
  format %{ "# castPP of $dst" %}
  ins_encode(/* empty encoding */);
  ins_pipe(pipe_class_empty);
%}

// CastII: type-system-only node; emits no code and costs nothing.
instruct castII(iRegI dst)
%{
  match(Set dst (CastII dst));

  size(0);
  format %{ "# castII of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(pipe_class_empty);
%}
7918 
7919 // ============================================================================
7920 // Atomic operation instructions
7921 //
7922 // Intel and SPARC both implement Ideal Node LoadPLocked and
7923 // Store{PIL}Conditional instructions using a normal load for the
7924 // LoadPLocked and a CAS for the Store{PIL}Conditional.
7925 //
7926 // The ideal code appears only to use LoadPLocked/StorePLocked as a
7927 // pair to lock object allocations from Eden space when not using
7928 // TLABs.
7929 //
7930 // There does not appear to be a Load{IL}Locked Ideal Node and the
7931 // Ideal code appears to use Store{IL}Conditional as an alias for CAS
7932 // and to use StoreIConditional only for 32-bit and StoreLConditional
7933 // only for 64-bit.
7934 //
7935 // We implement LoadPLocked and StorePLocked instructions using,
7936 // respectively the AArch64 hw load-exclusive and store-conditional
7937 // instructions. Whereas we must implement each of
7938 // Store{IL}Conditional using a CAS which employs a pair of
7939 // instructions comprising a load-exclusive followed by a
7940 // store-conditional.
7941 
7942 
7943 // Locked-load (linked load) of the current heap-top
7944 // used when updating the eden heap top
7945 // implemented using ldaxr on AArch64
7946 
// Linked-load of the heap top via ldaxr (load-acquire exclusive); pairs
// with storePConditional's stlxr below (see section comment above).
instruct loadPLocked(iRegPNoSp dst, indirect mem)
%{
  match(Set dst (LoadPLocked mem));

  ins_cost(VOLATILE_REF_COST);

  format %{ "ldaxr $dst, $mem\t# ptr linked acquire" %}

  ins_encode(aarch64_enc_ldaxr(dst, mem));

  ins_pipe(pipe_serial);
%}
7959 
7960 // Conditional-store of the updated heap-top.
7961 // Used during allocation of the shared heap.
7962 // Sets flag (EQ) on success.
7963 // implemented using stlxr on AArch64.
7964 
// Conditional store of the updated heap top via stlxr; sets EQ on success.
// The two format string literals previously concatenated with no "\n\t"
// separator, producing one run-on line in debug disassembly.
instruct storePConditional(memory heap_top_ptr, iRegP oldval, iRegP newval, rFlagsReg cr)
%{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

 // TODO
 // do we need to do a store-conditional release or can we just use a
 // plain store-conditional?

  format %{
    "stlxr rscratch1, $newval, $heap_top_ptr\t# ptr cond release\n\t"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  ins_encode(aarch64_enc_stlxr(newval, heap_top_ptr));

  ins_pipe(pipe_serial);
%}
7984 
7985 
7986 // storeLConditional is used by PhaseMacroExpand::expand_lock_node
7987 // when attempting to rebias a lock towards the current thread.  We
7988 // must use the acquire form of cmpxchg in order to guarantee acquire
7989 // semantics in this case.
// 64-bit conditional store expressed as a CAS; result is the EQ flag
// only (no value result).
instruct storeLConditional(indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  // The trailing cmpw shown here is presumably emitted by the enc
  // class itself -- confirm against aarch64_enc_cmpxchg_acq.
  format %{
    "cmpxchg rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // acquire form of the CAS: required for the lock-rebias path, see
  // the block comment above this instruct.
  ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8005 
8006 // storeIConditional also has acquire semantics, for no better reason
8007 // than matching storeLConditional.  At the time of writing this
8008 // comment storeIConditional was not used anywhere by AArch64.
// 32-bit analogue of storeLConditional; sets EQ on success.
instruct storeIConditional(indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr)
%{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));

  ins_cost(VOLATILE_REF_COST);

  format %{
    "cmpxchgw rscratch1, $mem, $oldval, $newval, $mem\t# if $mem == $oldval then $mem <-- $newval"
    "cmpw rscratch1, zr\t# EQ on successful write"
  %}

  // acquire form, for consistency with storeLConditional (see comment
  // above this instruct).
  ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval));

  ins_pipe(pipe_slow);
%}
8024 
8025 // standard CompareAndSwapX when we are using barriers
8026 // these have higher priority than the rules selected by a predicate
8027 
8028 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
8029 // can't match them
8030 
// Strong CAS with boolean result: res <-- 1 on successful exchange,
// 0 otherwise (via cset on the EQ flag).  The embedded compare
// clobbers the condition flags, hence KILL cr.
// NOTE(review): the byte/short formats below annotate "(int)" --
// presumably copy-paste from the int variant; "(byte)"/"(short)"
// would be clearer.  Debug-output only.

// byte CAS
instruct compareAndSwapB(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// short CAS
instruct compareAndSwapS(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// int CAS
instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// long CAS
instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// pointer CAS
instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// narrow-oop CAS (32-bit compressed pointer, hence the w forms)
instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8138 
8139 // alternative CompareAndSwapX when we are eliding barriers
8140 
// Acquire variants of the strong CAS rules above.  Selected by the
// needs_acquiring_load_exclusive(n) predicate; note the lower
// ins_cost, which makes them preferred over the barrier-based rules
// when the predicate holds.

// byte CAS, acquiring
instruct compareAndSwapBAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgb_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgb_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// short CAS, acquiring
instruct compareAndSwapSAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

  format %{
    "cmpxchgs_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}

  ins_encode(aarch64_enc_cmpxchgs_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// int CAS, acquiring
instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// long CAS, acquiring
instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// pointer CAS, acquiring
instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}

// narrow-oop CAS, acquiring
instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{

  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);

  effect(KILL cr);

 format %{
    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
 %}

 ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
            aarch64_enc_cset_eq(res));

  ins_pipe(pipe_slow);
%}
8254 
8255 
8256 // ---------------------------------------------------------------------
8257 
8258 
8259 // BEGIN This section of the file is automatically generated. Do not edit --------------
8260 
8261 // Sundry CAS operations.  Note that release is always true,
8262 // regardless of the memory ordering of the CAS.  This is because we
8263 // need the volatile case to be sequentially consistent but there is
8264 // no trailing StoreLoad barrier emitted by C2.  Unfortunately we
8265 // can't check the type of memory ordering here, so we always emit a
8266 // STLXR.
8267 
8268 // This section is generated from aarch64_ad_cas.m4
8269 
8270 
8271 
// Strong compareAndExchange: unlike CompareAndSwap these return the
// value previously in memory (TEMP_DEF res), not a success boolean.
// NOTE(review): the "(byte, weak)" etc. annotations in these format
// strings appear wrong -- every encoding below passes /*weak*/ false,
// so these are strong CAS operations.  Any fix belongs in
// aarch64_ad_cas.m4, from which this section is generated.
instruct compareAndExchangeB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // sign-extend the subword result to an int -- presumably cmpxchg
    // leaves it zero-extended; confirm against MacroAssembler::cmpxchg.
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
    // sign-extend halfword result, as for the byte variant above
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeL(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeN(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeP(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8363 
// Acquiring variants of compareAndExchange, selected by
// needs_acquiring_load_exclusive(n); identical except
// /*acquire*/ true and the lower ins_cost.
// NOTE(review): as above, "weak" in these format strings mislabels a
// strong CAS (/*weak*/ false); fix in aarch64_ad_cas.m4.
instruct compareAndExchangeBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxtbw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
    __ sxthw($res$$Register, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


instruct compareAndExchangeIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangeLAcq(iRegLNoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}


instruct compareAndExchangeNAcq(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct compareAndExchangePAcq(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (CompareAndExchangeP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(TEMP_DEF res, KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ false, $res$$Register);
  %}
  ins_pipe(pipe_slow);
%}
8463 
// Weak CAS (/*weak*/ true -- may fail spuriously): result is a
// success boolean produced by csetw on the EQ flag.  The old value is
// not needed, so cmpxchg's result register is noreg.
instruct weakCompareAndSwapB(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapS(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapI(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

// note: res is an int boolean even for the long variant
instruct weakCompareAndSwapL(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapN(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(2 * VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ false, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8565 
// Acquiring variants of the weak CAS rules above; identical except
// /*acquire*/ true and the lower ins_cost.
instruct weakCompareAndSwapBAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapB mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgb_acq $res = $mem, $oldval, $newval\t# (byte, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::byte, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapSAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapS mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgs_acq $res = $mem, $oldval, $newval\t# (short, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::halfword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapIAcq(iRegINoSp res, indirect mem, iRegI oldval, iRegI newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapI mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (int, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapLAcq(iRegINoSp res, indirect mem, iRegL oldval, iRegL newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapL mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (long, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapNAcq(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapN mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchgw_acq $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::word, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}

instruct weakCompareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set res (WeakCompareAndSwapP mem (Binary oldval newval)));
  ins_cost(VOLATILE_REF_COST);
  effect(KILL cr);
  format %{
    "cmpxchg_acq $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval"
    "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)"
  %}
  ins_encode %{
    __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
               Assembler::xword, /*acquire*/ true, /*release*/ true,
               /*weak*/ true, noreg);
    __ csetw($res$$Register, Assembler::EQ);
  %}
  ins_pipe(pipe_slow);
%}
8673 
8674 // END This section of the file is automatically generated. Do not edit --------------
8675 // ---------------------------------------------------------------------
8676 
// Atomic exchange (GetAndSetX): prev <-- old contents of [mem],
// [mem] <-- newv.  No flags are affected, so no KILL cr here.
instruct get_and_setI(indirect mem, iRegI newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetI mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setL(indirect mem, iRegL newv, iRegLNoSp prev) %{
  match(Set prev (GetAndSetL mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

// narrow oop: 32-bit exchange
instruct get_and_setN(indirect mem, iRegN newv, iRegINoSp prev) %{
  match(Set prev (GetAndSetN mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchgw $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setP(indirect mem, iRegP newv, iRegPNoSp prev) %{
  match(Set prev (GetAndSetP mem newv));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "atomic_xchg  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchg($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8716 
// Acquiring exchange variants: same as above but use the
// atomic_xchgal* (acquire) forms, selected by
// needs_acquiring_load_exclusive(n).
instruct get_and_setIAcq(indirect mem, iRegI newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetI mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setLAcq(indirect mem, iRegL newv, iRegLNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetL mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setNAcq(indirect mem, iRegN newv, iRegINoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetN mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchgw_acq $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgalw($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_setPAcq(indirect mem, iRegP newv, iRegPNoSp prev) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set prev (GetAndSetP mem newv));
  ins_cost(VOLATILE_REF_COST);
  format %{ "atomic_xchg_acq  $prev, $newv, [$mem]" %}
  ins_encode %{
    __ atomic_xchgal($prev$$Register, $newv$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8760 
8761 
// 64-bit atomic fetch-and-add.  Four rules: register vs immediate
// (immLAddSub) increment, each with a _no_res variant matched when
// C2 proves the result unused (result_not_used) -- those pass noreg
// and are one cost unit cheaper, so they win when applicable.
instruct get_and_addL(indirect mem, iRegLNoSp newval, iRegL incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_res(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  match(Set newval (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST + 1);
  format %{ "get_and_addL $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_add($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_res(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(2 * VOLATILE_REF_COST);
  format %{ "get_and_addL [$mem], $incr" %}
  ins_encode %{
    __ atomic_add(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8803 
8804 instruct get_and_addI(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
8805   match(Set newval (GetAndAddI mem incr));
8806   ins_cost(2 * VOLATILE_REF_COST + 1);
8807   format %{ "get_and_addI $newval, [$mem], $incr" %}
8808   ins_encode %{
8809     __ atomic_addw($newval$$Register, $incr$$Register, as_Register($mem$$base));
8810   %}
8811   ins_pipe(pipe_serial);
8812 %}
8813 
8814 instruct get_and_addI_no_res(indirect mem, Universe dummy, iRegIorL2I incr) %{
8815   predicate(n->as_LoadStore()->result_not_used());
8816   match(Set dummy (GetAndAddI mem incr));
8817   ins_cost(2 * VOLATILE_REF_COST);
8818   format %{ "get_and_addI [$mem], $incr" %}
8819   ins_encode %{
8820     __ atomic_addw(noreg, $incr$$Register, as_Register($mem$$base));
8821   %}
8822   ins_pipe(pipe_serial);
8823 %}
8824 
8825 instruct get_and_addIi(indirect mem, iRegINoSp newval, immIAddSub incr) %{
8826   match(Set newval (GetAndAddI mem incr));
8827   ins_cost(2 * VOLATILE_REF_COST + 1);
8828   format %{ "get_and_addI $newval, [$mem], $incr" %}
8829   ins_encode %{
8830     __ atomic_addw($newval$$Register, $incr$$constant, as_Register($mem$$base));
8831   %}
8832   ins_pipe(pipe_serial);
8833 %}
8834 
8835 instruct get_and_addIi_no_res(indirect mem, Universe dummy, immIAddSub incr) %{
8836   predicate(n->as_LoadStore()->result_not_used());
8837   match(Set dummy (GetAndAddI mem incr));
8838   ins_cost(2 * VOLATILE_REF_COST);
8839   format %{ "get_and_addI [$mem], $incr" %}
8840   ins_encode %{
8841     __ atomic_addw(noreg, $incr$$constant, as_Register($mem$$base));
8842   %}
8843   ins_pipe(pipe_serial);
8844 %}
8845 
// Acquiring variants of the get-and-add rules above: selected when the
// predicate needs_acquiring_load_exclusive(n) holds, and emitted via the
// atomic_addal/atomic_addalw forms. Same naming scheme as the plain rules.

instruct get_and_addLAcq(indirect mem, iRegLNoSp newval, iRegL incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addL_no_resAcq(indirect mem, Universe dummy, iRegL incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLiAcq(indirect mem, iRegLNoSp newval, immLAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addL_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addLi_no_resAcq(indirect mem, Universe dummy, immLAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddL mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addL_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addal(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIAcq(indirect mem, iRegINoSp newval, iRegIorL2I incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addI_no_resAcq(indirect mem, Universe dummy, iRegIorL2I incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$Register, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIiAcq(indirect mem, iRegINoSp newval, immIAddSub incr) %{
  predicate(needs_acquiring_load_exclusive(n));
  match(Set newval (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST + 1);
  format %{ "get_and_addI_acq $newval, [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw($newval$$Register, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}

instruct get_and_addIi_no_resAcq(indirect mem, Universe dummy, immIAddSub incr) %{
  predicate(n->as_LoadStore()->result_not_used() && needs_acquiring_load_exclusive(n));
  match(Set dummy (GetAndAddI mem incr));
  ins_cost(VOLATILE_REF_COST);
  format %{ "get_and_addI_acq [$mem], $incr" %}
  ins_encode %{
    __ atomic_addalw(noreg, $incr$$constant, as_Register($mem$$base));
  %}
  ins_pipe(pipe_serial);
%}
8933 
8934 // Manifest a CmpL result in an integer register.
8935 // (src1 < src2) ? -1 : ((src1 > src2) ? 1 : 0)
instruct cmpL3_reg_reg(iRegINoSp dst, iRegL src1, iRegL src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // dst = (src1 != src2) ? 1 : 0, then negate dst when src1 < src2,
    // yielding the -1/0/+1 three-way compare result required by CmpL3.
    __ cmp($src1$$Register, $src2$$Register);
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8956 
// Immediate flavour of cmpL3_reg_reg: src2 is an add/sub-encodable constant.
instruct cmpL3_reg_imm(iRegINoSp dst, iRegL src1, immLAddSub src2, rFlagsReg flags)
%{
  match(Set dst (CmpL3 src1 src2));
  effect(KILL flags);

  ins_cost(INSN_COST * 6);
  format %{
      "cmp $src1, $src2"
      "csetw $dst, ne"
      "cnegw $dst, lt"
  %}
  ins_encode %{
    // A negative constant cannot be encoded directly in subs, so add
    // its negation instead -- the flag results are the same.
    int32_t con = (int32_t)$src2$$constant;
    if (con < 0) {
      __ adds(zr, $src1$$Register, -con);
    } else {
      __ subs(zr, $src1$$Register, con);
    }
    // Manifest -1/0/+1 from the flags as in cmpL3_reg_reg.
    __ csetw($dst$$Register, Assembler::NE);
    __ cnegw($dst$$Register, $dst$$Register, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
8981 
8982 // ============================================================================
8983 // Conditional Move Instructions
8984 
// n.b. we have identical rules for both a signed compare op (cmpOp)
// and an unsigned compare op (cmpOpU). it would be nice if we could
// define an op class which merged both inputs and use it to type the
// argument to a single rule. unfortunately this fails because the
// opclass does not live up to the COND_INTER interface of its
// component operands. When the generic code tries to negate the
// operand it ends up running the generic MachOper::negate method
// which throws a ShouldNotHappen. So, we have to provide two flavours
// of each rule, one for a cmpOp and a second for a cmpOpU (sigh).
8994 
// Conditional move, int, register/register; signed (cmpOp) and
// unsigned (cmpOpU) flavours.
// n.b. csel's first source operand is selected when the condition
// holds, so $src2 is passed first: dst = $cmp ? src2 : src1.

instruct cmovI_reg_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUI_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9026 
9027 // special cases where one arg is zero
9028 
9029 // n.b. this is selected in preference to the rule above because it
9030 // avoids loading constant 0 into a source register
9031 
9032 // TODO
9033 // we ought only to be able to cull one of these variants as the ideal
9034 // transforms ought always to order the zero consistently (to left/right?)
9035 
// Conditional move, int, where one input is constant zero: use zr as the
// csel source instead of materialising 0 in a register.

instruct cmovI_zero_reg(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, iRegIorL2I src) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

// zero on the right: dst = $cmp ? zr : src

instruct cmovI_reg_zero(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUI_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, iRegIorL2I src, immI0 zero) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, int"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9099 
9100 // special case for creating a boolean 0 or 1
9101 
9102 // n.b. this is selected in preference to the rule above because it
9103 // avoids loading constants 0 and 1 into a source register
9104 
// Boolean materialisation: CMove between constants 1 and 0 collapses to a
// single csincw of zr/zr (dst = $cmp ? 0 : 0 + 1), avoiding any constant loads.

instruct cmovI_reg_zero_one(cmpOp cmp, rFlagsReg cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# signed, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}

instruct cmovUI_reg_zero_one(cmpOpU cmp, rFlagsRegU cr, iRegINoSp dst, immI0 zero, immI_1 one) %{
  match(Set dst (CMoveI (Binary cmp cr) (Binary one zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csincw $dst, zr, zr $cmp\t# unsigned, int"  %}

  ins_encode %{
    // equivalently
    // cset(as_Register($dst$$reg),
    //      negate_condition((Assembler::Condition)$cmp$$cmpcode));
    __ csincw(as_Register($dst$$reg),
             zr,
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_none);
%}
9142 
// Conditional move, long, register/register; signed and unsigned flavours.
// csel selects its first source when the condition holds: dst = $cmp ? src2 : src1.

instruct cmovL_reg_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUL_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9174 
9175 // special cases where one arg is zero
9176 
// Conditional move, long, one input constant zero: use zr directly.

instruct cmovL_reg_zero(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, iRegL src, immL0 zero) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovL_zero_reg(cmpOp cmp, rFlagsReg cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUL_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegLNoSp dst, immL0 zero, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, long"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9240 
// Conditional move, pointer, register/register; signed and unsigned flavours.

instruct cmovP_reg_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}

instruct cmovUP_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src1, iRegP src2) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src2, $src1 $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src2$$reg),
            as_Register($src1$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9272 
9273 // special cases where one arg is zero
9274 
// Conditional move, pointer, one input constant null: use zr directly.

instruct cmovP_reg_zero(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, iRegP src, immP0 zero) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, zr, $src $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            zr,
            as_Register($src$$reg),
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovP_zero_reg(cmpOp cmp, rFlagsReg cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# signed, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUP_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegPNoSp dst, immP0 zero, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "csel $dst, $src, zr $cmp\t# unsigned, ptr"  %}

  ins_encode %{
    __ csel(as_Register($dst$$reg),
            as_Register($src$$reg),
            zr,
            (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9338 
// Conditional move, compressed pointer (narrow oop), register/register,
// signed comparison flavour.
instruct cmovN_reg_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src2, $src1 $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9354 
// Conditional move, compressed pointer (narrow oop), register/register,
// unsigned comparison flavour.
instruct cmovUN_reg_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src1, iRegN src2) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 2);
  // was "# signed": this rule matches cmpOpU, so label it unsigned like
  // every other U-flavoured cmov rule in this file.
  format %{ "cselw $dst, $src2, $src1 $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src2$$reg),
             as_Register($src1$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg_reg);
%}
9370 
9371 // special cases where one arg is zero
9372 
// Conditional move, compressed pointer, one input constant null: use zr.

instruct cmovN_reg_zero(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_reg_zero(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, iRegN src, immN0 zero) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary src zero)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, zr, $src $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             zr,
             as_Register($src$$reg),
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovN_zero_reg(cmpOp cmp, rFlagsReg cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# signed, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}

instruct cmovUN_zero_reg(cmpOpU cmp, rFlagsRegU cr, iRegNNoSp dst, immN0 zero, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp cr) (Binary zero src)));

  ins_cost(INSN_COST * 2);
  format %{ "cselw $dst, $src, zr $cmp\t# unsigned, compressed ptr"  %}

  ins_encode %{
    __ cselw(as_Register($dst$$reg),
             as_Register($src$$reg),
             zr,
             (Assembler::Condition)$cmp$$cmpcode);
  %}

  ins_pipe(icond_reg);
%}
9436 
// Conditional move, float: fcsel selects its first source when the
// condition holds, so $src2 is passed first (dst = $cmp ? src2 : src1).

instruct cmovF_reg(cmpOp cmp, rFlagsReg cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# signed cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}

instruct cmovUF_reg(cmpOpU cmp, rFlagsRegU cr, vRegF dst, vRegF src1,  vRegF src2)
%{
  match(Set dst (CMoveF (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  format %{ "fcsels $dst, $src1, $src2, $cmp\t# unsigned cmove float\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcsels(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_s);
%}
9472 
// Conditional move, double (CMoveD), signed comparison flavour.
instruct cmovD_reg(cmpOp cmp, rFlagsReg cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // was "cmove float": this rule matches CMoveD and emits fcseld.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# signed cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9490 
// Conditional move, double (CMoveD), unsigned comparison flavour.
instruct cmovUD_reg(cmpOpU cmp, rFlagsRegU cr, vRegD dst, vRegD src1,  vRegD src2)
%{
  match(Set dst (CMoveD (Binary cmp cr) (Binary src1 src2)));

  ins_cost(INSN_COST * 3);

  // was "cmove float": this rule matches CMoveD and emits fcseld.
  format %{ "fcseld $dst, $src1, $src2, $cmp\t# unsigned cmove double\n\t" %}
  ins_encode %{
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ fcseld(as_FloatRegister($dst$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src1$$reg),
              cond);
  %}

  ins_pipe(fp_cond_reg_reg_d);
%}
9508 
9509 // ============================================================================
9510 // Arithmetic Instructions
9511 //
9512 
9513 // Integer Addition
9514 
9515 // TODO
9516 // these currently employ operations which do not set CR and hence are
9517 // not flagged as killing CR but we would like to isolate the cases
9518 // where we want to set flags from those where we don't. need to work
9519 // out how to do that.
9520 
// Integer (32-bit) addition rules.

instruct addI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// register + add/sub-encodable immediate
instruct addI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (AddI src1 src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}

// the L2I conversion is free: addw already reads only the low 32 bits
instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
  match(Set dst (AddI (ConvL2I src1) src2));

  ins_cost(INSN_COST);
  format %{ "addw $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9563 
9564 // Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// fold the int->long conversion into the add via a sign-extending (sxtw) operand
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
  match(Set dst (AddP src1 (ConvI2L src2)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg), ext::sxtw);
  %}

  ins_pipe(ialu_reg_reg);
%}

// fold a scaled index (left shift) into the address computation via lea
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL src2 scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, LShiftL $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::lsl($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// fold both the int->long conversion and the scale into one lea (sxtw + shift)
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
  match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add $dst, $src1, $src2, I2L $scale\t# ptr" %}

  ins_encode %{
    __ lea(as_Register($dst$$reg),
           Address(as_Register($src1$$reg), as_Register($src2$$reg),
                   Address::sxtw($scale$$constant)));
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
9624 
// Combined int->long conversion plus left shift, emitted as a single
// sbfiz (signed bitfield insert in zero).
instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
  match(Set dst (LShiftL (ConvI2L src) scale));

  ins_cost(INSN_COST);
  format %{ "sbfiz $dst, $src, $scale & 63, -$scale & 63\t" %}

  ins_encode %{
    // width is capped at 32 bits since the source is a sign-extended int
    __ sbfiz(as_Register($dst$$reg),
          as_Register($src$$reg),
          $scale$$constant & 63, MIN(32, (-$scale$$constant) & 63));
  %}

  ins_pipe(ialu_reg_shift);
%}
9639 
9640 // Pointer Immediate Addition
9641 // n.b. this needs to be more expensive than using an indirect memory
9642 // operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
  match(Set dst (AddP src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2\t# ptr" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9656 
9657 // Long Addition
instruct addL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9673 
// Long Immediate Addition.
// No constant pool entries required.
// long + add/sub-encodable immediate; no constant pool entry needed
instruct addL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  match(Set dst (AddL src1 src2));

  ins_cost(INSN_COST);
  format %{ "add $dst, $src1, $src2" %}

  // use opcode to indicate that this is an add not a sub
  opcode(0x0);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9688 
9689 // Integer Subtraction
instruct subI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
instruct subI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immIAddSub src2) %{
  match(Set dst (SubI src1 src2));

  ins_cost(INSN_COST);
  format %{ "subw $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode(aarch64_enc_addsubw_imm(dst, src1, src2));

  ins_pipe(ialu_reg_imm);
%}
9719 
9720 // Long Subtraction
instruct subL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{

  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
9736 
// Long Immediate Subtraction.
// No constant pool entries required.
instruct subL_reg_imm(iRegLNoSp dst, iRegL src1, immLAddSub src2) %{
  // 64-bit subtract of an add/sub-encodable immediate.
  match(Set dst (SubL src1 src2));

  ins_cost(INSN_COST);
  // Fixed format: a space was missing between the mnemonic and $dst
  // ("sub$dst"), unlike every sibling rule.
  format %{ "sub $dst, $src1, $src2" %}

  // use opcode to indicate that this is a sub not an add
  opcode(0x1);

  ins_encode( aarch64_enc_addsub_imm(dst, src1, src2) );

  ins_pipe(ialu_reg_imm);
%}
9751 
9752 // Integer Negation (special case for sub)
9753 
instruct negI_reg(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr) %{
  // Matches 0 - src and emits a single 32-bit negate (negw).
  match(Set dst (SubI zero src));

  ins_cost(INSN_COST);
  format %{ "negw $dst, $src\t# int" %}

  ins_encode %{
    __ negw(as_Register($dst$$reg),
            as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9767 
9768 // Long Negation
9769 
instruct negL_reg(iRegLNoSp dst, iRegL src, immL0 zero, rFlagsReg cr) %{
  // Matches 0 - src and emits a single 64-bit negate (neg).
  match(Set dst (SubL zero src));

  ins_cost(INSN_COST);
  format %{ "neg $dst, $src\t# long" %}

  ins_encode %{
    __ neg(as_Register($dst$$reg),
           as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
9783 
9784 // Integer Multiply
9785 
instruct mulI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // 32-bit integer multiply (mulw).
  match(Set dst (MulI src1 src2));

  ins_cost(INSN_COST * 3);
  format %{ "mulw  $dst, $src1, $src2" %}

  ins_encode %{
    __ mulw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9800 
instruct smulI(iRegLNoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // Widening 32x32 -> 64-bit signed multiply: a long multiply of two
  // sign-extended ints collapses to a single smull.
  match(Set dst (MulL (ConvI2L src1) (ConvI2L src2)));

  ins_cost(INSN_COST * 3);
  format %{ "smull  $dst, $src1, $src2" %}

  ins_encode %{
    __ smull(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(imul_reg_reg);
%}
9815 
9816 // Long Multiply
9817 
instruct mulL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  // 64-bit multiply (mul); only the low 64 bits of the product.
  match(Set dst (MulL src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "mul  $dst, $src1, $src2" %}

  ins_encode %{
    __ mul(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9832 
instruct mulHiL_rReg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr)
%{
  // High 64 bits of the 128-bit signed product (smulh).
  match(Set dst (MulHiL src1 src2));

  ins_cost(INSN_COST * 7);
  // Fixed format: removed the stray ", " before the comment tab, which
  // made the pseudo-assembly print a dangling comma.
  format %{ "smulh   $dst, $src1, $src2\t# mulhi" %}

  ins_encode %{
    __ smulh(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(lmul_reg_reg);
%}
9848 
9849 // Combined Integer Multiply & Add/Sub
9850 
instruct maddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  // Fused 32-bit multiply-add: dst = src3 + src1 * src2.
  match(Set dst (AddI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format: the encoding emits the 32-bit maddw, not madd.
  format %{ "maddw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ maddw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9866 
instruct msubI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, iRegIorL2I src3) %{
  // Fused 32-bit multiply-subtract: dst = src3 - src1 * src2.
  match(Set dst (SubI src3 (MulI src1 src2)));

  ins_cost(INSN_COST * 3);
  // Fixed format: the encoding emits the 32-bit msubw, not msub.
  format %{ "msubw  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msubw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             as_Register($src3$$reg));
  %}

  ins_pipe(imac_reg_reg);
%}
9882 
9883 // Combined Long Multiply & Add/Sub
9884 
instruct maddL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  // Fused 64-bit multiply-add: dst = src3 + src1 * src2 (madd).
  match(Set dst (AddL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "madd  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ madd(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9900 
instruct msubL(iRegLNoSp dst, iRegL src1, iRegL src2, iRegL src3) %{
  // Fused 64-bit multiply-subtract: dst = src3 - src1 * src2 (msub).
  match(Set dst (SubL src3 (MulL src1 src2)));

  ins_cost(INSN_COST * 5);
  format %{ "msub  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ msub(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg),
            as_Register($src3$$reg));
  %}

  ins_pipe(lmac_reg_reg);
%}
9916 
9917 // Integer Divide
9918 
instruct divI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // Signed 32-bit divide; emission is delegated to aarch64_enc_divw.
  match(Set dst (DivI src1 src2));

  ins_cost(INSN_COST * 19);
  format %{ "sdivw  $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_divw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
9928 
instruct signExtract(iRegINoSp dst, iRegIorL2I src1, immI_31 div1, immI_31 div2) %{
  // Matches (src1 >> 31) >>> 31: the arithmetic-then-logical shift by 31
  // collapses to a single lsrw #31, leaving just the sign bit (0 or 1).
  match(Set dst (URShiftI (RShiftI src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, $div1" %}
  ins_encode %{
    __ lsrw(as_Register($dst$$reg), as_Register($src1$$reg), 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
9938 
instruct div2Round(iRegINoSp dst, iRegIorL2I src, immI_31 div1, immI_31 div2) %{
  // src + ((src >> 31) >>> 31) adds the sign bit of src — the rounding
  // adjustment pattern for a signed divide by two; emitted as a single
  // add with an LSR #31 shifted operand.
  match(Set dst (AddI src (URShiftI (RShiftI src div1) div2)));
  ins_cost(INSN_COST);
  format %{ "addw $dst, $src, LSR $div1" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 31);
  %}
  ins_pipe(ialu_reg);
%}
9952 
9953 // Long Divide
9954 
instruct divL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  // Signed 64-bit divide; emission is delegated to aarch64_enc_div.
  match(Set dst (DivL src1 src2));

  ins_cost(INSN_COST * 35);
  format %{ "sdiv   $dst, $src1, $src2" %}

  ins_encode(aarch64_enc_div(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
9964 
instruct signExtractL(iRegLNoSp dst, iRegL src1, immI_63 div1, immI_63 div2) %{
  // 64-bit variant of signExtract: (src1 >> 63) >>> 63 collapses to a
  // single lsr #63, extracting the sign bit.
  match(Set dst (URShiftL (RShiftL src1 div1) div2));
  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, $div1" %}
  ins_encode %{
    __ lsr(as_Register($dst$$reg), as_Register($src1$$reg), 63);
  %}
  ins_pipe(ialu_reg_shift);
%}
9974 
instruct div2RoundL(iRegLNoSp dst, iRegL src, immI_63 div1, immI_63 div2) %{
  // src + ((src >> 63) >>> 63) adds the sign bit of src — the rounding
  // adjustment pattern for a signed divide by two (64-bit variant).
  match(Set dst (AddL src (URShiftL (RShiftL src div1) div2)));
  ins_cost(INSN_COST);
  // Fixed format: show the LSR shifted-operand form actually emitted,
  // matching div2Round above.
  format %{ "add $dst, $src, LSR $div1" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src$$reg),
              as_Register($src$$reg),
              Assembler::LSR, 63);
  %}
  ins_pipe(ialu_reg);
%}
9988 
9989 // Integer Remainder
9990 
instruct modI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // 32-bit remainder: src1 - (src1 / src2) * src2, computed with sdivw
  // followed by msubw (see aarch64_enc_modw).
  match(Set dst (ModI src1 src2));

  ins_cost(INSN_COST * 22);
  // Fixed format: the msubw line had a stray '(' after the mnemonic and
  // no separating space.
  format %{ "sdivw  rscratch1, $src1, $src2\n\t"
            "msubw  $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_modw(dst, src1, src2));
  ins_pipe(idiv_reg_reg);
%}
10001 
10002 // Long Remainder
10003 
instruct modL(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  // 64-bit remainder: src1 - (src1 / src2) * src2, computed with sdiv
  // followed by msub (see aarch64_enc_mod).
  match(Set dst (ModL src1 src2));

  ins_cost(INSN_COST * 38);
  // Fixed format: the msub line had a stray '(' and no operand spacing,
  // and the line break lacked the '\t' used by modI.
  format %{ "sdiv   rscratch1, $src1, $src2\n\t"
            "msub   $dst, rscratch1, $src2, $src1" %}

  ins_encode(aarch64_enc_mod(dst, src1, src2));
  ins_pipe(ldiv_reg_reg);
%}
10014 
10015 // Integer Shifts
10016 
10017 // Shift Left Register
instruct lShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // 32-bit shift left, variable shift amount in a register (lslvw).
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10032 
10033 // Shift Left Immediate
instruct lShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  // 32-bit shift left by an immediate; the count is masked to the low
  // 5 bits (& 0x1f), matching Java shift semantics for ints.
  match(Set dst (LShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lslw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lslw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10048 
10049 // Shift Right Logical Register
instruct urShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // 32-bit logical shift right, variable shift amount in a register (lsrvw).
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10064 
10065 // Shift Right Logical Immediate
instruct urShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  // 32-bit logical shift right by an immediate; count masked to 5 bits.
  match(Set dst (URShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ lsrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10080 
10081 // Shift Right Arithmetic Register
instruct rShiftI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  // 32-bit arithmetic shift right, variable shift amount in a register (asrvw).
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrvw  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrvw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10096 
10097 // Shift Right Arithmetic Immediate
instruct rShiftI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immI src2) %{
  // 32-bit arithmetic shift right by an immediate; count masked to 5 bits.
  match(Set dst (RShiftI src1 src2));

  ins_cost(INSN_COST);
  format %{ "asrw $dst, $src1, ($src2 & 0x1f)" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10112 
10113 // Combined Int Mask and Right Shift (using UBFM)
10114 // TODO
10115 
10116 // Long Shifts
10117 
10118 // Shift Left Register
instruct lShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  // 64-bit shift left, variable shift amount in a register (lslv).
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lslv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lslv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10133 
10134 // Shift Left Immediate
instruct lShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  // 64-bit shift left by an immediate; the count is masked to the low
  // 6 bits (& 0x3f), matching Java shift semantics for longs.
  match(Set dst (LShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsl $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsl(as_Register($dst$$reg),
            as_Register($src1$$reg),
            $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10149 
10150 // Shift Right Logical Register
instruct urShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  // 64-bit logical shift right, variable shift amount in a register (lsrv).
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "lsrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ lsrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10165 
10166 // Shift Right Logical Immediate
instruct urShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  // 64-bit logical shift right by an immediate; count masked to 6 bits.
  match(Set dst (URShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10181 
10182 // A special-case pattern for card table stores.
instruct urShiftP_reg_imm(iRegLNoSp dst, iRegP src1, immI src2) %{
  // Logical right shift of a pointer reinterpreted as an integer
  // (CastP2X); count masked to 6 bits.
  match(Set dst (URShiftL (CastP2X src1) src2));

  ins_cost(INSN_COST);
  format %{ "lsr $dst, p2x($src1), ($src2 & 0x3f)" %}

  ins_encode %{
    __ lsr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10197 
10198 // Shift Right Arithmetic Register
instruct rShiftL_reg_reg(iRegLNoSp dst, iRegL src1, iRegIorL2I src2) %{
  // 64-bit arithmetic shift right, variable shift amount in a register (asrv).
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST * 2);
  format %{ "asrv  $dst, $src1, $src2" %}

  ins_encode %{
    __ asrv(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg_vshift);
%}
10213 
10214 // Shift Right Arithmetic Immediate
instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
  // 64-bit arithmetic shift right by an immediate; count masked to 6 bits.
  match(Set dst (RShiftL src1 src2));

  ins_cost(INSN_COST);
  format %{ "asr $dst, $src1, ($src2 & 0x3f)" %}

  ins_encode %{
    __ asr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           $src2$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_shift);
%}
10229 
10230 // BEGIN This section of the file is automatically generated. Do not edit --------------
10231 
10232 instruct regL_not_reg(iRegLNoSp dst,
10233                          iRegL src1, immL_M1 m1,
10234                          rFlagsReg cr) %{
10235   match(Set dst (XorL src1 m1));
10236   ins_cost(INSN_COST);
10237   format %{ "eon  $dst, $src1, zr" %}
10238 
10239   ins_encode %{
10240     __ eon(as_Register($dst$$reg),
10241               as_Register($src1$$reg),
10242               zr,
10243               Assembler::LSL, 0);
10244   %}
10245 
10246   ins_pipe(ialu_reg);
10247 %}
10248 instruct regI_not_reg(iRegINoSp dst,
10249                          iRegIorL2I src1, immI_M1 m1,
10250                          rFlagsReg cr) %{
10251   match(Set dst (XorI src1 m1));
10252   ins_cost(INSN_COST);
10253   format %{ "eonw  $dst, $src1, zr" %}
10254 
10255   ins_encode %{
10256     __ eonw(as_Register($dst$$reg),
10257               as_Register($src1$$reg),
10258               zr,
10259               Assembler::LSL, 0);
10260   %}
10261 
10262   ins_pipe(ialu_reg);
10263 %}
10264 
10265 instruct AndI_reg_not_reg(iRegINoSp dst,
10266                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10267                          rFlagsReg cr) %{
10268   match(Set dst (AndI src1 (XorI src2 m1)));
10269   ins_cost(INSN_COST);
10270   format %{ "bicw  $dst, $src1, $src2" %}
10271 
10272   ins_encode %{
10273     __ bicw(as_Register($dst$$reg),
10274               as_Register($src1$$reg),
10275               as_Register($src2$$reg),
10276               Assembler::LSL, 0);
10277   %}
10278 
10279   ins_pipe(ialu_reg_reg);
10280 %}
10281 
10282 instruct AndL_reg_not_reg(iRegLNoSp dst,
10283                          iRegL src1, iRegL src2, immL_M1 m1,
10284                          rFlagsReg cr) %{
10285   match(Set dst (AndL src1 (XorL src2 m1)));
10286   ins_cost(INSN_COST);
10287   format %{ "bic  $dst, $src1, $src2" %}
10288 
10289   ins_encode %{
10290     __ bic(as_Register($dst$$reg),
10291               as_Register($src1$$reg),
10292               as_Register($src2$$reg),
10293               Assembler::LSL, 0);
10294   %}
10295 
10296   ins_pipe(ialu_reg_reg);
10297 %}
10298 
10299 instruct OrI_reg_not_reg(iRegINoSp dst,
10300                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10301                          rFlagsReg cr) %{
10302   match(Set dst (OrI src1 (XorI src2 m1)));
10303   ins_cost(INSN_COST);
10304   format %{ "ornw  $dst, $src1, $src2" %}
10305 
10306   ins_encode %{
10307     __ ornw(as_Register($dst$$reg),
10308               as_Register($src1$$reg),
10309               as_Register($src2$$reg),
10310               Assembler::LSL, 0);
10311   %}
10312 
10313   ins_pipe(ialu_reg_reg);
10314 %}
10315 
10316 instruct OrL_reg_not_reg(iRegLNoSp dst,
10317                          iRegL src1, iRegL src2, immL_M1 m1,
10318                          rFlagsReg cr) %{
10319   match(Set dst (OrL src1 (XorL src2 m1)));
10320   ins_cost(INSN_COST);
10321   format %{ "orn  $dst, $src1, $src2" %}
10322 
10323   ins_encode %{
10324     __ orn(as_Register($dst$$reg),
10325               as_Register($src1$$reg),
10326               as_Register($src2$$reg),
10327               Assembler::LSL, 0);
10328   %}
10329 
10330   ins_pipe(ialu_reg_reg);
10331 %}
10332 
10333 instruct XorI_reg_not_reg(iRegINoSp dst,
10334                          iRegIorL2I src1, iRegIorL2I src2, immI_M1 m1,
10335                          rFlagsReg cr) %{
10336   match(Set dst (XorI m1 (XorI src2 src1)));
10337   ins_cost(INSN_COST);
10338   format %{ "eonw  $dst, $src1, $src2" %}
10339 
10340   ins_encode %{
10341     __ eonw(as_Register($dst$$reg),
10342               as_Register($src1$$reg),
10343               as_Register($src2$$reg),
10344               Assembler::LSL, 0);
10345   %}
10346 
10347   ins_pipe(ialu_reg_reg);
10348 %}
10349 
10350 instruct XorL_reg_not_reg(iRegLNoSp dst,
10351                          iRegL src1, iRegL src2, immL_M1 m1,
10352                          rFlagsReg cr) %{
10353   match(Set dst (XorL m1 (XorL src2 src1)));
10354   ins_cost(INSN_COST);
10355   format %{ "eon  $dst, $src1, $src2" %}
10356 
10357   ins_encode %{
10358     __ eon(as_Register($dst$$reg),
10359               as_Register($src1$$reg),
10360               as_Register($src2$$reg),
10361               Assembler::LSL, 0);
10362   %}
10363 
10364   ins_pipe(ialu_reg_reg);
10365 %}
10366 
10367 instruct AndI_reg_URShift_not_reg(iRegINoSp dst,
10368                          iRegIorL2I src1, iRegIorL2I src2,
10369                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10370   match(Set dst (AndI src1 (XorI(URShiftI src2 src3) src4)));
10371   ins_cost(1.9 * INSN_COST);
10372   format %{ "bicw  $dst, $src1, $src2, LSR $src3" %}
10373 
10374   ins_encode %{
10375     __ bicw(as_Register($dst$$reg),
10376               as_Register($src1$$reg),
10377               as_Register($src2$$reg),
10378               Assembler::LSR,
10379               $src3$$constant & 0x1f);
10380   %}
10381 
10382   ins_pipe(ialu_reg_reg_shift);
10383 %}
10384 
10385 instruct AndL_reg_URShift_not_reg(iRegLNoSp dst,
10386                          iRegL src1, iRegL src2,
10387                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10388   match(Set dst (AndL src1 (XorL(URShiftL src2 src3) src4)));
10389   ins_cost(1.9 * INSN_COST);
10390   format %{ "bic  $dst, $src1, $src2, LSR $src3" %}
10391 
10392   ins_encode %{
10393     __ bic(as_Register($dst$$reg),
10394               as_Register($src1$$reg),
10395               as_Register($src2$$reg),
10396               Assembler::LSR,
10397               $src3$$constant & 0x3f);
10398   %}
10399 
10400   ins_pipe(ialu_reg_reg_shift);
10401 %}
10402 
10403 instruct AndI_reg_RShift_not_reg(iRegINoSp dst,
10404                          iRegIorL2I src1, iRegIorL2I src2,
10405                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10406   match(Set dst (AndI src1 (XorI(RShiftI src2 src3) src4)));
10407   ins_cost(1.9 * INSN_COST);
10408   format %{ "bicw  $dst, $src1, $src2, ASR $src3" %}
10409 
10410   ins_encode %{
10411     __ bicw(as_Register($dst$$reg),
10412               as_Register($src1$$reg),
10413               as_Register($src2$$reg),
10414               Assembler::ASR,
10415               $src3$$constant & 0x1f);
10416   %}
10417 
10418   ins_pipe(ialu_reg_reg_shift);
10419 %}
10420 
10421 instruct AndL_reg_RShift_not_reg(iRegLNoSp dst,
10422                          iRegL src1, iRegL src2,
10423                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10424   match(Set dst (AndL src1 (XorL(RShiftL src2 src3) src4)));
10425   ins_cost(1.9 * INSN_COST);
10426   format %{ "bic  $dst, $src1, $src2, ASR $src3" %}
10427 
10428   ins_encode %{
10429     __ bic(as_Register($dst$$reg),
10430               as_Register($src1$$reg),
10431               as_Register($src2$$reg),
10432               Assembler::ASR,
10433               $src3$$constant & 0x3f);
10434   %}
10435 
10436   ins_pipe(ialu_reg_reg_shift);
10437 %}
10438 
10439 instruct AndI_reg_LShift_not_reg(iRegINoSp dst,
10440                          iRegIorL2I src1, iRegIorL2I src2,
10441                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10442   match(Set dst (AndI src1 (XorI(LShiftI src2 src3) src4)));
10443   ins_cost(1.9 * INSN_COST);
10444   format %{ "bicw  $dst, $src1, $src2, LSL $src3" %}
10445 
10446   ins_encode %{
10447     __ bicw(as_Register($dst$$reg),
10448               as_Register($src1$$reg),
10449               as_Register($src2$$reg),
10450               Assembler::LSL,
10451               $src3$$constant & 0x1f);
10452   %}
10453 
10454   ins_pipe(ialu_reg_reg_shift);
10455 %}
10456 
10457 instruct AndL_reg_LShift_not_reg(iRegLNoSp dst,
10458                          iRegL src1, iRegL src2,
10459                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10460   match(Set dst (AndL src1 (XorL(LShiftL src2 src3) src4)));
10461   ins_cost(1.9 * INSN_COST);
10462   format %{ "bic  $dst, $src1, $src2, LSL $src3" %}
10463 
10464   ins_encode %{
10465     __ bic(as_Register($dst$$reg),
10466               as_Register($src1$$reg),
10467               as_Register($src2$$reg),
10468               Assembler::LSL,
10469               $src3$$constant & 0x3f);
10470   %}
10471 
10472   ins_pipe(ialu_reg_reg_shift);
10473 %}
10474 
10475 instruct XorI_reg_URShift_not_reg(iRegINoSp dst,
10476                          iRegIorL2I src1, iRegIorL2I src2,
10477                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10478   match(Set dst (XorI src4 (XorI(URShiftI src2 src3) src1)));
10479   ins_cost(1.9 * INSN_COST);
10480   format %{ "eonw  $dst, $src1, $src2, LSR $src3" %}
10481 
10482   ins_encode %{
10483     __ eonw(as_Register($dst$$reg),
10484               as_Register($src1$$reg),
10485               as_Register($src2$$reg),
10486               Assembler::LSR,
10487               $src3$$constant & 0x1f);
10488   %}
10489 
10490   ins_pipe(ialu_reg_reg_shift);
10491 %}
10492 
10493 instruct XorL_reg_URShift_not_reg(iRegLNoSp dst,
10494                          iRegL src1, iRegL src2,
10495                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10496   match(Set dst (XorL src4 (XorL(URShiftL src2 src3) src1)));
10497   ins_cost(1.9 * INSN_COST);
10498   format %{ "eon  $dst, $src1, $src2, LSR $src3" %}
10499 
10500   ins_encode %{
10501     __ eon(as_Register($dst$$reg),
10502               as_Register($src1$$reg),
10503               as_Register($src2$$reg),
10504               Assembler::LSR,
10505               $src3$$constant & 0x3f);
10506   %}
10507 
10508   ins_pipe(ialu_reg_reg_shift);
10509 %}
10510 
10511 instruct XorI_reg_RShift_not_reg(iRegINoSp dst,
10512                          iRegIorL2I src1, iRegIorL2I src2,
10513                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10514   match(Set dst (XorI src4 (XorI(RShiftI src2 src3) src1)));
10515   ins_cost(1.9 * INSN_COST);
10516   format %{ "eonw  $dst, $src1, $src2, ASR $src3" %}
10517 
10518   ins_encode %{
10519     __ eonw(as_Register($dst$$reg),
10520               as_Register($src1$$reg),
10521               as_Register($src2$$reg),
10522               Assembler::ASR,
10523               $src3$$constant & 0x1f);
10524   %}
10525 
10526   ins_pipe(ialu_reg_reg_shift);
10527 %}
10528 
10529 instruct XorL_reg_RShift_not_reg(iRegLNoSp dst,
10530                          iRegL src1, iRegL src2,
10531                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10532   match(Set dst (XorL src4 (XorL(RShiftL src2 src3) src1)));
10533   ins_cost(1.9 * INSN_COST);
10534   format %{ "eon  $dst, $src1, $src2, ASR $src3" %}
10535 
10536   ins_encode %{
10537     __ eon(as_Register($dst$$reg),
10538               as_Register($src1$$reg),
10539               as_Register($src2$$reg),
10540               Assembler::ASR,
10541               $src3$$constant & 0x3f);
10542   %}
10543 
10544   ins_pipe(ialu_reg_reg_shift);
10545 %}
10546 
10547 instruct XorI_reg_LShift_not_reg(iRegINoSp dst,
10548                          iRegIorL2I src1, iRegIorL2I src2,
10549                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10550   match(Set dst (XorI src4 (XorI(LShiftI src2 src3) src1)));
10551   ins_cost(1.9 * INSN_COST);
10552   format %{ "eonw  $dst, $src1, $src2, LSL $src3" %}
10553 
10554   ins_encode %{
10555     __ eonw(as_Register($dst$$reg),
10556               as_Register($src1$$reg),
10557               as_Register($src2$$reg),
10558               Assembler::LSL,
10559               $src3$$constant & 0x1f);
10560   %}
10561 
10562   ins_pipe(ialu_reg_reg_shift);
10563 %}
10564 
10565 instruct XorL_reg_LShift_not_reg(iRegLNoSp dst,
10566                          iRegL src1, iRegL src2,
10567                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10568   match(Set dst (XorL src4 (XorL(LShiftL src2 src3) src1)));
10569   ins_cost(1.9 * INSN_COST);
10570   format %{ "eon  $dst, $src1, $src2, LSL $src3" %}
10571 
10572   ins_encode %{
10573     __ eon(as_Register($dst$$reg),
10574               as_Register($src1$$reg),
10575               as_Register($src2$$reg),
10576               Assembler::LSL,
10577               $src3$$constant & 0x3f);
10578   %}
10579 
10580   ins_pipe(ialu_reg_reg_shift);
10581 %}
10582 
10583 instruct OrI_reg_URShift_not_reg(iRegINoSp dst,
10584                          iRegIorL2I src1, iRegIorL2I src2,
10585                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10586   match(Set dst (OrI src1 (XorI(URShiftI src2 src3) src4)));
10587   ins_cost(1.9 * INSN_COST);
10588   format %{ "ornw  $dst, $src1, $src2, LSR $src3" %}
10589 
10590   ins_encode %{
10591     __ ornw(as_Register($dst$$reg),
10592               as_Register($src1$$reg),
10593               as_Register($src2$$reg),
10594               Assembler::LSR,
10595               $src3$$constant & 0x1f);
10596   %}
10597 
10598   ins_pipe(ialu_reg_reg_shift);
10599 %}
10600 
10601 instruct OrL_reg_URShift_not_reg(iRegLNoSp dst,
10602                          iRegL src1, iRegL src2,
10603                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10604   match(Set dst (OrL src1 (XorL(URShiftL src2 src3) src4)));
10605   ins_cost(1.9 * INSN_COST);
10606   format %{ "orn  $dst, $src1, $src2, LSR $src3" %}
10607 
10608   ins_encode %{
10609     __ orn(as_Register($dst$$reg),
10610               as_Register($src1$$reg),
10611               as_Register($src2$$reg),
10612               Assembler::LSR,
10613               $src3$$constant & 0x3f);
10614   %}
10615 
10616   ins_pipe(ialu_reg_reg_shift);
10617 %}
10618 
10619 instruct OrI_reg_RShift_not_reg(iRegINoSp dst,
10620                          iRegIorL2I src1, iRegIorL2I src2,
10621                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10622   match(Set dst (OrI src1 (XorI(RShiftI src2 src3) src4)));
10623   ins_cost(1.9 * INSN_COST);
10624   format %{ "ornw  $dst, $src1, $src2, ASR $src3" %}
10625 
10626   ins_encode %{
10627     __ ornw(as_Register($dst$$reg),
10628               as_Register($src1$$reg),
10629               as_Register($src2$$reg),
10630               Assembler::ASR,
10631               $src3$$constant & 0x1f);
10632   %}
10633 
10634   ins_pipe(ialu_reg_reg_shift);
10635 %}
10636 
10637 instruct OrL_reg_RShift_not_reg(iRegLNoSp dst,
10638                          iRegL src1, iRegL src2,
10639                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10640   match(Set dst (OrL src1 (XorL(RShiftL src2 src3) src4)));
10641   ins_cost(1.9 * INSN_COST);
10642   format %{ "orn  $dst, $src1, $src2, ASR $src3" %}
10643 
10644   ins_encode %{
10645     __ orn(as_Register($dst$$reg),
10646               as_Register($src1$$reg),
10647               as_Register($src2$$reg),
10648               Assembler::ASR,
10649               $src3$$constant & 0x3f);
10650   %}
10651 
10652   ins_pipe(ialu_reg_reg_shift);
10653 %}
10654 
10655 instruct OrI_reg_LShift_not_reg(iRegINoSp dst,
10656                          iRegIorL2I src1, iRegIorL2I src2,
10657                          immI src3, immI_M1 src4, rFlagsReg cr) %{
10658   match(Set dst (OrI src1 (XorI(LShiftI src2 src3) src4)));
10659   ins_cost(1.9 * INSN_COST);
10660   format %{ "ornw  $dst, $src1, $src2, LSL $src3" %}
10661 
10662   ins_encode %{
10663     __ ornw(as_Register($dst$$reg),
10664               as_Register($src1$$reg),
10665               as_Register($src2$$reg),
10666               Assembler::LSL,
10667               $src3$$constant & 0x1f);
10668   %}
10669 
10670   ins_pipe(ialu_reg_reg_shift);
10671 %}
10672 
10673 instruct OrL_reg_LShift_not_reg(iRegLNoSp dst,
10674                          iRegL src1, iRegL src2,
10675                          immI src3, immL_M1 src4, rFlagsReg cr) %{
10676   match(Set dst (OrL src1 (XorL(LShiftL src2 src3) src4)));
10677   ins_cost(1.9 * INSN_COST);
10678   format %{ "orn  $dst, $src1, $src2, LSL $src3" %}
10679 
10680   ins_encode %{
10681     __ orn(as_Register($dst$$reg),
10682               as_Register($src1$$reg),
10683               as_Register($src2$$reg),
10684               Assembler::LSL,
10685               $src3$$constant & 0x3f);
10686   %}
10687 
10688   ins_pipe(ialu_reg_reg_shift);
10689 %}
10690 
// And (register, register shifted by an immediate).
// The six rules below fold an ideal shift node feeding AndI/AndL into a
// single AND instruction with a shifted-register operand.  Only the low
// 5 bits (32-bit forms) or 6 bits (64-bit forms) of the shift constant
// are used, matching Java shift-count semantics.
// NOTE(review): cr is declared but none of these encodings touch flags;
// presumably retained for historical reasons -- confirm.

// dst = src1 & (src2 >>> src3)   (int)
instruct AndI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >>> src3)   (long)
instruct AndL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3)   (int)
instruct AndI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 >> src3)   (long)
instruct AndL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3)   (int)
instruct AndI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 & (src2 << src3)   (long)
instruct AndL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AndL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "andr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ andr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10804 
// Xor (register, register shifted by an immediate).
// Folds an ideal shift node feeding XorI/XorL into a single EOR with a
// shifted-register operand.  Only the low 5 bits (32-bit forms) or 6 bits
// (64-bit forms) of the shift constant are used.
// NOTE(review): cr is declared but none of these encodings touch flags;
// confirm against sibling rules.

// dst = src1 ^ (src2 >>> src3)   (int)
instruct XorI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >>> src3)   (long)
instruct XorL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3)   (int)
instruct XorI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 >> src3)   (long)
instruct XorL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3)   (int)
instruct XorI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eorw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eorw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 ^ (src2 << src3)   (long)
instruct XorL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (XorL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "eor  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ eor(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
10918 
// Or (register, register shifted by an immediate).
// Folds an ideal shift node feeding OrI/OrL into a single ORR with a
// shifted-register operand.  Only the low 5 bits (32-bit forms) or 6 bits
// (64-bit forms) of the shift constant are used.
// NOTE(review): cr is declared but none of these encodings touch flags;
// confirm against sibling rules.

// dst = src1 | (src2 >>> src3)   (int)
instruct OrI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >>> src3)   (long)
instruct OrL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3)   (int)
instruct OrI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 >> src3)   (long)
instruct OrL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3)   (int)
instruct OrI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orrw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orrw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 | (src2 << src3)   (long)
instruct OrL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (OrL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "orr  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ orr(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11032 
// Add (register, register shifted by an immediate).
// Folds an ideal shift node feeding AddI/AddL into a single ADD with a
// shifted-register operand.  Only the low 5 bits (32-bit forms) or 6 bits
// (64-bit forms) of the shift constant are used.
// NOTE(review): cr is declared but none of these encodings touch flags;
// confirm against sibling rules.

// dst = src1 + (src2 >>> src3)   (int)
instruct AddI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >>> src3)   (long)
instruct AddL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3)   (int)
instruct AddI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 >> src3)   (long)
instruct AddL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3)   (int)
instruct AddI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ addw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 + (src2 << src3)   (long)
instruct AddL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (AddL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ add(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11146 
// Sub (register, register shifted by an immediate).
// Folds an ideal shift node feeding SubI/SubL into a single SUB with a
// shifted-register operand.  Only the low 5 bits (32-bit forms) or 6 bits
// (64-bit forms) of the shift constant are used.
// NOTE(review): cr is declared but none of these encodings touch flags;
// confirm against sibling rules.

// dst = src1 - (src2 >>> src3)   (int)
instruct SubI_reg_URShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (URShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >>> src3)   (long)
instruct SubL_reg_URShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (URShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)   (int)
instruct SubI_reg_RShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (RShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 >> src3)   (long)
instruct SubL_reg_RShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (RShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, ASR $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::ASR,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)   (int)
instruct SubI_reg_LShift_reg(iRegINoSp dst,
                         iRegIorL2I src1, iRegIorL2I src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubI src1 (LShiftI src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ subw(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x1f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}

// dst = src1 - (src2 << src3)   (long)
instruct SubL_reg_LShift_reg(iRegLNoSp dst,
                         iRegL src1, iRegL src2,
                         immI src3, rFlagsReg cr) %{
  match(Set dst (SubL src1 (LShiftL src2 src3)));

  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, LSL $src3" %}

  ins_encode %{
    __ sub(as_Register($dst$$reg),
              as_Register($src1$$reg),
              as_Register($src2$$reg),
              Assembler::LSL,
              $src3$$constant & 0x3f);
  %}

  ins_pipe(ialu_reg_reg_shift);
%}
11260 
11261 
11262 
11263 // Shift Left followed by Shift Right.
11264 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >> rshift (arithmetic), folded to a single SBFM.
// SBFM's (immr, imms) fields are derived so that the rotate/extract
// performed by the instruction reproduces the shift pair.
instruct sbfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "sbfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 63 - lshift;
    // immr: effective right-rotate amount, reduced mod 64.
    int r = (rshift - lshift) & 63;
    __ sbfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11285 
11286 // Shift Left followed by Shift Right.
11287 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of sbfmL: (src << lshift) >> rshift (arithmetic),
// folded to a single SBFMW with (immr, imms) computed mod 32.
instruct sbfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (RShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what sbfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "sbfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 31 - lshift;
    // immr: effective right-rotate amount, reduced mod 32.
    int r = (rshift - lshift) & 31;
    __ sbfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11308 
11309 // Shift Left followed by Shift Right.
11310 // This idiom is used by the compiler for the i2b bytecode etc.
// (src << lshift) >>> rshift (logical), folded to a single UBFM.
// Same (immr, imms) derivation as sbfmL, but zero-extending.
instruct ubfmL(iRegLNoSp dst, iRegL src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftL (LShiftL src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfm can do.
  predicate((unsigned int)n->in(2)->get_int() <= 63
            && (unsigned int)n->in(1)->in(2)->get_int() <= 63);

  ins_cost(INSN_COST * 2);
  format %{ "ubfm  $dst, $src, $rshift_count - $lshift_count, #63 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 63 - lshift;
    // immr: effective right-rotate amount, reduced mod 64.
    int r = (rshift - lshift) & 63;
    __ ubfm(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11331 
11332 // Shift Left followed by Shift Right.
11333 // This idiom is used by the compiler for the i2b bytecode etc.
// 32-bit variant of ubfmL: (src << lshift) >>> rshift (logical),
// folded to a single UBFMW with (immr, imms) computed mod 32.
instruct ubfmwI(iRegINoSp dst, iRegIorL2I src, immI lshift_count, immI rshift_count)
%{
  match(Set dst (URShiftI (LShiftI src lshift_count) rshift_count));
  // Make sure we are not going to exceed what ubfmw can do.
  predicate((unsigned int)n->in(2)->get_int() <= 31
            && (unsigned int)n->in(1)->in(2)->get_int() <= 31);

  ins_cost(INSN_COST * 2);
  format %{ "ubfmw  $dst, $src, $rshift_count - $lshift_count, #31 - $lshift_count" %}
  ins_encode %{
    int lshift = $lshift_count$$constant, rshift = $rshift_count$$constant;
    // imms: highest source bit that survives the left shift.
    int s = 31 - lshift;
    // immr: effective right-rotate amount, reduced mod 32.
    int r = (rshift - lshift) & 31;
    __ ubfmw(as_Register($dst$$reg),
            as_Register($src$$reg),
            r, s);
  %}

  ins_pipe(ialu_reg_shift);
%}
11354 // Bitfield extract with shift & mask
11355 
// Unsigned bitfield extract, 32-bit: (src >>> rshift) & mask -> UBFXW.
// immI_bitmask guarantees mask is a contiguous low-bit mask (2^w - 1),
// so the field width is exact_log2(mask+1).
instruct ubfxwI(iRegINoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (AndI (URShiftI src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfxw $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1, so mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfxw(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
// Unsigned bitfield extract, 64-bit: (src >>> rshift) & mask -> UBFX.
// immL_bitmask guarantees mask is a contiguous low-bit mask (2^w - 1),
// so the field width is log2(mask+1).
instruct ubfxL(iRegLNoSp dst, iRegL src, immI rshift, immL_bitmask mask)
%{
  match(Set dst (AndL (URShiftL src rshift) mask));

  ins_cost(INSN_COST);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // The mask is a long, so mask+1 may exceed 32 bits; use the 64-bit
    // log2 variant (consistent with the exact_log2_long use in the
    // long-mask predicates elsewhere in this file).
    int width = exact_log2_long(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11386 
11387 // We can use ubfx when extending an And with a mask when we know mask
11388 // is positive.  We know that because immI_bitmask guarantees it.
// We can use ubfx when extending an And with a mask when we know mask
// is positive.  We know that because immI_bitmask guarantees it.
// Matches ConvI2L((src >>> rshift) & mask); UBFX zero-extends the
// extracted field, which matches the conversion since mask is positive.
instruct ubfxIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI rshift, immI_bitmask mask)
%{
  match(Set dst (ConvI2L (AndI (URShiftI src rshift) mask)));

  ins_cost(INSN_COST * 2);
  format %{ "ubfx $dst, $src, $rshift, $mask" %}
  ins_encode %{
    int rshift = $rshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1 (int-sized), so mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfx(as_Register($dst$$reg),
            as_Register($src$$reg), rshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11404 
11405 // We can use ubfiz when masking by a positive number and then left shifting the result.
11406 // We know that the mask is positive because immI_bitmask guarantees it.
// Unsigned bitfield insert-in-zero, 32-bit: (src & mask) << lshift -> UBFIZW.
// The predicate checks that the shifted field still fits in 32 bits.
instruct ubfizwI(iRegINoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftI (AndI src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2(n->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= (31+1));

  ins_cost(INSN_COST);
  format %{ "ubfizw $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1 (int-sized), so mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfizw(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11424 // We can use ubfiz when masking by a positive number and then left shifting the result.
11425 // We know that the mask is positive because immL_bitmask guarantees it.
// We can use ubfiz when masking by a positive number and then left shifting the result.
// We know that the mask is positive because immL_bitmask guarantees it.
// Unsigned bitfield insert-in-zero, 64-bit: (src & mask) << lshift -> UBFIZ.
// The predicate checks that the shifted field still fits in 64 bits.
instruct ubfizL(iRegLNoSp dst, iRegL src, immI lshift, immL_bitmask mask)
%{
  match(Set dst (LShiftL (AndL src mask) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 63 &&
    (exact_log2_long(n->in(1)->in(2)->get_long()+1) + (unsigned int)n->in(2)->get_int()) <= (63+1));

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // The mask is a long, so mask+1 may exceed 32 bits; use the 64-bit
    // log2 variant, matching exact_log2_long in this rule's predicate.
    int width = exact_log2_long(mask+1);
    __ ubfiz(as_Register($dst$$reg),
          as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11443 
11444 // If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// If there is a convert I to L block between and AndI and a LShiftL, we can also match ubfiz
// UBFIZ zero-extends the inserted field, which matches ConvI2L here
// because immI_bitmask guarantees the masked value is non-negative.
instruct ubfizIConvI2L(iRegLNoSp dst, iRegIorL2I src, immI lshift, immI_bitmask mask)
%{
  match(Set dst (LShiftL (ConvI2L(AndI src mask)) lshift));
  predicate((unsigned int)n->in(2)->get_int() <= 31 &&
    (exact_log2((unsigned int)n->in(1)->in(1)->in(2)->get_int()+1) + (unsigned int)n->in(2)->get_int()) <= 32);

  ins_cost(INSN_COST);
  format %{ "ubfiz $dst, $src, $lshift, $mask" %}
  ins_encode %{
    int lshift = $lshift$$constant;
    long mask = $mask$$constant;
    // mask == 2^width - 1 (int-sized), so mask+1 is a power of two.
    int width = exact_log2(mask+1);
    __ ubfiz(as_Register($dst$$reg),
             as_Register($src$$reg), lshift, width);
  %}
  ins_pipe(ialu_reg_shift);
%}
11462 
11463 // Rotations
11464 
// Rotate-by-constant patterns folded to EXTR.  Each predicate requires
// lshift + rshift == 0 (mod width), so the Or/Add of the two shifted
// values is exactly a double-register extract: EXTR takes the low
// <rshift> bits of src1 concatenated above the high bits of src2.
// Add works here because the two shifted values have no overlapping bits.
// NOTE(review): cr is declared but EXTR does not affect flags -- confirm.

// dst = (src1 << lshift) | (src2 >>> rshift), lshift + rshift == 64
instruct extrOrL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// dst = (src1 << lshift) | (src2 >>> rshift), lshift + rshift == 32
instruct extrOrI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// dst = (src1 << lshift) + (src2 >>> rshift), lshift + rshift == 64
instruct extrAddL(iRegLNoSp dst, iRegL src1, iRegL src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddL (LShiftL src1 lshift) (URShiftL src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 63));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extr(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 63);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}

// dst = (src1 << lshift) + (src2 >>> rshift), lshift + rshift == 32
instruct extrAddI(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI lshift, immI rshift, rFlagsReg cr)
%{
  match(Set dst (AddI (LShiftI src1 lshift) (URShiftI src2 rshift)));
  predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 31));

  ins_cost(INSN_COST);
  format %{ "extr $dst, $src1, $src2, #$rshift" %}

  ins_encode %{
    __ extrw(as_Register($dst$$reg), as_Register($src1$$reg), as_Register($src2$$reg),
            $rshift$$constant & 31);
  %}
  ins_pipe(ialu_reg_reg_extr);
%}
11524 
11525 
11526 // rol expander
11527 
// Rotate-left expander (long): there is no ROL instruction, so rotate
// left by s is emitted as rotate right by -s.  RORV uses the shift
// amount modulo the register width, so negating the shift in rscratch1
// gives the correct count.
instruct rolL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = -shift; subw clobbers no other state we care about.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Rotate-left expander (int): same negate-and-rorvw trick as rolL_rReg.
instruct rolI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
%{
  effect(DEF dst, USE src, USE shift);

  format %{ "rol    $dst, $src, $shift" %}
  ins_cost(INSN_COST * 3);
  ins_encode %{
    // rscratch1 = -shift.
    __ subw(rscratch1, zr, as_Register($shift$$reg));
    __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
            rscratch1);
    %}
  ins_pipe(ialu_reg_reg_vshift);
%}

// Matches the (x << s) | (x >>> (64 - s)) rotate-left idiom.
instruct rolL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c_64 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Matches the (x << s) | (x >>> (0 - s)) form (0 - s == 64 - s mod 64).
instruct rolL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (LShiftL src shift) (URShiftL src (SubI c0 shift))));

  expand %{
    rolL_rReg(dst, src, shift, cr);
  %}
%}

// Matches the (x << s) | (x >>> (32 - s)) rotate-left idiom.
instruct rolI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c_32 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}

// Matches the (x << s) | (x >>> (0 - s)) form (0 - s == 32 - s mod 32).
instruct rolI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (LShiftI src shift) (URShiftI src (SubI c0 shift))));

  expand %{
    rolI_rReg(dst, src, shift, cr);
  %}
%}
11593 
11594 // ror expander
11595 
11596 instruct rorL_rReg(iRegLNoSp dst, iRegL src, iRegI shift, rFlagsReg cr)
11597 %{
11598   effect(DEF dst, USE src, USE shift);
11599 
11600   format %{ "ror    $dst, $src, $shift" %}
11601   ins_cost(INSN_COST);
11602   ins_encode %{
11603     __ rorv(as_Register($dst$$reg), as_Register($src$$reg),
11604             as_Register($shift$$reg));
11605     %}
11606   ins_pipe(ialu_reg_reg_vshift);
11607 %}
11608 
11609 // ror expander
11610 
11611 instruct rorI_rReg(iRegINoSp dst, iRegI src, iRegI shift, rFlagsReg cr)
11612 %{
11613   effect(DEF dst, USE src, USE shift);
11614 
11615   format %{ "ror    $dst, $src, $shift" %}
11616   ins_cost(INSN_COST);
11617   ins_encode %{
11618     __ rorvw(as_Register($dst$$reg), as_Register($src$$reg),
11619             as_Register($shift$$reg));
11620     %}
11621   ins_pipe(ialu_reg_reg_vshift);
11622 %}
11623 
// Rotate-right (64-bit): matches (src >>> shift) | (src << (64 - shift));
// expands to the rorL_rReg expander.
instruct rorL_rReg_Var_C_64(iRegLNoSp dst, iRegL src, iRegI shift, immI_64 c_64, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c_64 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11632 
// Rotate-right (64-bit): matches (src >>> shift) | (src << (0 - shift));
// expands to the rorL_rReg expander.
instruct rorL_rReg_Var_C0(iRegLNoSp dst, iRegL src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrL (URShiftL src shift) (LShiftL src (SubI c0 shift))));

  expand %{
    rorL_rReg(dst, src, shift, cr);
  %}
%}
11641 
// Rotate-right (32-bit): matches (src >>> shift) | (src << (32 - shift));
// expands to the rorI_rReg expander.
instruct rorI_rReg_Var_C_32(iRegINoSp dst, iRegI src, iRegI shift, immI_32 c_32, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c_32 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11650 
// Rotate-right (32-bit): matches (src >>> shift) | (src << (0 - shift));
// expands to the rorI_rReg expander.
instruct rorI_rReg_Var_C0(iRegINoSp dst, iRegI src, iRegI shift, immI0 c0, rFlagsReg cr)
%{
  match(Set dst (OrI (URShiftI src shift) (LShiftI src (SubI c0 shift))));

  expand %{
    rorI_rReg(dst, src, shift, cr);
  %}
%}
11659 
11660 // Add/subtract (extended)
11661 
// Long add of a sign-extended int: dst = src1 + (long)src2, using the
// extended-register 'add ..., sxtw' form.
// NOTE(review): trailing ';' after the closing '%}' is inconsistent with
// neighboring instructs; this section is generated -- drop it in the generator.
instruct AddExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11674 
// Long subtract of a sign-extended int: dst = src1 - (long)src2, using the
// extended-register 'sub ..., sxtw' form.
// NOTE(review): trailing ';' after the closing '%}' is inconsistent with
// neighboring instructs; this section is generated -- drop it in the generator.
instruct SubExtI(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (ConvI2L src2)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%};
11687 
11688 
// Int add of a sign-extended short: src1 + ((src2 << 16) >> 16), emitted as
// extended-register 'add ..., sxth'.
instruct AddExtI_sxth(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_16 lshift, immI_16 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11701 
// Int add of a sign-extended byte: src1 + ((src2 << 24) >> 24), emitted as
// extended-register 'add ..., sxtb'.
instruct AddExtI_sxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (RShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11714 
// Int add of a zero-extended byte: src1 + ((src2 << 24) >>> 24), emitted as
// extended-register 'add ..., uxtb'. NOTE(review): uses the 64-bit add form
// for an int result -- presumably upper-32 bits are dead; confirm in generator.
instruct AddExtI_uxtb(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_24 lshift, immI_24 rshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (URShiftI (LShiftI src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11727 
// Long add of a sign-extended short: src1 + ((src2 << 48) >> 48), emitted as
// extended-register 'add ..., sxth'.
instruct AddExtL_sxth(iRegLNoSp dst, iRegL src1, iRegL src2, immI_48 lshift, immI_48 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11740 
// Long add of a sign-extended word: src1 + ((src2 << 32) >> 32), emitted as
// extended-register 'add ..., sxtw'.
instruct AddExtL_sxtw(iRegLNoSp dst, iRegL src1, iRegL src2, immI_32 lshift, immI_32 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11753 
// Long add of a sign-extended byte: src1 + ((src2 << 56) >> 56), emitted as
// extended-register 'add ..., sxtb'.
instruct AddExtL_sxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (RShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11766 
// Long add of a zero-extended byte: src1 + ((src2 << 56) >>> 56), emitted as
// extended-register 'add ..., uxtb'.
instruct AddExtL_uxtb(iRegLNoSp dst, iRegL src1, iRegL src2, immI_56 lshift, immI_56 rshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (URShiftL (LShiftL src2 lshift) rshift)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11779 
11780 
// Int add of a masked byte: src1 + (src2 & 0xff), emitted as
// extended-register 'addw ..., uxtb'.
instruct AddExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11793 
// Int add of a masked short: src1 + (src2 & 0xffff), emitted as
// extended-register 'addw ..., uxth'.
instruct AddExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11806 
// Long add of a masked byte: src1 + (src2 & 0xff), emitted as
// extended-register 'add ..., uxtb'.
instruct AddExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11819 
// Long add of a masked short: src1 + (src2 & 0xffff), emitted as
// extended-register 'add ..., uxth'.
instruct AddExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11832 
// Long add of a masked word: src1 + (src2 & 0xffffffff), emitted as
// extended-register 'add ..., uxtw'.
instruct AddExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11845 
// Int subtract of a masked byte: src1 - (src2 & 0xff), emitted as
// extended-register 'subw ..., uxtb'.
instruct SubExtI_uxtb_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11858 
// Int subtract of a masked short: src1 - (src2 & 0xffff), emitted as
// extended-register 'subw ..., uxth'.
instruct SubExtI_uxth_and(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (AndI src2 mask)));
  ins_cost(INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11871 
// Long subtract of a masked byte: src1 - (src2 & 0xff), emitted as
// extended-register 'sub ..., uxtb'.
instruct SubExtL_uxtb_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb);
   %}
  ins_pipe(ialu_reg_reg);
%}
11884 
// Long subtract of a masked short: src1 - (src2 & 0xffff), emitted as
// extended-register 'sub ..., uxth'.
instruct SubExtL_uxth_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth);
   %}
  ins_pipe(ialu_reg_reg);
%}
11897 
// Long subtract of a masked word: src1 - (src2 & 0xffffffff), emitted as
// extended-register 'sub ..., uxtw'.
instruct SubExtL_uxtw_and(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (AndL src2 mask)));
  ins_cost(INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw);
   %}
  ins_pipe(ialu_reg_reg);
%}
11910 
11911 
// Long add of a sign-extended byte, then shifted: src1 + (sxtb(src2) << lshift2),
// emitted as extended-register 'add ..., sxtb #lshift2'.
instruct AddExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11924 
// Long add of a sign-extended short, then shifted: src1 + (sxth(src2) << lshift2).
instruct AddExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11937 
// Long add of a sign-extended word, then shifted: src1 + (sxtw(src2) << lshift2).
instruct AddExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11950 
// Long subtract of a sign-extended byte, then shifted: src1 - (sxtb(src2) << lshift2).
instruct SubExtL_sxtb_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_56 lshift1, immI_56 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11963 
// Long subtract of a sign-extended short, then shifted: src1 - (sxth(src2) << lshift2).
instruct SubExtL_sxth_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_48 lshift1, immI_48 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11976 
// Long subtract of a sign-extended word, then shifted: src1 - (sxtw(src2) << lshift2).
instruct SubExtL_sxtw_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immIExt lshift2, immI_32 lshift1, immI_32 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (RShiftL (LShiftL src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift2" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
11989 
// Int add of a sign-extended byte, then shifted: src1 + (sxtb(src2) << lshift2),
// emitted as 'addw ..., sxtb #lshift2'.
instruct AddExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12002 
// Int add of a sign-extended short, then shifted: src1 + (sxth(src2) << lshift2).
instruct AddExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12015 
// Int subtract of a sign-extended byte, then shifted: src1 - (sxtb(src2) << lshift2).
instruct SubExtI_sxtb_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_24 lshift1, immI_24 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxtb #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtb, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12028 
// Int subtract of a sign-extended short, then shifted: src1 - (sxth(src2) << lshift2).
instruct SubExtI_sxth_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immIExt lshift2, immI_16 lshift1, immI_16 rshift1, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (RShiftI (LShiftI src2 lshift1) rshift1) lshift2)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, sxth #lshift2" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxth, ($lshift2$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12041 
12042 
// Long add of a shifted sign-extended int: src1 + ((long)src2 << lshift),
// emitted as 'add ..., sxtw #lshift'.
// NOTE(review): trailing ';' after the closing '%}' is inconsistent with
// neighboring instructs; this section is generated -- drop it in the generator.
instruct AddExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
12055 
// Long subtract of a shifted sign-extended int: src1 - ((long)src2 << lshift),
// emitted as 'sub ..., sxtw #lshift'.
// NOTE(review): trailing ';' after the closing '%}' is inconsistent with
// neighboring instructs; this section is generated -- drop it in the generator.
instruct SubExtI_shift(iRegLNoSp dst, iRegL src1, iRegIorL2I src2, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (ConvI2L src2) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, sxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::sxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%};
12068 
12069 
// Long add of a masked byte, then shifted: src1 + ((src2 & 0xff) << lshift),
// emitted as 'add ..., uxtb #lshift'.
instruct AddExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12082 
// Long add of a masked short, then shifted: src1 + ((src2 & 0xffff) << lshift).
instruct AddExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12095 
// Long add of a masked word, then shifted: src1 + ((src2 & 0xffffffff) << lshift).
instruct AddExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "add  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ add(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12108 
// Long subtract of a masked byte, then shifted: src1 - ((src2 & 0xff) << lshift).
instruct SubExtL_uxtb_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12121 
// Long subtract of a masked short, then shifted: src1 - ((src2 & 0xffff) << lshift).
instruct SubExtL_uxth_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12134 
// Long subtract of a masked word, then shifted: src1 - ((src2 & 0xffffffff) << lshift).
instruct SubExtL_uxtw_and_shift(iRegLNoSp dst, iRegL src1, iRegL src2, immL_4294967295 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubL src1 (LShiftL (AndL src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "sub  $dst, $src1, $src2, uxtw #lshift" %}

   ins_encode %{
     __ sub(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtw, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12147 
// Int add of a masked byte, then shifted: src1 + ((src2 & 0xff) << lshift),
// emitted as 'addw ..., uxtb #lshift'.
instruct AddExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12160 
// Int add of a masked short, then shifted: src1 + ((src2 & 0xffff) << lshift).
instruct AddExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (AddI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "addw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ addw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12173 
// Int subtract of a masked byte, then shifted: src1 - ((src2 & 0xff) << lshift).
instruct SubExtI_uxtb_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_255 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxtb #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxtb, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12186 
// Int subtract of a masked short, then shifted: src1 - ((src2 & 0xffff) << lshift).
instruct SubExtI_uxth_and_shift(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, immI_65535 mask, immIExt lshift, rFlagsReg cr)
%{
  match(Set dst (SubI src1 (LShiftI (AndI src2 mask) lshift)));
  ins_cost(1.9 * INSN_COST);
  format %{ "subw  $dst, $src1, $src2, uxth #lshift" %}

   ins_encode %{
     __ subw(as_Register($dst$$reg), as_Register($src1$$reg),
            as_Register($src2$$reg), ext::uxth, ($lshift$$constant));
   %}
  ins_pipe(ialu_reg_reg_shift);
%}
12199 // END This section of the file is automatically generated. Do not edit --------------
12200 
12201 // ============================================================================
12202 // Floating Point Arithmetic Instructions
12203 
// Single-precision FP add: dst = src1 + src2 (fadds).
instruct addF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (AddF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fadds   $dst, $src1, $src2" %}

  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12218 
// Double-precision FP add: dst = src1 + src2 (faddd).
instruct addD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (AddD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "faddd   $dst, $src1, $src2" %}

  ins_encode %{
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12233 
// Single-precision FP subtract: dst = src1 - src2 (fsubs).
instruct subF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (SubF src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12248 
// Double-precision FP subtract: dst = src1 - src2 (fsubd).
instruct subD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (SubD src1 src2));

  ins_cost(INSN_COST * 5);
  format %{ "fsubd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fsubd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12263 
// Single-precision FP multiply: dst = src1 * src2 (fmuls).
instruct mulF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (MulF src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuls   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_s);
%}
12278 
// Double-precision FP multiply: dst = src1 * src2 (fmuld).
instruct mulD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (MulD src1 src2));

  ins_cost(INSN_COST * 6);
  format %{ "fmuld   $dst, $src1, $src2" %}

  ins_encode %{
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_dop_reg_reg_d);
%}
12293 
12294 // src1 * src2 + src3
// Fused multiply-add, single: dst = src1 * src2 + src3 (fmadds);
// matched only when UseFMA is enabled.
instruct maddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary src1 src2)));

  format %{ "fmadds   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12310 
12311 // src1 * src2 + src3
// Fused multiply-add, double: dst = src1 * src2 + src3 (fmaddd);
// matched only when UseFMA is enabled.
instruct maddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary src1 src2)));

  format %{ "fmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmaddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg),
             as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12327 
12328 // -src1 * src2 + src3
// Fused multiply-subtract, single: dst = -(src1 * src2) + src3 (fmsubs);
// both negation placements in the FmaF node are matched. UseFMA only.
instruct msubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF src3 (Binary (NegF src1) src2)));
  match(Set dst (FmaF src3 (Binary src1 (NegF src2))));

  format %{ "fmsubs   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubs(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12345 
12346 // -src1 * src2 + src3
// Fused multiply-subtract, double: dst = -(src1 * src2) + src3 (fmsubd);
// both negation placements in the FmaD node are matched. UseFMA only.
instruct msubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD src3 (Binary (NegD src1) src2)));
  match(Set dst (FmaD src3 (Binary src1 (NegD src2))));

  format %{ "fmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fmsubd(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12363 
12364 // -src1 * src2 - src3
// Fused negated multiply-add, single: dst = -(src1 * src2) - src3 (fnmadds);
// both negation placements of the product are matched. UseFMA only.
instruct mnaddF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2)));
  match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2))));

  format %{ "fnmadds  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmadds(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12381 
12382 // -src1 * src2 - src3
// Fused negated multiply-add, double: dst = -(src1 * src2) - src3 (fnmaddd);
// both negation placements of the product are matched. UseFMA only.
instruct mnaddD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2)));
  match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2))));

  format %{ "fnmaddd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmaddd(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12399 
12400 // src1 * src2 - src3
// Fused negated multiply-subtract, single: dst = src1 * src2 - src3 (fnmsubs);
// UseFMA only. NOTE(review): the 'zero' operand is not referenced by the match
// rule -- presumably a leftover from an earlier pattern; confirm before removing.
instruct mnsubF_reg_reg(vRegF dst, vRegF src1, vRegF src2, vRegF src3, immF0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaF (NegF src3) (Binary src1 src2)));

  format %{ "fnmsubs  $dst, $src1, $src2, $src3" %}

  ins_encode %{
    __ fnmsubs(as_FloatRegister($dst$$reg),
               as_FloatRegister($src1$$reg),
               as_FloatRegister($src2$$reg),
               as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12416 
12417 // src1 * src2 - src3
// Fused negated multiply-subtract, double: dst = src1 * src2 - src3;
// UseFMA only. NOTE(review): the 'zero' operand is not referenced by the match
// rule -- presumably a leftover from an earlier pattern; confirm before removing.
instruct mnsubD_reg_reg(vRegD dst, vRegD src1, vRegD src2, vRegD src3, immD0 zero) %{
  predicate(UseFMA);
  match(Set dst (FmaD (NegD src3) (Binary src1 src2)));

  format %{ "fnmsubd   $dst, $src1, $src2, $src3" %}

  ins_encode %{
  // n.b. insn name should be fnmsubd
    __ fnmsub(as_FloatRegister($dst$$reg),
              as_FloatRegister($src1$$reg),
              as_FloatRegister($src2$$reg),
              as_FloatRegister($src3$$reg));
  %}

  ins_pipe(pipe_class_default);
%}
12434 
12435 
// Single-precision FP divide: dst = src1 / src2 (fdivs).
instruct divF_reg_reg(vRegF dst, vRegF src1, vRegF src2) %{
  match(Set dst (DivF src1  src2));

  ins_cost(INSN_COST * 18);
  format %{ "fdivs   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_s);
%}
12450 
// Double-precision FP divide: dst = src1 / src2 (fdivd); costed higher than
// the single-precision divide.
instruct divD_reg_reg(vRegD dst, vRegD src1, vRegD src2) %{
  match(Set dst (DivD src1  src2));

  ins_cost(INSN_COST * 32);
  format %{ "fdivd   $dst, $src1, $src2" %}

  ins_encode %{
    __ fdivd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg),
             as_FloatRegister($src2$$reg));
  %}

  ins_pipe(fp_div_d);
%}
12465 
// Single-precision FP negate: dst = -src (fnegs).
instruct negF_reg_reg(vRegF dst, vRegF src) %{
  match(Set dst (NegF src));

  ins_cost(INSN_COST * 3);
  // Format fixed from "fneg" to "fnegs": the emitted instruction is fnegs,
  // consistent with negD_reg_reg's "fnegd". Debug/disassembly text only.
  format %{ "fnegs   $dst, $src" %}

  ins_encode %{
    __ fnegs(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12479 
// Double-precision FP negate: dst = -src (fnegd).
instruct negD_reg_reg(vRegD dst, vRegD src) %{
  match(Set dst (NegD src));

  ins_cost(INSN_COST * 3);
  format %{ "fnegd   $dst, $src" %}

  ins_encode %{
    __ fnegd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12493 
// Single-precision FP absolute value: dst = |src| (fabss).
instruct absF_reg(vRegF dst, vRegF src) %{
  match(Set dst (AbsF src));

  ins_cost(INSN_COST * 3);
  format %{ "fabss   $dst, $src" %}
  ins_encode %{
    __ fabss(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_s);
%}
12506 
// Double-precision FP absolute value: dst = |src| (fabsd).
instruct absD_reg(vRegD dst, vRegD src) %{
  match(Set dst (AbsD src));

  ins_cost(INSN_COST * 3);
  format %{ "fabsd   $dst, $src" %}
  ins_encode %{
    __ fabsd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_uop_d);
%}
12519 
// Double-precision square root: dst := sqrt(src) (emits fsqrtd).
instruct sqrtD_reg(vRegD dst, vRegD src) %{
  match(Set dst (SqrtD src));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrtd  $dst, $src" %}
  ins_encode %{
    __ fsqrtd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipe class: this is the double-precision divide/sqrt unit.
  // It was fp_div_s (swapped with sqrtF_reg), giving the scheduler a
  // wrong latency/resource model; matcher behavior is unchanged.
  ins_pipe(fp_div_d);
%}
12532 
// Single-precision square root, matched from the double-rounding-safe
// pattern (float)sqrt((double)src): dst := sqrtf(src) (emits fsqrts).
instruct sqrtF_reg(vRegF dst, vRegF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  ins_cost(INSN_COST * 50);
  format %{ "fsqrts  $dst, $src" %}
  ins_encode %{
    __ fsqrts(as_FloatRegister($dst$$reg),
             as_FloatRegister($src$$reg));
  %}

  // Fixed pipe class: this is the single-precision divide/sqrt unit.
  // It was fp_div_d (swapped with sqrtD_reg); scheduling model only,
  // matcher behavior is unchanged.
  ins_pipe(fp_div_s);
%}
12545 
12546 // ============================================================================
12547 // Logical Instructions
12548 
12549 // Integer Logical Instructions
12550 
12551 // And Instructions
12552 
12553 
// int bitwise AND (register form): dst := src1 & src2 (emits andw).
// NOTE(review): cr is declared but andw sets no flags and there is no
// effect(KILL cr) — confirm whether the extra operand is intentional.
instruct andI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12568 
// int bitwise AND with a logical-immediate: dst := src1 & src2 (emits andw).
// NOTE(review): cr is declared but andw sets no flags and there is no
// effect(KILL cr) — confirm whether the extra operand is intentional.
instruct andI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2, rFlagsReg cr) %{
  match(Set dst (AndI src1 src2));

  // Fixed format text: the encoding emits andw (no flag update); "andsw"
  // wrongly suggested a flag-setting form in disassembly listings.
  format %{ "andw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12583 
12584 // Or Instructions
12585 
// int bitwise OR (register form): dst := src1 | src2 (emits orrw).
instruct orI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12600 
// int bitwise OR with a logical-immediate: dst := src1 | src2 (emits orrw).
instruct orI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (OrI src1 src2));

  format %{ "orrw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orrw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12615 
12616 // Xor Instructions
12617 
// int bitwise XOR (register form): dst := src1 ^ src2 (emits eorw).
instruct xorI_reg_reg(iRegINoSp dst, iRegIorL2I src1, iRegIorL2I src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12632 
// int bitwise XOR with a logical-immediate: dst := src1 ^ src2 (emits eorw).
instruct xorI_reg_imm(iRegINoSp dst, iRegIorL2I src1, immILog src2) %{
  match(Set dst (XorI src1 src2));

  format %{ "eorw  $dst, $src1, $src2\t# int" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eorw(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12647 
12648 // Long Logical Instructions
12649 // TODO
12650 
// long bitwise AND (register form): dst := src1 & src2 (emits andr).
// NOTE(review): cr is declared but andr sets no flags and there is no
// effect(KILL cr) — confirm whether the extra operand is intentional.
instruct andL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed format comment: this is the 64-bit (long) form; it was
  // mislabelled "# int" in disassembly listings.
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12665 
// long bitwise AND with a logical-immediate: dst := src1 & src2 (emits andr).
// NOTE(review): cr is declared but andr sets no flags and there is no
// effect(KILL cr) — confirm whether the extra operand is intentional.
instruct andL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2, rFlagsReg cr) %{
  match(Set dst (AndL src1 src2));

  // Fixed format comment: 64-bit (long) form, was mislabelled "# int".
  format %{ "and  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ andr(as_Register($dst$$reg),
            as_Register($src1$$reg),
            (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12680 
12681 // Or Instructions
12682 
// long bitwise OR (register form): dst := src1 | src2 (emits orr).
instruct orL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed format comment: 64-bit (long) form, was mislabelled "# int".
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12697 
// long bitwise OR with a logical-immediate: dst := src1 | src2 (emits orr).
instruct orL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (OrL src1 src2));

  // Fixed format comment: 64-bit (long) form, was mislabelled "# int".
  format %{ "orr  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ orr(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12712 
12713 // Xor Instructions
12714 
// long bitwise XOR (register form): dst := src1 ^ src2 (emits eor).
instruct xorL_reg_reg(iRegLNoSp dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed format comment: 64-bit (long) form, was mislabelled "# int".
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           as_Register($src2$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}
12729 
// long bitwise XOR with a logical-immediate: dst := src1 ^ src2 (emits eor).
instruct xorL_reg_imm(iRegLNoSp dst, iRegL src1, immLLog src2) %{
  match(Set dst (XorL src1 src2));

  // Fixed format comment ("# long", was "# int") and restored the
  // format-before-ins_cost ordering used by every sibling rule.
  format %{ "eor  $dst, $src1, $src2\t# long" %}

  ins_cost(INSN_COST);
  ins_encode %{
    __ eor(as_Register($dst$$reg),
           as_Register($src1$$reg),
           (unsigned long)($src2$$constant));
  %}

  ins_pipe(ialu_reg_imm);
%}
12744 
// Sign-extend int to long: sbfm with imms 0..31 is the sxtw alias.
instruct convI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src)
%{
  match(Set dst (ConvI2L src));

  ins_cost(INSN_COST);
  format %{ "sxtw  $dst, $src\t# i2l" %}
  ins_encode %{
    __ sbfm($dst$$Register, $src$$Register, 0, 31);
  %}
  ins_pipe(ialu_reg_shift);
%}
12756 
12757 // this pattern occurs in bigmath arithmetic
// Zero-extend int to long, matched from (ConvI2L src) & 0xFFFFFFFF:
// ubfm with imms 0..31 is the uxtw alias, which clears the high 32 bits.
instruct convUI2L_reg_reg(iRegLNoSp dst, iRegIorL2I src, immL_32bits mask)
%{
  match(Set dst (AndL (ConvI2L src) mask));

  ins_cost(INSN_COST);
  format %{ "ubfm  $dst, $src, 0, 31\t# ui2l" %}
  ins_encode %{
    __ ubfm($dst$$Register, $src$$Register, 0, 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
12770 
// Truncate long to int: a 32-bit register move zeroes the upper bits.
instruct convL2I_reg(iRegINoSp dst, iRegL src) %{
  match(Set dst (ConvL2I src));

  ins_cost(INSN_COST);
  format %{ "movw  $dst, $src \t// l2i" %}

  ins_encode %{
    __ movw(as_Register($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(ialu_reg);
%}
12783 
// Convert int to boolean: dst := (src != 0) ? 1 : 0, via compare + cset.
instruct convI2B(iRegINoSp dst, iRegIorL2I src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  // cmpw clobbers the condition flags.
  effect(KILL cr);

  format %{
    "cmpw $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmpw(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12801 
// Convert pointer to boolean: dst := (src != NULL) ? 1 : 0 (64-bit compare).
instruct convP2B(iRegINoSp dst, iRegP src, rFlagsReg cr)
%{
  match(Set dst (Conv2B src));
  // cmp clobbers the condition flags.
  effect(KILL cr);

  format %{
    "cmp  $src, zr\n\t"
    "cset $dst, ne"
  %}

  ins_encode %{
    __ cmp(as_Register($src$$reg), zr);
    __ cset(as_Register($dst$$reg), Assembler::NE);
  %}

  ins_pipe(ialu_reg);
%}
12819 
// Narrow double to float (fcvtd: double -> single precision convert).
instruct convD2F_reg(vRegF dst, vRegD src) %{
  match(Set dst (ConvD2F src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtd  $dst, $src \t// d2f" %}

  ins_encode %{
    __ fcvtd(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2f);
%}
12832 
// Widen float to double (fcvts: single -> double precision convert).
instruct convF2D_reg(vRegD dst, vRegF src) %{
  match(Set dst (ConvF2D src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvts  $dst, $src \t// f2d" %}

  ins_encode %{
    __ fcvts(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2d);
%}
12845 
// Float to int: fcvtzsw converts with round-toward-zero into a 32-bit GPR.
instruct convF2I_reg_reg(iRegINoSp dst, vRegF src) %{
  match(Set dst (ConvF2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzsw  $dst, $src \t// f2i" %}

  ins_encode %{
    __ fcvtzsw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);
%}
12858 
// Float to long: fcvtzs converts with round-toward-zero into a 64-bit GPR.
instruct convF2L_reg_reg(iRegLNoSp dst, vRegF src) %{
  match(Set dst (ConvF2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzs  $dst, $src \t// f2l" %}

  ins_encode %{
    __ fcvtzs(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2l);
%}
12871 
// int to float: scvtfws is a signed 32-bit -> single-precision convert.
instruct convI2F_reg_reg(vRegF dst, iRegIorL2I src) %{
  match(Set dst (ConvI2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfws  $dst, $src \t// i2f" %}

  ins_encode %{
    __ scvtfws(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2f);
%}
12884 
// long to float: scvtfs is a signed 64-bit -> single-precision convert.
instruct convL2F_reg_reg(vRegF dst, iRegL src) %{
  match(Set dst (ConvL2F src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfs  $dst, $src \t// l2f" %}

  ins_encode %{
    __ scvtfs(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2f);
%}
12897 
// Double to int: fcvtzdw converts with round-toward-zero into a 32-bit GPR.
instruct convD2I_reg_reg(iRegINoSp dst, vRegD src) %{
  match(Set dst (ConvD2I src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzdw  $dst, $src \t// d2i" %}

  ins_encode %{
    __ fcvtzdw(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2i);
%}
12910 
// Double to long: fcvtzd converts with round-toward-zero into a 64-bit GPR.
instruct convD2L_reg_reg(iRegLNoSp dst, vRegD src) %{
  match(Set dst (ConvD2L src));

  ins_cost(INSN_COST * 5);
  format %{ "fcvtzd  $dst, $src \t// d2l" %}

  ins_encode %{
    __ fcvtzd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);
%}
12923 
// int to double: scvtfwd is a signed 32-bit -> double-precision convert.
instruct convI2D_reg_reg(vRegD dst, iRegIorL2I src) %{
  match(Set dst (ConvI2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfwd  $dst, $src \t// i2d" %}

  ins_encode %{
    __ scvtfwd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_i2d);
%}
12936 
// long to double: scvtfd is a signed 64-bit -> double-precision convert.
instruct convL2D_reg_reg(vRegD dst, iRegL src) %{
  match(Set dst (ConvL2D src));

  ins_cost(INSN_COST * 5);
  format %{ "scvtfd  $dst, $src \t// l2d" %}

  ins_encode %{
    __ scvtfd(as_FloatRegister($dst$$reg), as_Register($src$$reg));
  %}

  ins_pipe(fp_l2d);
%}
12949 
12950 // stack <-> reg and reg <-> reg shuffles with no conversion
12951 
// Bit-preserving shuffle: reload a spilled float's raw 32 bits into a GPR.
instruct MoveF2I_stack_reg(iRegINoSp dst, stackSlotF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrw $dst, $src\t# MoveF2I_stack_reg" %}

  ins_encode %{
    __ ldrw($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
12969 
// Bit-preserving shuffle: reload a spilled int's raw 32 bits into an FP reg.
instruct MoveI2F_stack_reg(vRegF dst, stackSlotI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrs $dst, $src\t# MoveI2F_stack_reg" %}

  ins_encode %{
    __ ldrs(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
12987 
// Bit-preserving shuffle: reload a spilled double's raw 64 bits into a GPR.
instruct MoveD2L_stack_reg(iRegLNoSp dst, stackSlotD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldr $dst, $src\t# MoveD2L_stack_reg" %}

  ins_encode %{
    __ ldr($dst$$Register, Address(sp, $src$$disp));
  %}

  ins_pipe(iload_reg_reg);

%}
13005 
// Bit-preserving shuffle: reload a spilled long's raw 64 bits into an FP reg.
instruct MoveL2D_stack_reg(vRegD dst, stackSlotL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(4 * INSN_COST);

  format %{ "ldrd $dst, $src\t# MoveL2D_stack_reg" %}

  ins_encode %{
    __ ldrd(as_FloatRegister($dst$$reg), Address(sp, $src$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13023 
// Bit-preserving shuffle: store a float's raw 32 bits to an int stack slot.
instruct MoveF2I_reg_stack(stackSlotI dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strs $src, $dst\t# MoveF2I_reg_stack" %}

  ins_encode %{
    __ strs(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13041 
// Bit-preserving shuffle: store an int's raw 32 bits to a float stack slot.
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "strw $src, $dst\t# MoveI2F_reg_stack" %}

  ins_encode %{
    __ strw($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13059 
// Bit-preserving shuffle: store a double's raw 64 bits to a long stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  // Fixed operand order in the format: the encoding stores $src into the
  // stack slot $dst, and every sibling *_reg_stack rule prints "$src, $dst";
  // the old text ("$dst, $src") printed the operands reversed.
  format %{ "strd $src, $dst\t# MoveD2L_reg_stack" %}

  ins_encode %{
    __ strd(as_FloatRegister($src$$reg), Address(sp, $dst$$disp));
  %}

  ins_pipe(pipe_class_memory);

%}
13077 
// Bit-preserving shuffle: store a long's raw 64 bits to a double stack slot.
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "str $src, $dst\t# MoveL2D_reg_stack" %}

  ins_encode %{
    __ str($src$$Register, Address(sp, $dst$$disp));
  %}

  ins_pipe(istore_reg_reg);

%}
13095 
// Bit-preserving shuffle: move a float's raw 32 bits FP reg -> GPR (fmovs).
instruct MoveF2I_reg_reg(iRegINoSp dst, vRegF src) %{

  match(Set dst (MoveF2I src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveF2I_reg_reg" %}

  ins_encode %{
    __ fmovs($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_f2i);

%}
13113 
// Bit-preserving shuffle: move an int's raw 32 bits GPR -> FP reg (fmovs).
instruct MoveI2F_reg_reg(vRegF dst, iRegI src) %{

  match(Set dst (MoveI2F src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovs $dst, $src\t# MoveI2F_reg_reg" %}

  ins_encode %{
    __ fmovs(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_i2f);

%}
13131 
// Bit-preserving shuffle: move a double's raw 64 bits FP reg -> GPR (fmovd).
instruct MoveD2L_reg_reg(iRegLNoSp dst, vRegD src) %{

  match(Set dst (MoveD2L src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveD2L_reg_reg" %}

  ins_encode %{
    __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
  %}

  ins_pipe(fp_d2l);

%}
13149 
// Bit-preserving shuffle: move a long's raw 64 bits GPR -> FP reg (fmovd).
instruct MoveL2D_reg_reg(vRegD dst, iRegL src) %{

  match(Set dst (MoveL2D src));

  effect(DEF dst, USE src);

  ins_cost(INSN_COST);

  format %{ "fmovd $dst, $src\t# MoveL2D_reg_reg" %}

  ins_encode %{
    __ fmovd(as_FloatRegister($dst$$reg), $src$$Register);
  %}

  ins_pipe(fp_l2d);

%}
13167 
13168 // ============================================================================
13169 // clearing of an array
13170 
// Zero cnt words starting at base (variable length). cnt/base are pinned
// to r11/r10 and clobbered by the zero_words stub call.
// NOTE(review): cr is declared but absent from effect() — confirm whether
// zero_words leaves the flags live or cr should be killed here.
instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}
13185 
// Zero a compile-time-constant number of words at base. Only selected for
// small lengths (below BlockZeroingLowLimit in words), where an inline
// sequence beats the block-zeroing path of the variable-length rule.
instruct clearArray_imm_reg(immL cnt, iRegP_R10 base, Universe dummy, rFlagsReg cr)
%{
  predicate((u_int64_t)n->in(2)->get_long()
            < (u_int64_t)(BlockZeroingLowLimit >> LogBytesPerWord));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL base);

  ins_cost(4 * INSN_COST);
  format %{ "ClearArray $cnt, $base" %}

  ins_encode %{
    __ zero_words($base$$Register, (u_int64_t)$cnt$$constant);
  %}

  ins_pipe(pipe_class_memory);
%}
13202 
13203 // ============================================================================
13204 // Overflow Math Instructions
13205 
// int add overflow check: cmnw (adds to zr) sets V on signed overflow of
// op1 + op2 without producing the sum.
instruct overflowAddI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13218 
// int add-immediate overflow check: cmnw with an add/sub-encodable constant.
instruct overflowAddI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowAddI op1 op2));

  format %{ "cmnw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmnw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13231 
// long add overflow check: cmn sets V on signed 64-bit overflow of op1 + op2.
instruct overflowAddL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13244 
// long add-immediate overflow check: cmn with an add/sub-encodable constant.
instruct overflowAddL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowAddL op1 op2));

  format %{ "cmn   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmn($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13257 
// int subtract overflow check: cmpw sets V on signed overflow of op1 - op2.
instruct overflowSubI_reg_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13270 
// int subtract-immediate overflow check.
instruct overflowSubI_reg_imm(rFlagsReg cr, iRegIorL2I op1, immIAddSub op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "cmpw  $op1, $op2\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13283 
// long subtract overflow check: cmp sets V on signed 64-bit overflow.
instruct overflowSubL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$Register);
  %}

  ins_pipe(icmp_reg_reg);
%}
13296 
// long subtract-immediate overflow check.
instruct overflowSubL_reg_imm(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (OverflowSubL op1 op2));

  format %{ "cmp   $op1, $op2\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp($op1$$Register, $op2$$constant);
  %}

  ins_pipe(icmp_reg_imm);
%}
13309 
// int negate overflow check (0 - op1): only Integer.MIN_VALUE overflows.
instruct overflowNegI_reg(rFlagsReg cr, immI0 zero, iRegIorL2I op1)
%{
  match(Set cr (OverflowSubI zero op1));

  format %{ "cmpw  zr, $op1\t# overflow check int" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmpw(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13322 
// long negate overflow check (0 - op1): only Long.MIN_VALUE overflows.
// NOTE(review): the zero operand is typed immI0 while the node is
// OverflowSubL — confirm this is intentional (the constant is only
// used for matching, not encoding).
instruct overflowNegL_reg(rFlagsReg cr, immI0 zero, iRegL op1)
%{
  match(Set cr (OverflowSubL zero op1));

  format %{ "cmp   zr, $op1\t# overflow check long" %}
  ins_cost(INSN_COST);
  ins_encode %{
    __ cmp(zr, $op1$$Register);
  %}

  ins_pipe(icmp_reg_imm);
%}
13335 
// int multiply overflow check producing a flags result. The 64-bit product
// is compared against its own 32-bit sign extension (mismatch => overflow),
// then the NE result is re-materialized as the V flag so downstream code
// can test BoolTest::overflow with the ordinary VS/VC conditions.
instruct overflowMulI_reg(rFlagsReg cr, iRegIorL2I op1, iRegIorL2I op2)
%{
  match(Set cr (OverflowMulI op1 op2));

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(5 * INSN_COST);
  ins_encode %{
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
13356 
// Fused int multiply-overflow-check + branch: skips the flag
// re-materialization of overflowMulI_reg by branching directly on the
// NE/EQ outcome of the sign-extension compare. Only matched when the If
// tests overflow/no_overflow.
instruct overflowMulI_reg_branch(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulI op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
            "cmp   rscratch1, rscratch1, sxtw\n\t"
            "b$cmp   $labl" %}
  ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ smull(rscratch1, $op1$$Register, $op2$$Register);
    __ subs(zr, rscratch1, rscratch1, ext::sxtw);      // NE => overflow
    // VS (overflow requested) maps to NE; VC maps to EQ.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13378 
// long multiply overflow check producing a flags result. The high 64 bits
// of the 128-bit product (smulh) must equal the sign extension of the low
// 64 bits (mul); a mismatch means overflow, which is then re-materialized
// as the V flag for VS/VC tests, as in overflowMulI_reg.
instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (OverflowMulL op1 op2));

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "movw  rscratch1, #0x80000000\n\t"
            "cselw rscratch1, rscratch1, zr, NE\n\t"
            "cmpw  rscratch1, #1" %}
  ins_cost(6 * INSN_COST);
  ins_encode %{
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    __ movw(rscratch1, 0x80000000);                    // Develop 0 (EQ),
    __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
    __ cmpw(rscratch1, 1);                             // 0x80000000 - 1 => VS
  %}

  ins_pipe(pipe_slow);
%}
13401 
// Fused long multiply-overflow-check + branch: branches directly on the
// high-half/sign-extension mismatch, avoiding the flag re-materialization
// of overflowMulL_reg. Only matched for overflow/no_overflow tests.
instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
%{
  match(If cmp (OverflowMulL op1 op2));
  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
            || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow);
  effect(USE labl, KILL cr);

  format %{ "mul   rscratch1, $op1, $op2\t#overflow check long\n\t"
            "smulh rscratch2, $op1, $op2\n\t"
            "cmp   rscratch2, rscratch1, ASR #63\n\t"
            "b$cmp $labl" %}
  ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    __ mul(rscratch1, $op1$$Register, $op2$$Register);   // Result bits 0..63
    __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
    __ cmp(rscratch2, rscratch1, Assembler::ASR, 63);    // Top is pure sign ext
    // VS (overflow requested) maps to NE; VC maps to EQ.
    __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L);
  %}

  ins_pipe(pipe_serial);
%}
13425 
13426 // ============================================================================
13427 // Compare Instructions
13428 
// Signed int compare, register-register.
instruct compI_reg_reg(rFlagsReg cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13442 
// Signed int compare against zero.
instruct compI_reg_immI0(rFlagsReg cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpI op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, 0" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13456 
// Signed int compare against an add/sub-encodable immediate (one insn).
instruct compI_reg_immIAddSub(rFlagsReg cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13470 
// Signed int compare against an arbitrary immediate; costs more because
// the constant may need materializing first.
instruct compI_reg_immI(rFlagsReg cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpI op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13484 
13485 // Unsigned compare Instructions; really, same as signed compare
13486 // except it should only be used to feed an If or a CMovI which takes a
13487 // cmpOpU.
13488 
// Unsigned int compare, register-register; same encoding as the signed
// compare but the rFlagsRegU result feeds unsigned condition codes.
instruct compU_reg_reg(rFlagsRegU cr, iRegI op1, iRegI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13502 
// Unsigned int compare against zero.
instruct compU_reg_immI0(rFlagsRegU cr, iRegI op1, immI0 zero)
%{
  match(Set cr (CmpU op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw $op1, #0\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13516 
// Unsigned int compare against an add/sub-encodable immediate.
instruct compU_reg_immIAddSub(rFlagsRegU cr, iRegI op1, immIAddSub op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13530 
// Unsigned int compare against an arbitrary immediate (may need a
// constant materialization, hence the doubled cost).
instruct compU_reg_immI(rFlagsRegU cr, iRegI op1, immI op2)
%{
  match(Set cr (CmpU op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmpw  $op1, $op2\t# unsigned" %}

  ins_encode(aarch64_enc_cmpw_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13544 
// Signed long compare, register-register.
instruct compL_reg_reg(rFlagsReg cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13558 
// Signed long compare against zero.
// NOTE(review): the format text says "tst" but the encoder used is the
// add/sub compare (cmp #0) — confirm which mnemonic is actually emitted.
instruct compL_reg_immL0(rFlagsReg cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13572 
// Signed long compare against an add/sub-encodable immediate.
instruct compL_reg_immLAddSub(rFlagsReg cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13586 
// Signed long compare against an arbitrary immediate (constant may need
// materializing, hence the doubled cost).
instruct compL_reg_immL(rFlagsReg cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13600 
// Unsigned long compare, register-register (same cmp encoding; the
// rFlagsRegU result feeds unsigned condition codes).
instruct compUL_reg_reg(rFlagsRegU cr, iRegL op1, iRegL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}
13614 
// Unsigned long compare against zero.
// NOTE(review): format says "tst" but the add/sub compare encoder is used —
// same mismatch as compL_reg_immL0; confirm the emitted mnemonic.
instruct compUL_reg_immL0(rFlagsRegU cr, iRegL op1, immL0 zero)
%{
  match(Set cr (CmpUL op1 zero));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "tst  $op1" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, zero));

  ins_pipe(icmp_reg_imm);
%}
13628 
// Unsigned long compare against an add/sub-encodable immediate.
instruct compUL_reg_immLAddSub(rFlagsRegU cr, iRegL op1, immLAddSub op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm_addsub(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13642 
// Unsigned long compare against an arbitrary immediate.
instruct compUL_reg_immL(rFlagsRegU cr, iRegL op1, immL op2)
%{
  match(Set cr (CmpUL op1 op2));

  effect(DEF cr, USE op1);

  ins_cost(INSN_COST * 2);
  format %{ "cmp  $op1, $op2" %}

  ins_encode(aarch64_enc_cmp_imm(op1, op2));

  ins_pipe(icmp_reg_imm);
%}
13656 
// Pointer compare, register-register; pointer comparisons are unsigned
// (rFlagsRegU).
instruct compP_reg_reg(rFlagsRegU cr, iRegP op1, iRegP op2)
%{
  match(Set cr (CmpP op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // ptr" %}

  ins_encode(aarch64_enc_cmpp(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Compressed-pointer compare, register-register.
instruct compN_reg_reg(rFlagsRegU cr, iRegN op1, iRegN op2)
%{
  match(Set cr (CmpN op1 op2));

  effect(DEF cr, USE op1, USE op2);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, $op2\t // compressed ptr" %}

  ins_encode(aarch64_enc_cmpn(op1, op2));

  ins_pipe(icmp_reg_reg);
%}

// Pointer null check (compare against the null constant).
instruct testP_reg(rFlagsRegU cr, iRegP op1, immP0 zero)
%{
  match(Set cr (CmpP op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // ptr" %}

  ins_encode(aarch64_enc_testp(op1));

  ins_pipe(icmp_reg_imm);
%}

// Compressed-pointer null check.
instruct testN_reg(rFlagsRegU cr, iRegN op1, immN0 zero)
%{
  match(Set cr (CmpN op1 zero));

  effect(DEF cr, USE op1, USE zero);

  ins_cost(INSN_COST);
  format %{ "cmp  $op1, 0\t // compressed ptr" %}

  ins_encode(aarch64_enc_testn(op1));

  ins_pipe(icmp_reg_imm);
%}
13712 
13713 // FP comparisons
13714 //
13715 // n.b. CmpF/CmpD set a normal flags reg which then gets compared
13716 // using normal cmpOp. See declaration of rFlagsReg for details.
13717 
// Single-precision float compare, register-register (fcmps sets NZCV).
instruct compF_reg_reg(rFlagsReg cr, vRegF src1, vRegF src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, $src2" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Single-precision float compare against the constant 0.0 (fcmps with
// immediate-zero form; no second register needed).
instruct compF_reg_zero(rFlagsReg cr, vRegF src1, immF0 src2)
%{
  match(Set cr (CmpF src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmps $src1, 0.0" %}

  ins_encode %{
    __ fcmps(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13745 // FROM HERE
13746 
// Double-precision float compare, register-register (fcmpd sets NZCV).
instruct compD_reg_reg(rFlagsReg cr, vRegD src1, vRegD src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, $src2" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
  %}

  ins_pipe(pipe_class_compare);
%}

// Double-precision float compare against the constant 0.0.
instruct compD_reg_zero(rFlagsReg cr, vRegD src1, immD0 src2)
%{
  match(Set cr (CmpD src1 src2));

  ins_cost(3 * INSN_COST);
  format %{ "fcmpd $src1, 0.0" %}

  ins_encode %{
    __ fcmpd(as_FloatRegister($src1$$reg), 0.0);
  %}

  ins_pipe(pipe_class_compare);
%}
13774 
// Three-way float compare (CmpF3): sets $dst to -1, 0 or +1 for
// src1 < src2 (or unordered), src1 == src2, src1 > src2 respectively.
// fcmps + csinv + csneg, no branches; flags are clobbered (KILL cr).
// Fixes vs previous version: removed the unused `Label done` /
// `bind(done)` dead code and closed the unbalanced parenthesis in the
// format string.
instruct compF3_reg_reg(iRegINoSp dst, vRegF src1, vRegF src2, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
13802 
// Three-way double compare (CmpD3): sets $dst to -1, 0 or +1 for
// src1 < src2 (or unordered), src1 == src2, src1 > src2 respectively.
// fcmpd + csinv + csneg, no branches; flags are clobbered (KILL cr).
// Fixes vs previous version: removed the unused `Label done` /
// `bind(done)` dead code and closed the unbalanced parenthesis in the
// format string.
instruct compD3_reg_reg(iRegINoSp dst, vRegD src1, vRegD src2, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, $src2\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    FloatRegister s2 = as_FloatRegister($src2$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, s2);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
13829 
// Three-way float compare against 0.0 (CmpF3 with constant zero):
// $dst := -1 / 0 / +1, unordered counting as less. Flags clobbered.
// Fixes vs previous version: removed unused `Label done` / `bind(done)`
// and closed the unbalanced parenthesis in the format string.
instruct compF3_reg_immF0(iRegINoSp dst, vRegF src1, immF0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmps $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmps(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
13856 
// Three-way double compare against 0.0 (CmpD3 with constant zero):
// $dst := -1 / 0 / +1, unordered counting as less. Flags clobbered.
// Fixes vs previous version: removed unused `Label done` / `bind(done)`
// and closed the unbalanced parenthesis in the format string.
instruct compD3_reg_immD0(iRegINoSp dst, vRegD src1, immD0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr);

  ins_cost(5 * INSN_COST);
  format %{ "fcmpd $src1, 0.0\n\t"
            "csinvw($dst, zr, zr, eq)\n\t"
            "csnegw($dst, $dst, $dst, lt)"
  %}

  ins_encode %{
    FloatRegister s1 = as_FloatRegister($src1$$reg);
    Register d = as_Register($dst$$reg);
    __ fcmpd(s1, 0.0);
    // installs 0 if EQ else -1
    __ csinvw(d, zr, zr, Assembler::EQ);
    // keeps -1 if less or unordered else installs 1
    __ csnegw(d, d, d, Assembler::LT);
  %}

  ins_pipe(pipe_class_default);
%}
13882 
// CmpLTMask: $dst := (p < q) ? -1 : 0, computed branchlessly as
// csetw (0/1 on LT) followed by negation.
instruct cmpLTMask_reg_reg(iRegINoSp dst, iRegIorL2I p, iRegIorL2I q, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);

  ins_cost(3 * INSN_COST);

  format %{ "cmpw $p, $q\t# cmpLTMask\n\t"
            "csetw $dst, lt\n\t"
            "subw $dst, zr, $dst"
  %}

  ins_encode %{
    __ cmpw(as_Register($p$$reg), as_Register($q$$reg));
    __ csetw(as_Register($dst$$reg), Assembler::LT);
    __ subw(as_Register($dst$$reg), zr, as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg_reg);
%}

// CmpLTMask against zero: an arithmetic right shift by 31 replicates the
// sign bit, yielding -1 for negative src and 0 otherwise in one insn.
instruct cmpLTMask_reg_zero(iRegINoSp dst, iRegIorL2I src, immI0 zero, rFlagsReg cr)
%{
  match(Set dst (CmpLTMask src zero));
  effect(KILL cr);

  ins_cost(INSN_COST);

  format %{ "asrw $dst, $src, #31\t# cmpLTMask0" %}

  ins_encode %{
    __ asrw(as_Register($dst$$reg), as_Register($src$$reg), 31);
  %}

  ins_pipe(ialu_reg_shift);
%}
13919 
13920 // ============================================================================
13921 // Max and Min
13922 
// Signed int minimum: cmpw + cselw(LT) selects the smaller operand
// without a branch. size(8) = exactly two 4-byte instructions.
// Fix vs previous version: the format string was malformed (missing
// operand commas and '#' comment marker, stray trailing tab).
instruct minI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MinI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, lt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::LT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13947 // FROM HERE
13948 
// Signed int maximum: cmpw + cselw(GT) selects the larger operand
// without a branch. size(8) = exactly two 4-byte instructions.
// Fix vs previous version: the format string was malformed (missing
// operand commas and '#' comment marker, stray trailing tab).
instruct maxI_rReg(iRegINoSp dst, iRegI src1, iRegI src2, rFlagsReg cr)
%{
  match(Set dst (MaxI src1 src2));

  effect(DEF dst, USE src1, USE src2, KILL cr);
  size(8);

  ins_cost(INSN_COST * 3);
  format %{
    "cmpw $src1, $src2\t# signed int\n\t"
    "cselw $dst, $src1, $src2, gt"
  %}

  ins_encode %{
    __ cmpw(as_Register($src1$$reg),
            as_Register($src2$$reg));
    __ cselw(as_Register($dst$$reg),
             as_Register($src1$$reg),
             as_Register($src2$$reg),
             Assembler::GT);
  %}

  ins_pipe(ialu_reg_reg);
%}
13973 
13974 // ============================================================================
13975 // Branch Instructions
13976 
13977 // Direct Branch.
// Unconditional direct branch (Goto ideal node).
instruct branch(label lbl)
%{
  match(Goto);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  format %{ "b  $lbl" %}

  ins_encode(aarch64_enc_b(lbl));

  ins_pipe(pipe_branch);
%}

// Conditional Near Branch
instruct branchCon(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}

// Conditional Near Branch Unsigned
// (same shape as branchCon but takes unsigned flags/condition codes)
instruct branchConU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  // Same match rule as `branchConFar'.
  match(If cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // If set to 1 this indicates that the current instruction is a
  // short variant of a long branch. This avoids using this
  // instruction in first-pass matching. It will then only be used in
  // the `Shorten_branches' pass.
  // ins_short_branch(1);
  format %{ "b$cmp  $lbl\t# unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch_cond);
%}
14033 
14034 // Make use of CBZ and CBNZ.  These instructions, as well as being
14035 // shorter than (cmp; branch), have the additional benefit of not
14036 // killing the flags.
14037 
// Compare-int-with-zero and branch: fused into cbzw/cbnzw (only EQ/NE
// conditions are matched via cmpOpEqNe).
instruct cmpI_imm0_branch(cmpOpEqNe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compare-long-with-zero and branch: fused into 64-bit cbz/cbnz.
instruct cmpL_imm0_branch(cmpOpEqNe cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Pointer null-check and branch: fused into 64-bit cbz/cbnz.
instruct cmpP_imm0_branch(cmpOpEqNe cmp, iRegP op1, immP0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Compressed-pointer null-check and branch: fused into 32-bit cbzw/cbnzw.
instruct cmpN_imm0_branch(cmpOpEqNe cmp, iRegN op1, immN0 op2, label labl, rFlagsReg cr) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Null-check on a DecodeN'd narrow oop: test the narrow form directly
// with cbzw/cbnzw, skipping the decode.
instruct cmpP_narrowOop_imm0_branch(cmpOpEqNe cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
  match(If cmp (CmpP (DecodeN oop) zero));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $oop, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ)
      __ cbzw($oop$$Register, *L);
    else
      __ cbnzw($oop$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned int compare with zero and branch. LS (unsigned <=) against
// zero is equivalent to EQ, and HI (unsigned >) to NE, so those
// conditions also map onto cbzw/cbnzw.
instruct cmpUI_imm0_branch(cmpOpUEqNeLtGe cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cbw$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbzw($op1$$Register, *L);
    else
      __ cbnzw($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Unsigned long compare with zero and branch (same condition folding as
// the int variant above, 64-bit cbz/cbnz).
instruct cmpUL_imm0_branch(cmpOpUEqNeLtGe cmp, iRegL op1, immL0 op2, label labl, rFlagsRegU cr) %{
  match(If cmp (CmpUL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    if (cond == Assembler::EQ || cond == Assembler::LS)
      __ cbz($op1$$Register, *L);
    else
      __ cbnz($op1$$Register, *L);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14156 
14157 // Test bit and Branch
14158 
14159 // Patterns for short (< 32KiB) variants
// Sign test on a long: LT/GE vs zero is just a test of bit 63, so it
// becomes tbnz/tbz. Short (< 32KiB range) variant, see ins_short_branch.
instruct cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Sign test on an int: bit 31 via tbnz/tbz. Short variant.
instruct cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test on a long: (op1 & power-of-2) ==/!= 0 becomes tbz/tbnz
// on the corresponding bit. Short variant.
instruct cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}

// Single-bit test on an int. Short variant.
instruct cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L);
  %}
  ins_pipe(pipe_cmp_branch);
  ins_short_branch(1);
%}
14225 
14226 // And far variants
// Far variant of the long sign-test branch: same pattern, but tbr is
// asked to emit a far-reaching sequence (/*far*/true). No
// ins_short_branch, so this is the default first-pass match.
instruct far_cmpL_branch_sign(cmpOpLtGe cmp, iRegL op1, immL0 op2, label labl) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # long" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int sign-test branch.
instruct far_cmpI_branch_sign(cmpOpLtGe cmp, iRegIorL2I op1, immI0 op2, label labl) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "cb$cmp   $op1, $labl # int" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond =
      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the long single-bit test branch.
instruct far_cmpL_branch_bit(cmpOpEqNe cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
  match(If cmp (CmpL (AndL op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}

// Far variant of the int single-bit test branch.
instruct far_cmpI_branch_bit(cmpOpEqNe cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
  match(If cmp (CmpI (AndI op1 op2) op3));
  predicate(is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
  effect(USE labl);

  ins_cost(BRANCH_COST);
  format %{ "tb$cmp   $op1, $op2, $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
    int bit = exact_log2($op2$$constant);
    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
  %}
  ins_pipe(pipe_cmp_branch);
%}
14288 
14289 // Test bits
14290 
// (AndL op1 imm) compared with 0: folds to a single tst when the
// immediate is encodable as a 64-bit logical immediate (see predicate).
instruct cmpL_and(cmpOp cmp, iRegL op1, immL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/false, n->in(1)->in(2)->get_long()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (AndI op1 imm) compared with 0: folds to tstw for 32-bit logical
// immediates.
instruct cmpI_and(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));
  predicate(Assembler::operand_valid_for_logical_immediate
            (/*is_32*/true, n->in(1)->in(2)->get_int()));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (AndL op1 op2) compared with 0, register-register form.
instruct cmpL_and_reg(cmpOp cmp, iRegL op1, iRegL op2, immL0 op3, rFlagsReg cr) %{
  match(Set cr (CmpL (AndL op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tst $op1, $op2 # long" %}
  ins_encode %{
    __ tst($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// (AndI op1 op2) compared with 0, register-register form.
instruct cmpI_and_reg(cmpOp cmp, iRegIorL2I op1, iRegIorL2I op2, immI0 op3, rFlagsReg cr) %{
  match(Set cr (CmpI (AndI op1 op2) op3));

  ins_cost(INSN_COST);
  format %{ "tstw $op1, $op2 # int" %}
  ins_encode %{
    __ tstw($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}
14338 
14339 
14340 // Conditional Far Branch
14341 // Conditional Far Branch Unsigned
14342 // TODO: fixme
14343 
14344 // counted loop end branch near
// Counted-loop back-branch, signed conditions; same encoding as
// branchCon but matches the CountedLoopEnd ideal node.
instruct branchLoopEnd(cmpOp cmp, rFlagsReg cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end" %}

  ins_encode(aarch64_enc_br_con(cmp, lbl));

  ins_pipe(pipe_branch);
%}

// counted loop end branch near Unsigned
instruct branchLoopEndU(cmpOpU cmp, rFlagsRegU cr, label lbl)
%{
  match(CountedLoopEnd cmp cr);

  effect(USE lbl);

  ins_cost(BRANCH_COST);
  // short variant.
  // ins_short_branch(1);
  format %{ "b$cmp $lbl \t// counted loop end unsigned" %}

  ins_encode(aarch64_enc_br_conU(cmp, lbl));

  ins_pipe(pipe_branch);
%}
14377 
14378 // counted loop end branch far
14379 // counted loop end branch far unsigned
14380 // TODO: fixme
14381 
14382 // ============================================================================
14383 // inlined locking and unlocking
14384 
// Inline monitor enter (FastLock): sets flags for the caller to branch
// on; tmp and tmp2 are scratch (TEMP) and clobbered by the encoding.
instruct cmpFastLock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP tmp2);

  // TODO
  // identify correct cost
  ins_cost(5 * INSN_COST);
  format %{ "fastlock $object,$box\t! kills $tmp,$tmp2" %}

  ins_encode(aarch64_enc_fast_lock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}

// Inline monitor exit (FastUnlock), mirror of cmpFastLock above.
instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp, iRegPNoSp tmp2)
%{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, TEMP tmp2);

  ins_cost(5 * INSN_COST);
  format %{ "fastunlock $object,$box\t! kills $tmp, $tmp2" %}

  ins_encode(aarch64_enc_fast_unlock(object, box, tmp, tmp2));

  ins_pipe(pipe_serial);
%}
14412 
14413 
14414 // ============================================================================
14415 // Safepoint Instructions
14416 
14417 // TODO
14418 // provide a near and far version of this code
14419 
// Safepoint poll: a load from the polling page; the VM arms the page to
// trap threads at safepoints. Flags are treated as killed.
instruct safePoint(rFlagsReg cr, iRegP poll)
%{
  match(SafePoint poll);
  effect(KILL cr);

  format %{
    "ldrw zr, [$poll]\t# Safepoint: poll for GC"
  %}
  ins_encode %{
    __ read_polling_page(as_Register($poll$$reg), relocInfo::poll_type);
  %}
  ins_pipe(pipe_serial); // ins_pipe(iload_reg_mem);
%}
14433 
14434 
14435 // ============================================================================
14436 // Procedure Call/Return Instructions
14437 
14438 // Call Java Static Instruction
14439 
// Direct static Java call, with the standard call epilog appended.
instruct CallStaticJavaDirect(method meth)
%{
  match(CallStaticJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "call,static $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_static_call(meth),
              aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// TO HERE

// Call Java Dynamic Instruction
// (virtual/interface dispatch via inline cache)
instruct CallDynamicJavaDirect(method meth)
%{
  match(CallDynamicJava);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL,dynamic $meth \t// ==> " %}

  ins_encode( aarch64_enc_java_dynamic_call(meth),
               aarch64_enc_call_epilog );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction

instruct CallRuntimeDirect(method meth)
%{
  match(CallRuntime);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// (leaf call: no Java frame state needed; shares the runtime encoding)

instruct CallLeafDirect(method meth)
%{
  match(CallLeaf);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}

// Call Runtime Instruction
// (leaf call that does not touch floating point state)

instruct CallLeafNoFPDirect(method meth)
%{
  match(CallLeafNoFP);

  effect(USE meth);

  ins_cost(CALL_COST);

  format %{ "CALL, runtime leaf nofp $meth" %}

  ins_encode( aarch64_enc_java_to_runtime(meth) );

  ins_pipe(pipe_class_call);
%}
14525 
14526 // Tail Call; Jump from runtime stub to Java code.
14527 // Also known as an 'interprocedural jump'.
14528 // Target of jump will eventually return to caller.
14529 // TailJump below removes the return address.
// Indirect tail call: jump (not call) to jump_target with the method
// oop in the inline-cache register; target eventually returns to caller.
instruct TailCalljmpInd(iRegPNoSp jump_target, inline_cache_RegP method_oop)
%{
  match(TailCall jump_target method_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $method_oop holds method oop" %}

  ins_encode(aarch64_enc_tail_call(jump_target));

  ins_pipe(pipe_class_call);
%}

// Tail jump used for exception forwarding: exception oop travels in r0.
instruct TailjmpInd(iRegPNoSp jump_target, iRegP_R0 ex_oop)
%{
  match(TailJump jump_target ex_oop);

  ins_cost(CALL_COST);

  format %{ "br $jump_target\t# $ex_oop holds exception oop" %}

  ins_encode(aarch64_enc_tail_jmp(jump_target));

  ins_pipe(pipe_class_call);
%}
14555 
14556 // Create exception oop: created by stack-crawling runtime code.
14557 // Created exception is now available to this handler, and is setup
14558 // just prior to jumping to this handler. No code emitted.
14559 // TODO check
14560 // should ex_oop be in r0? intel uses rax, ppc cannot use r0 so uses rarg1
// CreateEx: the exception oop is already in r0 when the handler is
// entered, so this node emits no code (size 0).
instruct CreateException(iRegP_R0 ex_oop)
%{
  match(Set ex_oop (CreateEx));

  format %{ " -- \t// exception oop; no code emitted" %}

  size(0);

  ins_encode( /*empty*/ );

  ins_pipe(pipe_class_empty);
%}

// Rethrow exception: The exception oop will come in the first
// argument position. Then JUMP (not call) to the rethrow stub code.
instruct RethrowException() %{
  match(Rethrow);
  ins_cost(CALL_COST);

  format %{ "b rethrow_stub" %}

  ins_encode( aarch64_enc_rethrow() );

  ins_pipe(pipe_class_call);
%}
14586 
14587 
14588 // Return Instruction
14589 // epilog node loads ret address into lr as part of frame pop
// Return Instruction
// epilog node loads ret address into lr as part of frame pop
instruct Ret()
%{
  match(Return);

  format %{ "ret\t// return register" %}

  ins_encode( aarch64_enc_ret() );

  ins_pipe(pipe_branch);
%}

// Halt: emits a debug breakpoint-class instruction (dpcs1) so reaching
// this point stops the process; skipped entirely if unreachable.
instruct ShouldNotReachHere() %{
  match(Halt);

  ins_cost(CALL_COST);
  format %{ "ShouldNotReachHere" %}

  ins_encode %{
    if (is_reachable()) {
      __ dpcs1(0xdead + 1);
    }
  %}

  ins_pipe(pipe_class_default);
%}
14616 
14617 // ============================================================================
14618 // Partial Subtype Check
14619 //
// Searches the superklass array for an instance of the superklass.  Set a hidden
14621 // internal cache on a hit (cache is checked with exposed code in
14622 // gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
14623 // encoding ALSO sets flags.
14624 
// Partial subtype check producing a result register; opcode(0x1)
// instructs the encoding to zero the result on a hit (see header
// comment above: NZ = miss, zero = hit; flags are also set).
instruct partialSubtypeCheck(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, rFlagsReg cr)
%{
  match(Set result (PartialSubtypeCheck sub super));
  effect(KILL cr, KILL temp);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x1); // Force zero of result reg on hit

  ins_pipe(pipe_class_memory);
%}

// Variant matched when only the flags are consumed (result compared
// against null by the graph); opcode(0x0) skips zeroing the result.
instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp, iRegP_R5 result, immP0 zero, rFlagsReg cr)
%{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect(KILL temp, KILL result);

  ins_cost(1100);  // slightly larger than the next version
  format %{ "partialSubtypeCheck $result, $sub, $super == 0" %}

  ins_encode(aarch64_enc_partial_subtype_check(sub, super, temp, result));

  opcode(0x0); // Don't zero result reg on hit

  ins_pipe(pipe_class_memory);
%}
14654 
// String compare, both strings UTF-16 (UU encoding).  Fixed registers
// are dictated by the string_compare stub: str1/cnt1 in r1/r2,
// str2/cnt2 in r3/r4, result in r0; r10/r11 are scratch.  No vector
// temps are needed for the same-encoding case (fnoreg placeholders).
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14672 
// String compare, both strings Latin-1 (LL encoding).  Same fixed
// register assignment as string_compareU; only the encoding argument
// passed to the string_compare stub differs.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14689 
// String compare, mixed encodings: str1 UTF-16 vs. str2 Latin-1 (UL).
// The mixed-encoding path additionally needs three vector scratch
// registers (v0-v2) for inflating the Latin-1 side during comparison.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14709 
// String compare, mixed encodings: str1 Latin-1 vs. str2 UTF-16 (LU).
// Mirror image of string_compareUL; uses the same three vector scratch
// registers (v0-v2) for inflating the Latin-1 side.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                        iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
                        vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
         USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
                      $vtmp3$$FloatRegister, StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}
14729 
// String.indexOf, both strings UTF-16 (UU), variable needle length.
// int_cnt2 argument of -1 tells string_indexof the needle length is
// only known at runtime (in cnt2).  Six integer TEMPs are required by
// the stub; all fixed-register inputs are consumed (USE_KILL).
instruct string_indexofUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14750 
// String.indexOf, both strings Latin-1 (LL), variable needle length.
// Identical shape to string_indexofUU except for the encoding passed
// to the stub; -1 means "needle length is runtime-only (cnt2)".
instruct string_indexofLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14771 
// String.indexOf, UTF-16 haystack with Latin-1 needle (UL), variable
// needle length.  Same register/TEMP layout as the UU and LL variants;
// -1 means "needle length is runtime-only (cnt2)".
instruct string_indexofUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
       iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3,
       iRegINoSp tmp4, iRegINoSp tmp5, iRegINoSp tmp6, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %}

  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register,
                      $tmp5$$Register, $tmp6$$Register,
                      -1, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14792 
// String.indexOf with a small compile-time-constant needle length
// (immI_le_4), both strings UTF-16.  The constant length is passed to
// the stub directly, so cnt2 is not needed (zr placeholders) and only
// four integer TEMPs are required.
instruct string_indexof_conUU(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}
14813 
// String.indexOf with a small compile-time-constant needle length
// (immI_le_4), both strings Latin-1.  Same shape as the UU constant
// variant; only the encoding argument differs.
instruct string_indexof_conLL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_le_4 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}
14834 
// String.indexOf with a constant needle length, mixed UL encoding.
// Note the tighter immediate operand (immI_1: needle length exactly 1)
// compared with the immI_le_4 used by the UU/LL constant variants.
instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
                 immI_1 int_cnt2, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                 iRegINoSp tmp3, iRegINoSp tmp4, rFlagsReg cr)
%{
  predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %}

  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, zr,
                      $tmp1$$Register, $tmp2$$Register,
                      $tmp3$$Register, $tmp4$$Register, zr, zr,
                      icnt2, $result$$Register, StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}
14855 
// String.indexOf(char) over a UTF-16 char array: finds the first
// occurrence of the char in $ch within $str1[0..$cnt1).  No encoding
// predicate — StrIndexOfChar has a single form on this platform.
instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
                              iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
                              iRegINoSp tmp3, rFlagsReg cr)
%{
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}

  ins_encode %{
    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
                           $result$$Register, $tmp1$$Register, $tmp2$$Register,
                           $tmp3$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14873 
// String equality, Latin-1 encoding (LL).  The trailing argument 1 is
// the element size in bytes passed to the string_equals stub.
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14889 
// String equality, UTF-16 encoding (UU).  The trailing argument 2 is
// the element size in bytes passed to the string_equals stub.
instruct string_equalsU(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                        iRegI_R0 result, rFlagsReg cr)
%{
  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals($str1$$Register, $str2$$Register,
                     $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14905 
// byte[] equality (LL encoding): ary1/ary2 in r1/r2, boolean result in
// r0.  r3-r5 are scratch TEMPs; r10 is clobbered by the stub.  The
// trailing argument 1 is the element size in bytes.
instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: 'ary2' was missing its '$', so PrintOptoAssembly printed the
  // literal text instead of the operand's register.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}
14922 
// char[] equality (UU encoding): same register layout as array_equalsB.
// The trailing argument 2 is the element size in bytes.
instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R3 tmp1, iRegP_R4 tmp2, iRegP_R5 tmp3,
                       iRegP_R10 tmp, rFlagsReg cr)
%{
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);

  // Fixed: 'ary2' was missing its '$', so PrintOptoAssembly printed the
  // literal text instead of the operand's register.
  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals($ary1$$Register, $ary2$$Register,
                     $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
                     $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}
14939 
// StringCoding.hasNegatives intrinsic: scans byte[] $ary1 of length
// $len for any byte with the sign bit set; boolean result in r0.
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL cr);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14950 
// fast char[] to byte[] compression
// (UTF-16 -> Latin-1).  Uses four vector TEMPs (v0-v3) for the SIMD
// narrowing loop; result in r0 reports the outcome of the copy.
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                         vRegD_V0 tmp1, vRegD_V1 tmp2,
                         vRegD_V2 tmp3, vRegD_V3 tmp4,
                         iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL R1, R2, R3, R4" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$FloatRegister, $tmp2$$FloatRegister,
                           $tmp3$$FloatRegister, $tmp4$$FloatRegister,
                           $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
14969 
// fast byte[] to char[] inflation
// (Latin-1 -> UTF-16).  Produces no value (Universe dummy); v0-v2 and
// r3 are scratch TEMPs for the SIMD widening loop.
instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len,
                        vRegD_V0 tmp1, vRegD_V1 tmp2, vRegD_V2 tmp3, iRegP_R3 tmp4, rFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}
14984 
// encode char[] to byte[] in ISO_8859_1
// Result in r0 (number of characters encoded — see the stub for the
// exact contract); v0-v3 are SIMD scratch registers.
instruct encode_iso_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
                          vRegD_V0 Vtmp1, vRegD_V1 Vtmp2,
                          vRegD_V2 Vtmp3, vRegD_V3 Vtmp4,
                          iRegI_R0 result, rFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(USE_KILL src, USE_KILL dst, USE_KILL len,
         KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
         $result$$Register, $Vtmp1$$FloatRegister,  $Vtmp2$$FloatRegister,
         $Vtmp3$$FloatRegister,  $Vtmp4$$FloatRegister);
  %}
  ins_pipe( pipe_class_memory );
%}
15003 
15004 // ============================================================================
15005 // This name is KNOWN by the ADLC and cannot be changed.
15006 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
15007 // for this guy.
15008 instruct tlsLoadP(thread_RegP dst)
15009 %{
15010   match(Set dst (ThreadLocal));
15011 
15012   ins_cost(0);
15013 
15014   format %{ " -- \t// $dst=Thread::current(), empty" %}
15015 
15016   size(0);
15017 
15018   ins_encode( /*empty*/ );
15019 
15020   ins_pipe(pipe_class_empty);
15021 %}
15022 
// ====================VECTOR INSTRUCTIONS=====================================

// Load vector (32 bits)
// ldrs into the low 32 bits of a D-sized vector register; the
// memory_size predicate disambiguates from the 8/16-byte variants.
instruct loadV4(vecD dst, vmem4 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 4);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_ldrvS(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15035 
// Load vector (64 bits)
// ldrd fills a full D-sized vector register.
instruct loadV8(vecD dst, vmem8 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 8);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_ldrvD(dst, mem) );
  ins_pipe(vload_reg_mem64);
%}
15046 
// Load Vector (128 bits)
// ldrq fills a full Q-sized (vecX) vector register.
instruct loadV16(vecX dst, vmem16 mem)
%{
  predicate(n->as_LoadVector()->memory_size() == 16);
  match(Set dst (LoadVector mem));
  ins_cost(4 * INSN_COST);
  format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  ins_pipe(vload_reg_mem128);
%}
15057 
// Store Vector (32 bits)
// strs writes the low 32 bits of a D-sized vector register.
instruct storeV4(vecD src, vmem4 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 4);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  ins_encode( aarch64_enc_strvS(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15068 
// Store Vector (64 bits)
// strd writes a full D-sized vector register.
instruct storeV8(vecD src, vmem8 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 8);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strd   $mem,$src\t# vector (64 bits)" %}
  ins_encode( aarch64_enc_strvD(src, mem) );
  ins_pipe(vstore_reg_mem64);
%}
15079 
// Store Vector (128 bits)
// strq writes a full Q-sized (vecX) vector register.
instruct storeV16(vecX src, vmem16 mem)
%{
  predicate(n->as_StoreVector()->memory_size() == 16);
  match(Set mem (StoreVector mem src));
  ins_cost(4 * INSN_COST);
  format %{ "strq   $mem,$src\t# vector (128 bits)" %}
  ins_encode( aarch64_enc_strvQ(src, mem) );
  ins_pipe(vstore_reg_mem128);
%}
15090 
// Replicate a byte from a GP register across a 64-bit vector (DUP).
// Also serves 4-byte vectors: the predicate accepts length 4 or 8,
// with the unused upper lanes simply ignored.
instruct replicate8B(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15103 
// Replicate a byte from a GP register across a 128-bit vector (DUP).
instruct replicate16B(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15115 
// Replicate a byte immediate across a 64-bit vector (MOVI).  The mask
// with 0xff keeps only the low byte of the constant.
instruct replicate8B_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15128 
// Replicate a byte immediate across a 128-bit vector (MOVI).
instruct replicate16B_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (ReplicateB con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(16B)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15140 
// Replicate a 16-bit value from a GP register across a 64-bit vector
// (DUP, T4H arrangement).  Also serves 2-element vectors.
instruct replicate4S(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15153 
// Replicate a 16-bit value from a GP register across a 128-bit vector
// (DUP, T8H arrangement).
instruct replicate8S(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (8S)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8H, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15165 
// Replicate a 16-bit immediate across a 64-bit vector (MOVI, T4H).
// The mask with 0xffff keeps only the low half-word of the constant.
instruct replicate4S_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15178 
// Replicate a 16-bit immediate across a 128-bit vector (MOVI, T8H).
instruct replicate8S_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (ReplicateS con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(8H)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15190 
// Replicate a 32-bit value from a GP register across a 64-bit vector
// (DUP, T2S arrangement).
instruct replicate2I(vecD dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}
15202 
// Replicate a 32-bit value from a GP register across a 128-bit vector
// (DUP, T4S arrangement).
instruct replicate4I(vecX dst, iRegIorL2I src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4I)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15214 
// Replicate a 32-bit immediate across a 64-bit vector (MOVI, T2S).
instruct replicate2I_imm(vecD dst, immI con)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(2I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm64);
%}
15226 
// Replicate a 32-bit immediate across a 128-bit vector (MOVI, T4S).
instruct replicate4I_imm(vecX dst, immI con)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateI con));
  ins_cost(INSN_COST);
  format %{ "movi  $dst, $con\t# vector(4I)" %}
  ins_encode %{
    __ mov(as_FloatRegister($dst$$reg), __ T4S, $con$$constant);
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15238 
// Replicate a 64-bit value from a GP register across a 128-bit vector
// (DUP, T2D arrangement).
instruct replicate2L(vecX dst, iRegL src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateL src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2L)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D, as_Register($src$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
15250 
// 128-bit vector of zeros, matched via an immI0 replicate.
// NOTE(review): named 2L but matches (ReplicateI zero) — presumably the
// ideal graph presents the zero constant as an int; confirm against the
// matcher before changing the match rule.
instruct replicate2L_zero(vecX dst, immI0 zero)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateI zero));
  ins_cost(INSN_COST);
  // Fixed format: the encoding emits EOR (dst xor dst), not MOVI, and
  // the result is a 128-bit zero, not a 4I constant.
  format %{ "eor  $dst, $dst, $dst\t# vector (2L) zero" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($dst$$reg),
           as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vmovi_reg_imm128);
%}
15264 
// Replicate a float from an FP register across a 64-bit vector
// (DUP, T2S arrangement).
instruct replicate2F(vecD dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg64);
%}
15277 
// Replicate a float from an FP register across a 128-bit vector
// (DUP, T4S arrangement).
instruct replicate4F(vecX dst, vRegF src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (ReplicateF src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_freg128);
%}
15290 
// Replicate a double from an FP register across a 128-bit vector
// (DUP, T2D arrangement).
instruct replicate2D(vecX dst, vRegD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (ReplicateD src));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg));
  %}
  ins_pipe(vdup_reg_dreg128);
%}
15303 
// ====================REDUCTION ARITHMETIC====================================

// Integer add-reduction over a 2-lane vector: dst = src1 + src2[0] +
// src2[1].  Extracts both lanes with UMOV and accumulates with ADDW.
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "umov  $tmp2, $src2, S, 1\n\t"
            "addw  $dst, $src1, $tmp\n\t"
            "addw  $dst, $dst, $tmp2\t add reduction2i"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ addw($dst$$Register, $src1$$Register, $tmp$$Register);
    __ addw($dst$$Register, $dst$$Register, $tmp2$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15324 
// Integer add-reduction over a 4-lane vector: the across-vector ADDV
// sums all four lanes into lane 0 of $tmp, which is then extracted and
// added to the scalar input.
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (AddReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2);
  format %{ "addv  $tmp, T4S, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "addw  $dst, $tmp2, $src1\t add reduction4i"
  %}
  ins_encode %{
    __ addv(as_FloatRegister($tmp$$reg), __ T4S,
            as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ addw($dst$$Register, $tmp2$$Register, $src1$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15342 
// Integer multiply-reduction over a 2-lane vector:
// dst = src1 * src2[0] * src2[1], extracting each lane with UMOV.
// dst is a TEMP as well as the result because it is written before the
// second lane is consumed.
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegINoSp tmp)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "umov  $tmp, $src2, S, 0\n\t"
            "mul   $dst, $tmp, $src1\n\t"
            "umov  $tmp, $src2, S, 1\n\t"
            "mul   $dst, $tmp, $dst\t mul reduction2i\n\t"
  %}
  ins_encode %{
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp$$Register, $src1$$Register);
    __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15361 
// Integer multiply-reduction over a 4-lane vector.  First folds the
// upper 64 bits onto the lower with INS + a 2-lane MULV (pairing lanes
// 0*2 and 1*3), then extracts the two partial products and multiplies
// them into the scalar input.
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegINoSp tmp2)
%{
  match(Set dst (MulReductionVI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP tmp2, TEMP dst);
  format %{ "ins   $tmp, $src2, 0, 1\n\t"
            "mul   $tmp, $tmp, $src2\n\t"
            "umov  $tmp2, $tmp, S, 0\n\t"
            "mul   $dst, $tmp2, $src1\n\t"
            "umov  $tmp2, $tmp, S, 1\n\t"
            "mul   $dst, $tmp2, $dst\t mul reduction4i\n\t"
  %}
  ins_encode %{
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ mulv(as_FloatRegister($tmp$$reg), __ T2S,
           as_FloatRegister($tmp$$reg), as_FloatRegister($src2$$reg));
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 0);
    __ mul($dst$$Register, $tmp2$$Register, $src1$$Register);
    __ umov($tmp2$$Register, as_FloatRegister($tmp$$reg), __ S, 1);
    __ mul($dst$$Register, $tmp2$$Register, $dst$$Register);
  %}
  ins_pipe(pipe_class_default);
%}
15386 
// Float add-reduction over a 2-lane vector, performed with scalar
// FADDS in strict lane order (0 then 1) — FP addition is not
// associative, so the lanes cannot be summed with an across-vector op.
instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\t add reduction2f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15406 
// Float add-reduction over a 4-lane vector: each lane is moved to
// position 0 of $tmp with INS and accumulated with scalar FADDS, in
// strict lane order to preserve the sequential FP rounding semantics.
instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "fadds $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fadds $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fadds $dst, $dst, $tmp\t add reduction4f"
  %}
  ins_encode %{
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fadds(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15438 
// Mul-reduction of a 2-lane float vector: dst = src1 * src2[0] * src2[1].
instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  // tmp holds the extracted lane; dst is written early, so both are TEMP.
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste in the trailing format comment: this is a 2-lane MUL
  // reduction, not "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction2f"
  %}
  ins_encode %{
    // dst = src1 * src2 lane 0.
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Move src2 lane 1 into tmp lane 0 and multiply it in.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15458 
// Mul-reduction of a 4-lane float vector:
// dst = src1 * src2[0] * src2[1] * src2[2] * src2[3].
// Lanes 1..3 are extracted into tmp and multiplied in with scalar fmuls.
instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVF src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste in the trailing format comment: this is a MUL
  // reduction, not "add reduction4f".
  format %{ "fmuls $dst, $src1, $src2\n\t"
            "ins   $tmp, S, $src2, 0, 1\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 2\n\t"
            "fmuls $dst, $dst, $tmp\n\t"
            "ins   $tmp, S, $src2, 0, 3\n\t"
            "fmuls $dst, $dst, $tmp\t mul reduction4f"
  %}
  ins_encode %{
    // dst = src1 * src2 lane 0.
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Fold in lane 1.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    // Fold in lane 2.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 2);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
    // Fold in lane 3.
    __ ins(as_FloatRegister($tmp$$reg), __ S,
           as_FloatRegister($src2$$reg), 0, 3);
    __ fmuls(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15490 
// Add-reduction of a 2-lane double vector: dst = src1 + src2[0] + src2[1].
instruct reduce_add2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (AddReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  format %{ "faddd $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "faddd $dst, $dst, $tmp\t add reduction2d"
  %}
  ins_encode %{
    // dst = src1 + src2 lane 0 (scalar faddd uses the low D lane).
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Move src2 lane 1 into tmp lane 0 and accumulate it.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ faddd(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15510 
// Mul-reduction of a 2-lane double vector: dst = src1 * src2[0] * src2[1].
instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp)
%{
  match(Set dst (MulReductionVD src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP tmp, TEMP dst);
  // Fixed copy-paste in the trailing format comment: this is a MUL
  // reduction, not "add reduction2d".
  format %{ "fmuld $dst, $src1, $src2\n\t"
            "ins   $tmp, D, $src2, 0, 1\n\t"
            "fmuld $dst, $dst, $tmp\t mul reduction2d"
  %}
  ins_encode %{
    // dst = src1 * src2 lane 0.
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
    // Move src2 lane 1 into tmp lane 0 and multiply it in.
    __ ins(as_FloatRegister($tmp$$reg), __ D,
           as_FloatRegister($src2$$reg), 0, 1);
    __ fmuld(as_FloatRegister($dst$$reg),
             as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
15530 
15531 // ====================VECTOR ARITHMETIC=======================================
15532 
15533 // --------------------------------- ADD --------------------------------------
15534 
// Vector byte add, 8B arrangement; also handles 4-lane byte vectors held
// in the same 64-bit D register.
instruct vadd8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15549 
// Vector byte add, 16B arrangement (128-bit Q register).
instruct vadd16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15563 
// Vector short add, 4H arrangement; also handles 2-lane short vectors.
instruct vadd4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15578 
// Vector short add, 8H arrangement (128-bit Q register).
instruct vadd8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15592 
// Vector int add, 2S arrangement (64-bit D register).
instruct vadd2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15606 
// Vector int add, 4S arrangement (128-bit Q register).
instruct vadd4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15620 
// Vector long add, 2D arrangement (128-bit Q register).
instruct vadd2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ addv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15634 
// Vector float add, 2S arrangement (64-bit D register).
instruct vadd2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15648 
// Vector float add, 4S arrangement (128-bit Q register).
instruct vadd4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15662 
// Vector double add, 2D arrangement (128-bit Q register).
instruct vadd2D(vecX dst, vecX src1, vecX src2)
%{
  // Added for consistency with the other 2D rules (vsub2D, vmul2D, vdiv2D,
  // ...), all of which guard on a 2-lane vector.
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fadd(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15675 
15676 // --------------------------------- SUB --------------------------------------
15677 
// Vector byte subtract, 8B arrangement; also handles 4-lane byte vectors.
instruct vsub8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15692 
// Vector byte subtract, 16B arrangement (128-bit Q register).
instruct vsub16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (SubVB src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15706 
// Vector short subtract, 4H arrangement; also handles 2-lane short vectors.
instruct vsub4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15721 
// Vector short subtract, 8H arrangement (128-bit Q register).
instruct vsub8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15735 
// Vector int subtract, 2S arrangement (64-bit D register).
instruct vsub2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop64);
%}
15749 
// Vector int subtract, 4S arrangement (128-bit Q register).
instruct vsub4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15763 
// Vector long subtract, 2D arrangement (128-bit Q register).
instruct vsub2L(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVL src1 src2));
  ins_cost(INSN_COST);
  format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
  ins_encode %{
    __ subv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop128);
%}
15777 
// Vector float subtract, 2S arrangement (64-bit D register).
instruct vsub2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp64);
%}
15791 
// Vector float subtract, 4S arrangement (128-bit Q register).
instruct vsub4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15805 
// Vector double subtract, 2D arrangement (128-bit Q register).
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fsub(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vdop_fp128);
%}
15819 
15820 // --------------------------------- MUL --------------------------------------
15821 
// Vector short multiply, 4H arrangement; also handles 2-lane short vectors.
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15836 
// Vector short multiply, 8H arrangement (128-bit Q register).
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (MulVS src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15850 
// Vector int multiply, 2S arrangement (64-bit D register).
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul64);
%}
15864 
// Vector int multiply, 4S arrangement (128-bit Q register).
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVI src1 src2));
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mulv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmul128);
%}
15878 
// Vector float multiply, 2S arrangement (64-bit D register).
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
15892 
// Vector float multiply, 4S arrangement (128-bit Q register).
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (MulVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15906 
// Vector double multiply, 2D arrangement (128-bit Q register).
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (MulVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fmul(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
15920 
15921 // --------------------------------- MLA --------------------------------------
15922 
// Vector short multiply-accumulate: dst += src1 * src2, 4H arrangement;
// also handles 2-lane short vectors. Matches the AddVS-of-MulVS pattern.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15937 
// Vector short multiply-accumulate: dst += src1 * src2, 8H arrangement.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15951 
// Vector int multiply-accumulate: dst += src1 * src2, 2S arrangement.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
15965 
// Vector int multiply-accumulate: dst += src1 * src2, 4S arrangement.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
15979 
15980 // dst + src1 * src2
15981 instruct vmla2F(vecD dst, vecD src1, vecD src2) %{
15982   predicate(UseFMA && n->as_Vector()->length() == 2);
15983   match(Set dst (FmaVF  dst (Binary src1 src2)));
15984   format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
15985   ins_cost(INSN_COST);
15986   ins_encode %{
15987     __ fmla(as_FloatRegister($dst$$reg), __ T2S,
15988             as_FloatRegister($src1$$reg),
15989             as_FloatRegister($src2$$reg));
15990   %}
15991   ins_pipe(vmuldiv_fp64);
15992 %}
15993 
15994 // dst + src1 * src2
15995 instruct vmla4F(vecX dst, vecX src1, vecX src2) %{
15996   predicate(UseFMA && n->as_Vector()->length() == 4);
15997   match(Set dst (FmaVF  dst (Binary src1 src2)));
15998   format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
15999   ins_cost(INSN_COST);
16000   ins_encode %{
16001     __ fmla(as_FloatRegister($dst$$reg), __ T4S,
16002             as_FloatRegister($src1$$reg),
16003             as_FloatRegister($src2$$reg));
16004   %}
16005   ins_pipe(vmuldiv_fp128);
16006 %}
16007 
16008 // dst + src1 * src2
16009 instruct vmla2D(vecX dst, vecX src1, vecX src2) %{
16010   predicate(UseFMA && n->as_Vector()->length() == 2);
16011   match(Set dst (FmaVD  dst (Binary src1 src2)));
16012   format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
16013   ins_cost(INSN_COST);
16014   ins_encode %{
16015     __ fmla(as_FloatRegister($dst$$reg), __ T2D,
16016             as_FloatRegister($src1$$reg),
16017             as_FloatRegister($src2$$reg));
16018   %}
16019   ins_pipe(vmuldiv_fp128);
16020 %}
16021 
16022 // --------------------------------- MLS --------------------------------------
16023 
// Vector short multiply-subtract: dst -= src1 * src2, 4H arrangement;
// also handles 2-lane short vectors.
instruct vmls4S(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16038 
// Vector short multiply-subtract: dst -= src1 * src2, 8H arrangement.
instruct vmls8S(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16052 
// Vector int multiply-subtract: dst -= src1 * src2, 2S arrangement.
instruct vmls2I(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla64);
%}
16066 
// Vector int multiply-subtract: dst -= src1 * src2, 4S arrangement.
instruct vmls4I(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (SubVI dst (MulVI src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmla128);
%}
16080 
16081 // dst - src1 * src2
16082 instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
16083   predicate(UseFMA && n->as_Vector()->length() == 2);
16084   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
16085   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
16086   format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
16087   ins_cost(INSN_COST);
16088   ins_encode %{
16089     __ fmls(as_FloatRegister($dst$$reg), __ T2S,
16090             as_FloatRegister($src1$$reg),
16091             as_FloatRegister($src2$$reg));
16092   %}
16093   ins_pipe(vmuldiv_fp64);
16094 %}
16095 
16096 // dst - src1 * src2
16097 instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
16098   predicate(UseFMA && n->as_Vector()->length() == 4);
16099   match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
16100   match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
16101   format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
16102   ins_cost(INSN_COST);
16103   ins_encode %{
16104     __ fmls(as_FloatRegister($dst$$reg), __ T4S,
16105             as_FloatRegister($src1$$reg),
16106             as_FloatRegister($src2$$reg));
16107   %}
16108   ins_pipe(vmuldiv_fp128);
16109 %}
16110 
16111 // dst - src1 * src2
16112 instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
16113   predicate(UseFMA && n->as_Vector()->length() == 2);
16114   match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
16115   match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
16116   format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
16117   ins_cost(INSN_COST);
16118   ins_encode %{
16119     __ fmls(as_FloatRegister($dst$$reg), __ T2D,
16120             as_FloatRegister($src1$$reg),
16121             as_FloatRegister($src2$$reg));
16122   %}
16123   ins_pipe(vmuldiv_fp128);
16124 %}
16125 
16126 // --------------------------------- DIV --------------------------------------
16127 
// Vector float divide, 2S arrangement (64-bit D register).
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp64);
%}
16141 
// Vector float divide, 4S arrangement (128-bit Q register).
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (DivVF src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16155 
// Vector double divide, 2D arrangement (128-bit Q register).
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (DivVD src1 src2));
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    __ fdiv(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vmuldiv_fp128);
%}
16169 
16170 // --------------------------------- SQRT -------------------------------------
16171 
// Vector double square root, 2D arrangement (128-bit Q register).
instruct vsqrt2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (SqrtVD src));
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
             as_FloatRegister($src$$reg));
  %}
  ins_pipe(vsqrt_fp128);
%}
16183 
16184 // --------------------------------- ABS --------------------------------------
16185 
// Vector float absolute value, 2S arrangement (64-bit D register).
instruct vabs2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16198 
// Vector float absolute value, 4S arrangement (128-bit Q register).
instruct vabs4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16211 
// Vector double absolute value, 2D arrangement (128-bit Q register).
instruct vabs2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AbsVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fabs  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16224 
16225 // --------------------------------- NEG --------------------------------------
16226 
// Vector float negate, 2S arrangement (64-bit D register).
instruct vneg2F(vecD dst, vecD src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp64);
%}
16239 
// Vector float negate, 4S arrangement (128-bit Q register).
instruct vneg4F(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (NegVF src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16252 
// Vector double negate, 2D arrangement (128-bit Q register).
instruct vneg2D(vecX dst, vecX src)
%{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (NegVD src));
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg));
  %}
  ins_pipe(vunop_fp128);
%}
16265 
16266 // --------------------------------- AND --------------------------------------
16267 
// Bitwise AND of vectors up to 8 bytes (predicate uses length_in_bytes
// because AndV is element-type agnostic).
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16282 
// Bitwise AND of 16-byte vectors (128-bit Q register).
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (AndV src1 src2));
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ andr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16296 
16297 // --------------------------------- OR ---------------------------------------
16298 
// Bitwise OR of vectors up to 8 bytes (predicate uses length_in_bytes
// because OrV is element-type agnostic).
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  // Fixed format mnemonic: this rule emits orr, not and (copy-paste from
  // vand8B); matches the vor16B format below.
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src1$$reg),
           as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
16313 
// Bitwise OR of 16-byte vectors (128-bit Q register).
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (OrV src1 src2));
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ orr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16327 
16328 // --------------------------------- XOR --------------------------------------
16329 
// Bitwise XOR of two byte vectors held in 64-bit SIMD registers.  The
// format uses the generic name "xor"; the AArch64 mnemonic actually
// emitted is eor.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}

// Bitwise XOR of two 128-bit vectors (16 bytes); emits eor, see above.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "xor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
16358 
16359 // ------------------------------ Shift ---------------------------------------
// Materialize a vector shift count: dup replicates the low byte of the
// scalar count register into every lane of the 64-bit vector.  The same
// rule serves both left- and right-shift count nodes.
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg64);
%}

// 128-bit variant: splat the count byte across all 16 lanes.
instruct vshiftcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    __ dup(as_FloatRegister($dst$$reg), __ T16B, as_Register($cnt$$reg));
  %}
  ins_pipe(vdup_reg_reg128);
%}
16381 
// Left shift of a byte vector by a per-element register shift count
// (sshl shifts left for positive counts).
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the byte-vector left shift.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}
16408 
16409 // Right shifts with vector shift count on aarch64 SIMD are implemented
16410 // as left shift by negative shift count.
16411 // There are two cases for vector shift count.
16412 //
16413 // Case 1: The vector shift count is from replication.
16414 //        |            |
16415 //    LoadVector  RShiftCntV
16416 //        |       /
16417 //     RShiftVI
// Note: In an inner loop, multiple neg instructions are generated; they can
// be hoisted to the outer loop and merged into one neg instruction.
16420 //
16421 // Case 2: The vector shift count is from loading.
16422 // This case isn't supported by middle-end now. But it's supported by
16423 // panama/vectorIntrinsics(JEP 338: Vector API).
16424 //        |            |
16425 //    LoadVector  LoadVector
16426 //        |       /
16427 //     RShiftVI
16428 //
16429 
// Arithmetic right shift of a byte vector by a register count.  AArch64
// has no right-shift-by-register, so the count is negated into tmp and a
// signed left shift (sshl) by the negative count is used — see the
// comment block above.  The negate is done at T8B (byte) granularity;
// the count vector is a byte splat (see vshiftcnt8B), so every lane gets
// the right value.
instruct vsra8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the arithmetic right shift.
instruct vsra16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical (unsigned) right shift: same negate-then-shift-left idiom,
// but using ushl so vacated bits are zero-filled.
instruct vsrl8B(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the logical right shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (16B)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16499 
// Left shift of a byte vector by an immediate.  A byte shifted left by 8
// or more is all zero (and shl cannot encode such a count), so for
// sh >= 8 the destination is cleared with eor dst,src,src instead.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      // Result is zero for over-wide shifts.
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate byte left shift (same zeroing rule).
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (LShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Arithmetic right shift by immediate.  Counts >= 8 are clamped to 7:
// an arithmetic shift by the element width replicates the sign bit,
// which a shift by 7 already produces.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T8B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate arithmetic right shift (same clamp).
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) sh = 7;
    __ sshr(as_FloatRegister($dst$$reg), __ T16B,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Logical right shift by immediate: over-wide shifts zero the result,
// so sh >= 8 clears the destination with eor as in vsll8B_imm.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 ||
            n->as_Vector()->length() == 8);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate logical right shift.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 8) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16606 
// Left shift of a short (16-bit element) vector by a register count.
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant: eight 16-bit lanes.
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Arithmetic right shift: negate count, then sshl (see comment block
// above).  negr runs at byte (T8B) granularity while sshl runs at T4H;
// this works because the count vector is a byte splat (see vshiftcnt8B),
// so all bytes of each 16-bit lane hold the same value.
instruct vsra4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the arithmetic right shift for shorts.
instruct vsra8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift for shorts: negate count, then ushl (zero fill).
instruct vsrl4S(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the logical right shift for shorts.
instruct vsrl8S(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (8H)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8H,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16703 
// Left shift of a short vector by an immediate.  A 16-bit element shifted
// left by 16 or more is all zero (and shl cannot encode such a count), so
// the destination is cleared with eor dst,src,src in that case.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate short left shift.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ shl(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}

// Arithmetic right shift by immediate; counts >= 16 are clamped to 15,
// which already yields the pure sign-fill result.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T4H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate arithmetic right shift for shorts.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) sh = 15;
    __ sshr(as_FloatRegister($dst$$reg), __ T8H,
           as_FloatRegister($src$$reg), sh);
  %}
  ins_pipe(vshift128_imm);
%}

// Logical right shift by immediate; over-wide counts zero the result.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 ||
            n->as_Vector()->length() == 4);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T8B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T4H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate logical right shift for shorts.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    int sh = (int)$shift$$constant;
    if (sh >= 16) {
      __ eor(as_FloatRegister($dst$$reg), __ T16B,
             as_FloatRegister($src$$reg),
             as_FloatRegister($src$$reg));
    } else {
      __ ushr(as_FloatRegister($dst$$reg), __ T8H,
             as_FloatRegister($src$$reg), sh);
    }
  %}
  ins_pipe(vshift128_imm);
%}
16810 
// Left shift of an int (32-bit element) vector by a register count.
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant: four 32-bit lanes.
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Arithmetic right shift: negate count, then sshl (see comment block
// above).  The byte-granularity negr is valid because the count vector
// is a byte splat (see vshiftcnt8B).
instruct vsra2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the arithmetic right shift for ints.
instruct vsra4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift for ints: negate count, then ushl (zero fill).
instruct vsrl2I(vecD dst, vecD src, vecD shift, vecD tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift64);
%}

// 128-bit variant of the logical right shift for ints.
instruct vsrl4I(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (4S)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
16904 
// Immediate shifts for 32-bit elements.  Unlike the byte/short variants
// there is no clamping or zeroing of over-wide counts here — presumably
// the constant is already in [0, 31] when these rules match; TODO(review)
// confirm the middle end guarantees this.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate int left shift.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T4S,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift (sign fill) for two ints.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate arithmetic right shift for ints.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift (zero fill) for two ints.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift64_imm);
%}

// 128-bit variant of the immediate logical right shift for ints.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T4S,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
16982 
// Left shift of a long (64-bit element) vector by a register count; only
// the 128-bit (2D) form exists.
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($shift$$reg));
  %}
  ins_pipe(vshift128);
%}

// Arithmetic right shift for longs: negate count (byte granularity, see
// vshiftcnt16B splat) then sshl — see the comment block above.
instruct vsra2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "sshl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}

// Logical right shift for longs: negate count, then ushl (zero fill).
instruct vsrl2L(vecX dst, vecX src, vecX shift, vecX tmp) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  effect(TEMP tmp);
  format %{ "negr  $tmp,$shift\t"
            "ushl  $dst,$src,$tmp\t# vector (2D)" %}
  ins_encode %{
    __ negr(as_FloatRegister($tmp$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            as_FloatRegister($tmp$$reg));
  %}
  ins_pipe(vshift128);
%}
17029 
// Immediate shifts for 64-bit elements.  As with the int immediate rules,
// no clamping of the constant is done here — presumably it is already in
// [0, 63] when these rules match; TODO(review) confirm.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "shl    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ shl(as_FloatRegister($dst$$reg), __ T2D,
           as_FloatRegister($src$$reg),
           (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate arithmetic right shift (sign fill) for two longs.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ sshr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}

// Immediate logical right shift (zero fill) for two longs.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushr    $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    __ ushr(as_FloatRegister($dst$$reg), __ T2D,
            as_FloatRegister($src$$reg),
            (int)$shift$$constant);
  %}
  ins_pipe(vshift128_imm);
%}
17068 
17069 //----------PEEPHOLE RULES-----------------------------------------------------
17070 // These must follow all instruction definitions as they use the names
17071 // defined in the instructions definitions.
17072 //
17073 // peepmatch ( root_instr_name [preceding_instruction]* );
17074 //
17075 // peepconstraint %{
17076 // (instruction_number.operand_name relational_op instruction_number.operand_name
17077 //  [, ...] );
17078 // // instruction numbers are zero-based using left to right order in peepmatch
17079 //
17080 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
17081 // // provide an instruction_number.operand_name for each operand that appears
17082 // // in the replacement instruction's match rule
17083 //
17084 // ---------VM FLAGS---------------------------------------------------------
17085 //
17086 // All peephole optimizations can be turned off using -XX:-OptoPeephole
17087 //
17088 // Each peephole rule is given an identifying number starting with zero and
17089 // increasing by one in the order seen by the parser.  An individual peephole
17090 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
17091 // on the command-line.
17092 //
17093 // ---------CURRENT LIMITATIONS----------------------------------------------
17094 //
17095 // Only match adjacent instructions in same basic block
17096 // Only equality constraints
17097 // Only constraints between operands, not (0.dest_reg == RAX_enc)
17098 // Only one replacement instruction
17099 //
17100 // ---------EXAMPLE----------------------------------------------------------
17101 //
17102 // // pertinent parts of existing instructions in architecture description
17103 // instruct movI(iRegINoSp dst, iRegI src)
17104 // %{
17105 //   match(Set dst (CopyI src));
17106 // %}
17107 //
17108 // instruct incI_iReg(iRegINoSp dst, immI1 src, rFlagsReg cr)
17109 // %{
17110 //   match(Set dst (AddI dst src));
17111 //   effect(KILL cr);
17112 // %}
17113 //
17114 // // Change (inc mov) to lea
17115 // peephole %{
//   // increment preceded by register-register move
17117 //   peepmatch ( incI_iReg movI );
17118 //   // require that the destination register of the increment
17119 //   // match the destination register of the move
17120 //   peepconstraint ( 0.dst == 1.dst );
17121 //   // construct a replacement instruction that sets
17122 //   // the destination to ( move's source register + one )
17123 //   peepreplace ( leaI_iReg_immI( 0.dst 1.src 0.src ) );
17124 // %}
17125 //
17126 
17127 // Implementation no longer uses movX instructions since
17128 // machine-independent system no longer uses CopyX nodes.
17129 //
17130 // peephole
17131 // %{
17132 //   peepmatch (incI_iReg movI);
17133 //   peepconstraint (0.dst == 1.dst);
17134 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17135 // %}
17136 
17137 // peephole
17138 // %{
17139 //   peepmatch (decI_iReg movI);
17140 //   peepconstraint (0.dst == 1.dst);
17141 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17142 // %}
17143 
17144 // peephole
17145 // %{
17146 //   peepmatch (addI_iReg_imm movI);
17147 //   peepconstraint (0.dst == 1.dst);
17148 //   peepreplace (leaI_iReg_immI(0.dst 1.src 0.src));
17149 // %}
17150 
17151 // peephole
17152 // %{
17153 //   peepmatch (incL_iReg movL);
17154 //   peepconstraint (0.dst == 1.dst);
17155 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17156 // %}
17157 
17158 // peephole
17159 // %{
17160 //   peepmatch (decL_iReg movL);
17161 //   peepconstraint (0.dst == 1.dst);
17162 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17163 // %}
17164 
17165 // peephole
17166 // %{
17167 //   peepmatch (addL_iReg_imm movL);
17168 //   peepconstraint (0.dst == 1.dst);
17169 //   peepreplace (leaL_iReg_immL(0.dst 1.src 0.src));
17170 // %}
17171 
17172 // peephole
17173 // %{
17174 //   peepmatch (addP_iReg_imm movP);
17175 //   peepconstraint (0.dst == 1.dst);
17176 //   peepreplace (leaP_iReg_imm(0.dst 1.src 0.src));
17177 // %}
17178 
17179 // // Change load of spilled value to only a spill
17180 // instruct storeI(memory mem, iRegI src)
17181 // %{
17182 //   match(Set mem (StoreI mem src));
17183 // %}
17184 //
17185 // instruct loadI(iRegINoSp dst, memory mem)
17186 // %{
17187 //   match(Set dst (LoadI mem));
17188 // %}
17189 //
17190 
17191 //----------SMARTSPILL RULES---------------------------------------------------
17192 // These must follow all instruction definitions as they use the names
17193 // defined in the instructions definitions.
17194 
17195 // Local Variables:
17196 // mode: c++
17197 // End: