//
// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2020, Arm Limited. All rights reserved.
// Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// Riscv64 VEC Architecture Description File

opclass vmemA(indirect);

source_hpp %{
  bool op_vec_supported(int opcode);
%}

source %{
  static Assembler::SEW elemType_to_sew(BasicType bt) {
    return Assembler::elemBytes_to_sew(type2aelembytes(bt));
  }

  static void loadStore(C2_MacroAssembler masm, bool is_store,
                        VectorRegister reg, BasicType bt, Register base) {
    Assembler::SEW sew = elemType_to_sew(bt);
    masm.vsetvli(t0, x0, sew);
    if (is_store) {
      masm.vsex_v(reg, base, sew);
    } else {
      masm.vlex_v(reg, base, sew);
    }
  }
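
  // Note on the vsetvli pattern used throughout this file: with rs1 = x0 and a
  // non-x0 destination register (t0), vsetvli sets vl to VLMAX for the requested
  // element width, so each vector instruction below operates on all elements of
  // the register group at the selected SEW.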

  bool op_vec_supported(int opcode) {
    switch (opcode) {
      // No multiply reduction instructions
      case Op_MulReductionVD: // fall through
      case Op_MulReductionVF: // fall through
      case Op_MulReductionVI: // fall through
      case Op_MulReductionVL: // fall through
      // Others
      case Op_Extract:        // fall through
      case Op_ExtractB:       // fall through
      case Op_ExtractC:       // fall through
      case Op_ExtractD:       // fall through
      case Op_ExtractF:       // fall through
      case Op_ExtractI:       // fall through
      case Op_ExtractL:       // fall through
      case Op_ExtractS:       // fall through
      case Op_ExtractUB:
      // Vector API specific
      case Op_AndReductionV:
      case Op_OrReductionV:
      case Op_XorReductionV:
      case Op_LoadVectorGather:
      case Op_StoreVectorScatter:
      case Op_VectorBlend:
      case Op_VectorCast:
      case Op_VectorCastB2X:
      case Op_VectorCastD2X:
      case Op_VectorCastF2X:
      case Op_VectorCastI2X:
      case Op_VectorCastL2X:
      case Op_VectorCastS2X:
      case Op_VectorInsert:
      case Op_VectorLoadConst:
      case Op_VectorLoadMask:
      case Op_VectorLoadShuffle:
      case Op_VectorMaskCmp:
      case Op_VectorRearrange:
      case Op_VectorReinterpret:
      case Op_VectorStoreMask:
      case Op_VectorTest:
        return false;
      default:
        return UseVExt;
    }
  }

%}

definitions %{
  int_def VEC_COST             (200, 200);
%}

// All VEC instructions

// vector load/store
instruct loadV(vReg dst, vmemA mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(VEC_COST);
  format %{ "vle $dst, $mem\t#@loadV" %}
  ins_encode %{
    VectorRegister dst_reg = as_VectorRegister($dst$$reg);
    loadStore(C2_MacroAssembler(&cbuf), false, dst_reg,
              Matcher::vector_element_basic_type(this), as_Register($mem$$base));
  %}
  ins_pipe(pipe_slow);
%}

instruct storeV(vReg src, vmemA mem) %{
  match(Set mem (StoreVector mem src));
  ins_cost(VEC_COST);
  format %{ "vse $src, $mem\t#@storeV" %}
  ins_encode %{
    VectorRegister src_reg = as_VectorRegister($src$$reg);
    loadStore(C2_MacroAssembler(&cbuf), true, src_reg,
              Matcher::vector_element_basic_type(this, $src), as_Register($mem$$base));
  %}
  ins_pipe(pipe_slow);
%}

// vector abs

instruct vabsB(vReg dst, vReg src, vReg tmp) %{
  match(Set dst (AbsVB src));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vrsub.vi $tmp, 0, $src\t#@vabsB\n\t"
            "vmax.vv $dst, $tmp, $src" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
    __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
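
// Integer abs idiom (vabsB/S/I/L): vrsub.vi with immediate 0 computes
// 0 - src, i.e. element-wise negation, into tmp; vmax.vv of tmp and src
// then yields the absolute value.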

instruct vabsS(vReg dst, vReg src, vReg tmp) %{
  match(Set dst (AbsVS src));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vrsub.vi $tmp, 0, $src\t#@vabsS\n\t"
            "vmax.vv $dst, $tmp, $src" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
    __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsI(vReg dst, vReg src, vReg tmp) %{
  match(Set dst (AbsVI src));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vrsub.vi $tmp, 0, $src\t#@vabsI\n\t"
            "vmax.vv $dst, $tmp, $src" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
    __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsL(vReg dst, vReg src, vReg tmp) %{
  match(Set dst (AbsVL src));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vrsub.vi $tmp, 0, $src\t#@vabsL\n\t"
            "vmax.vv $dst, $tmp, $src" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vrsub_vi(as_VectorRegister($tmp$$reg), 0, as_VectorRegister($src$$reg));
    __ vmax_vv(as_VectorRegister($dst$$reg), as_VectorRegister($tmp$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsF(vReg dst, vReg src) %{
  match(Set dst (AbsVF src));
  ins_cost(VEC_COST);
  format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vabsD(vReg dst, vReg src) %{
  match(Set dst (AbsVD src));
  ins_cost(VEC_COST);
  format %{ "vfsgnjx.vv $dst, $src, $src, vm\t#@vabsD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfsgnjx_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}
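
// Float/double abs: vfsgnjx.vv with both source operands equal XORs each
// element's sign bit with itself, clearing it; this is the standard RVV
// vfabs.v idiom.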

// vector add

instruct vaddB(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVB src1 src2));
  ins_cost(VEC_COST);
  format %{ "vadd.vv $dst, $src1, $src2\t#@vaddB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vadd_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddS(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVS src1 src2));
  ins_cost(VEC_COST);
  format %{ "vadd.vv $dst, $src1, $src2\t#@vaddS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vadd_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddI(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVI src1 src2));
  ins_cost(VEC_COST);
  format %{ "vadd.vv $dst, $src1, $src2\t#@vaddI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vadd_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddL(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVL src1 src2));
  ins_cost(VEC_COST);
  format %{ "vadd.vv $dst, $src1, $src2\t#@vaddL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vadd_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddF(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVF src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfadd_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vaddD(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AddVD src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfadd.vv $dst, $src1, $src2\t#@vaddD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfadd_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector and

instruct vand(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (AndV src1 src2));
  ins_cost(VEC_COST);
  format %{ "vand.vv  $dst, $src1, $src2\t#@vand" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vand_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector or

instruct vor(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (OrV src1 src2));
  ins_cost(VEC_COST);
  format %{ "vor.vv  $dst, $src1, $src2\t#@vor" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vor_vv(as_VectorRegister($dst$$reg),
              as_VectorRegister($src1$$reg),
              as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector xor

instruct vxor(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (XorV src1 src2));
  ins_cost(VEC_COST);
  format %{ "vxor.vv  $dst, $src1, $src2\t#@vxor" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vxor_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}
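
// AndV/OrV/XorV are pure bitwise operations, so the element width configured
// by vsetvli does not affect the result; a single e64 setting covers every
// vector element type.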

// vector float div

instruct vdivF(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (DivVF src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfdiv.vv  $dst, $src1, $src2\t#@vdivF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfdiv_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vdivD(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (DivVD src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfdiv.vv  $dst, $src1, $src2\t#@vdivD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfdiv_vv(as_VectorRegister($dst$$reg),
                as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector integer max/min

instruct vmax(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT &&
            n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmax.vv $dst, $src1, $src2\t#@vmax" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW sew = elemType_to_sew(bt);
    __ vsetvli(t0, x0, sew);
    __ vmax_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmin(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() != T_FLOAT &&
            n->bottom_type()->is_vect()->element_basic_type() != T_DOUBLE);
  match(Set dst (MinV src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmin.vv $dst, $src1, $src2\t#@vmin" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this);
    Assembler::SEW sew = elemType_to_sew(bt);
    __ vsetvli(t0, x0, sew);
    __ vmin_vv(as_VectorRegister($dst$$reg),
               as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector floating-point max/min

instruct vmaxF(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxV src1 src2));
  effect(TEMP_DEF dst);
  ins_cost(VEC_COST);
  format %{ "vmaxF $dst, $src1, $src2\t#@vmaxF" %}
  ins_encode %{
    __ minmax_FD_v(as_VectorRegister($dst$$reg),
                   as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                   false /* is_double */, false /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmaxD(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxV src1 src2));
  effect(TEMP_DEF dst);
  ins_cost(VEC_COST);
  format %{ "vmaxD $dst, $src1, $src2\t#@vmaxD" %}
  ins_encode %{
    __ minmax_FD_v(as_VectorRegister($dst$$reg),
                   as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                   true /* is_double */, false /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

instruct vminF(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinV src1 src2));
  effect(TEMP_DEF dst);
  ins_cost(VEC_COST);
  format %{ "vminF $dst, $src1, $src2\t#@vminF" %}
  ins_encode %{
    __ minmax_FD_v(as_VectorRegister($dst$$reg),
                   as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                   false /* is_double */, true /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

instruct vminD(vReg dst, vReg src1, vReg src2) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinV src1 src2));
  effect(TEMP_DEF dst);
  ins_cost(VEC_COST);
  format %{ "vminD $dst, $src1, $src2\t#@vminD" %}
  ins_encode %{
    __ minmax_FD_v(as_VectorRegister($dst$$reg),
                   as_VectorRegister($src1$$reg), as_VectorRegister($src2$$reg),
                   true /* is_double */, true /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}
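
// minmax_FD_v is a C2_MacroAssembler helper; presumably it implements Java's
// Math.max/min semantics for NaN and -0.0 (which plain vfmax.vv/vfmin.vv do
// not match), and dst is TEMP_DEF because the helper may write it before both
// sources have been fully read.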

// vector fmla

// dst_src1 = dst_src1 + src2 * src3
instruct vfmlaF(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vfmlaD(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vfmacc.vv $dst_src1, $src2, $src3\t#@vfmlaD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfmacc_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector fmls

// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsF(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVF dst_src1 (Binary (NegVF src2) src3)));
  match(Set dst_src1 (FmaVF dst_src1 (Binary src2 (NegVF src3))));
  ins_cost(VEC_COST);
  format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
                  as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmlsD(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVD dst_src1 (Binary (NegVD src2) src3)));
  match(Set dst_src1 (FmaVD dst_src1 (Binary src2 (NegVD src3))));
  ins_cost(VEC_COST);
  format %{ "vfnmsac.vv $dst_src1, $src2, $src3\t#@vfmlsD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfnmsac_vv(as_VectorRegister($dst_src1$$reg),
                  as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmla

// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaF(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary (NegVF src2) src3)));
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 (NegVF src3))));
  ins_cost(VEC_COST);
  format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
                  as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmlaD(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary (NegVD src2) src3)));
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 (NegVD src3))));
  ins_cost(VEC_COST);
  format %{ "vfnmacc.vv $dst_src1, $src2, $src3\t#@vfnmlaD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfnmacc_vv(as_VectorRegister($dst_src1$$reg),
                  as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector fnmls

// dst_src1 = -dst_src1 + src2 * src3
instruct vfnmlsF(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVF (NegVF dst_src1) (Binary src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = -dst_src1 + src2 * src3
instruct vfnmlsD(vReg dst_src1, vReg src2, vReg src3) %{
  predicate(UseFMA);
  match(Set dst_src1 (FmaVD (NegVD dst_src1) (Binary src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vfmsac.vv $dst_src1, $src2, $src3\t#@vfnmlsD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
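
// RVV multiply-add forms used above (vd is both an input and the destination):
//   vfmacc.vv  vd, vs1, vs2  :  vd =  (vs1 * vs2) + vd   (fmla)
//   vfnmsac.vv vd, vs1, vs2  :  vd = -(vs1 * vs2) + vd   (fmls)
//   vfnmacc.vv vd, vs1, vs2  :  vd = -(vs1 * vs2) - vd   (fnmla)
//   vfmsac.vv  vd, vs1, vs2  :  vd =  (vs1 * vs2) - vd   (fnmls)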

// vector mla

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaB(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (AddVB dst_src1 (MulVB src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
                as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaS(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (AddVS dst_src1 (MulVS src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
                as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaI(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (AddVI dst_src1 (MulVI src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
                as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 + src2 * src3
instruct vmlaL(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (AddVL dst_src1 (MulVL src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vmacc.vv $dst_src1, $src2, $src3\t#@vmlaL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmacc_vv(as_VectorRegister($dst_src1$$reg),
                as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector mls

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsB(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (SubVB dst_src1 (MulVB src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsS(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (SubVS dst_src1 (MulVS src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsI(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (SubVI dst_src1 (MulVI src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// dst_src1 = dst_src1 - src2 * src3
instruct vmlsL(vReg dst_src1, vReg src2, vReg src3) %{
  match(Set dst_src1 (SubVL dst_src1 (MulVL src2 src3)));
  ins_cost(VEC_COST);
  format %{ "vnmsac.vv $dst_src1, $src2, $src3\t#@vmlsL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vnmsac_vv(as_VectorRegister($dst_src1$$reg),
                 as_VectorRegister($src2$$reg), as_VectorRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}
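
// vmacc.vv (vd = vd + vs1 * vs2) and vnmsac.vv (vd = vd - vs1 * vs2) are the
// integer counterparts of vfmacc.vv/vfnmsac.vv; results wrap modulo the
// element width.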

// vector mul

instruct vmulB(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVB src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmul.vv $dst, $src1, $src2\t#@vmulB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulS(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVS src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmul.vv $dst, $src1, $src2\t#@vmulS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulI(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVI src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmul.vv $dst, $src1, $src2\t#@vmulI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulL(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVL src1 src2));
  ins_cost(VEC_COST);
  format %{ "vmul.vv $dst, $src1, $src2\t#@vmulL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulF(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVF src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vmulD(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (MulVD src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfmul.vv $dst, $src1, $src2\t#@vmulD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfmul_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector fneg

instruct vnegF(vReg dst, vReg src) %{
  match(Set dst (NegVF src));
  ins_cost(VEC_COST);
  format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vnegD(vReg dst, vReg src) %{
  match(Set dst (NegVD src));
  ins_cost(VEC_COST);
  format %{ "vfsgnjn.vv $dst, $src, $src\t#@vnegD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfneg_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// popcount vector

instruct vpopcountI(iRegINoSp dst, vReg src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopc.m $dst, $src\t#@vpopcountI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vpopc_m(as_Register($dst$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector add reduction

instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vmv.s.x $tmp, $src1\t#@reduce_addB\n\t"
            "vredsum.vs $tmp, $src2, $tmp\n\t"
            "vmv.x.s  $dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                  as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vmv.s.x $tmp, $src1\t#@reduce_addS\n\t"
            "vredsum.vs $tmp, $src2, $tmp\n\t"
            "vmv.x.s  $dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                  as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (AddReductionVI src1 src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vmv.s.x $tmp, $src1\t#@reduce_addI\n\t"
            "vredsum.vs $tmp, $src2, $tmp\n\t"
            "vmv.x.s  $dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                  as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (AddReductionVL src1 src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vmv.s.x $tmp, $src1\t#@reduce_addL\n\t"
            "vredsum.vs $tmp, $src2, $tmp\n\t"
            "vmv.x.s  $dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredsum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                  as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_addF(fRegF src1_dst, vReg src2, vReg tmp) %{
  match(Set src1_dst (AddReductionVF src1_dst src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addF\n\t"
            "vfredosum.vs $tmp, $src2, $tmp\n\t"
            "vfmv.f.s $src1_dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
    __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                    as_VectorRegister($tmp$$reg));
    __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct reduce_addD(fRegD src1_dst, vReg src2, vReg tmp) %{
  match(Set src1_dst (AddReductionVD src1_dst src2));
  effect(TEMP tmp);
  ins_cost(VEC_COST);
  format %{ "vfmv.s.f $tmp, $src1_dst\t#@reduce_addD\n\t"
            "vfredosum.vs $tmp, $src2, $tmp\n\t"
            "vfmv.f.s $src1_dst, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfmv_s_f(as_VectorRegister($tmp$$reg), $src1_dst$$FloatRegister);
    __ vfredosum_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg),
                    as_VectorRegister($tmp$$reg));
    __ vfmv_f_s($src1_dst$$FloatRegister, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
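
// The float/double add reductions use vfredosum.vs, the ordered reduction,
// because Java floating-point addition is not associative and the result must
// match a strict sequential sum; the unordered variant may reassociate terms.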

// vector integer max reduction
instruct vreduce_maxB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_maxB $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
    Label Ldone;
    __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_maxS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_maxS $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
    Label Ldone;
    __ ble(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_maxI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_maxI $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_maxL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_maxL $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredmax_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector integer min reduction
instruct vreduce_minB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_minB $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
    Label Ldone;
    __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_minS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_minS $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($src2$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
    Label Ldone;
    __ bge(as_Register($src1$$reg), as_Register($dst$$reg), Ldone);
    __ mv(as_Register($dst$$reg), as_Register($src1$$reg));
    __ bind(Ldone);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_minI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_minI $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_minL(iRegLNoSp dst, iRegL src1, vReg src2, vReg tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP tmp);
  format %{ "vreduce_minL $dst, $src1, $src2, $tmp" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_s_x(as_VectorRegister($tmp$$reg), $src1$$Register);
    __ vredmin_vs(as_VectorRegister($tmp$$reg), as_VectorRegister($src2$$reg), as_VectorRegister($tmp$$reg));
    __ vmv_x_s($dst$$Register, as_VectorRegister($tmp$$reg));
  %}
  ins_pipe(pipe_slow);
%}
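
// For the byte/short reductions above, vmv.s.x at e8/e16 would truncate the
// int-valued scalar src1 to the element width, so the vector is reduced alone
// (element 0 of src2 doubles as a harmless initial value for max/min) and
// src1 is folded in afterwards with a scalar compare-and-branch. The int/long
// variants can seed src1 into the reduction directly.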

// vector float max reduction

instruct vreduce_maxF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_maxF $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    __ reduce_minmax_FD_v($dst$$FloatRegister,
                          $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                          as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                          false /* is_double */, false /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_maxD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_maxD $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    __ reduce_minmax_FD_v($dst$$FloatRegister,
                          $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                          as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                          true /* is_double */, false /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

// vector float min reduction

instruct vreduce_minF(fRegF dst, fRegF src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_minF $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    __ reduce_minmax_FD_v($dst$$FloatRegister,
                          $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                          as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                          false /* is_double */, true /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

instruct vreduce_minD(fRegD dst, fRegD src1, vReg src2, vReg tmp1, vReg tmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV src1 src2));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2);
  format %{ "reduce_minD $dst, $src1, $src2, $tmp1, $tmp2" %}
  ins_encode %{
    __ reduce_minmax_FD_v($dst$$FloatRegister,
                          $src1$$FloatRegister, as_VectorRegister($src2$$reg),
                          as_VectorRegister($tmp1$$reg), as_VectorRegister($tmp2$$reg),
                          true /* is_double */, true /* is_min */);
  %}
  ins_pipe(pipe_slow);
%}

// vector Math.rint, floor, ceil

instruct vroundD(vReg dst, vReg src, immI rmode) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundD $dst, $src, $rmode" %}
  ins_encode %{
    switch ($rmode$$constant) {
      case RoundDoubleModeNode::rmode_rint:
        __ csrwi(CSR_FRM, C2_MacroAssembler::rne);
        __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_floor:
        __ csrwi(CSR_FRM, C2_MacroAssembler::rdn);
        __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
        break;
      case RoundDoubleModeNode::rmode_ceil:
        __ csrwi(CSR_FRM, C2_MacroAssembler::rup);
        __ vfcvt_rtz_x_f_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
        break;
      default:
        ShouldNotReachHere();
        break;
    }
  %}
  ins_pipe(pipe_slow);
%}
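
// csrwi CSR_FRM selects the dynamic floating-point rounding mode before the
// conversion: rne = round to nearest, ties to even (rint); rdn = round down
// (floor); rup = round up (ceil).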

// vector replicate

instruct replicateB(vReg dst, iRegIorL2I src) %{
  match(Set dst (ReplicateB src));
  ins_cost(VEC_COST);
  format %{ "vmv.v.x  $dst, $src\t#@replicateB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateS(vReg dst, iRegIorL2I src) %{
  match(Set dst (ReplicateS src));
  ins_cost(VEC_COST);
  format %{ "vmv.v.x  $dst, $src\t#@replicateS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateI(vReg dst, iRegIorL2I src) %{
  match(Set dst (ReplicateI src));
  ins_cost(VEC_COST);
  format %{ "vmv.v.x  $dst, $src\t#@replicateI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateL(vReg dst, iRegL src) %{
  match(Set dst (ReplicateL src));
  ins_cost(VEC_COST);
  format %{ "vmv.v.x  $dst, $src\t#@replicateL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateB_imm5(vReg dst, immI5 con) %{
  match(Set dst (ReplicateB con));
  ins_cost(VEC_COST);
  format %{ "vmv.v.i  $dst, $con\t#@replicateB_imm5" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateS_imm5(vReg dst, immI5 con) %{
  match(Set dst (ReplicateS con));
  ins_cost(VEC_COST);
  format %{ "vmv.v.i  $dst, $con\t#@replicateS_imm5" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateI_imm5(vReg dst, immI5 con) %{
  match(Set dst (ReplicateI con));
  ins_cost(VEC_COST);
  format %{ "vmv.v.i  $dst, $con\t#@replicateI_imm5" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateL_imm5(vReg dst, immL5 con) %{
  match(Set dst (ReplicateL con));
  ins_cost(VEC_COST);
  format %{ "vmv.v.i  $dst, $con\t#@replicateL_imm5" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_v_i(as_VectorRegister($dst$$reg), $con$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateF(vReg dst, fRegF src) %{
  match(Set dst (ReplicateF src));
  ins_cost(VEC_COST);
  format %{ "vfmv.v.f  $dst, $src\t#@replicateF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct replicateD(vReg dst, fRegD src) %{
  match(Set dst (ReplicateD src));
  ins_cost(VEC_COST);
  format %{ "vfmv.v.f  $dst, $src\t#@replicateD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfmv_v_f(as_VectorRegister($dst$$reg), $src$$FloatRegister);
  %}
  ins_pipe(pipe_slow);
%}
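
// vmv.v.i encodes a 5-bit signed immediate, hence the immI5/immL5 operands
// above (constants in [-16, 15]); wider constants are broadcast from a scalar
// register via vmv.v.x instead.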

// vector shift

instruct vasrB(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVB src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 7\t#@vasrB\n\t"
            "vsra.vi $dst, $src, 7, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    // if shift > BitsPerByte - 1, clear the low BitsPerByte - 1 bits
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               BitsPerByte - 1, Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}
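
// Variable-shift fixup (used by the subword shifts in this section): RVV shift
// instructions consume only the low log2(SEW) bits of each shift amount, while
// Java semantics for byte/short elements require out-of-range counts to
// saturate for arithmetic right shifts and to produce zero for left/logical
// right shifts. Hence: build a mask of out-of-range lanes with vmsgtu.vi,
// handle those lanes under the mask (vsra.vi by SEW - 1, or vxor of src with
// itself to clear them in the logical variants below), then invert the mask
// and perform the real shift on the in-range lanes.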

instruct vasrS(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVS src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 15\t#@vasrS\n\t"
            "vsra.vi $dst, $src, 15, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsra.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    // if shift > BitsPerShort - 1, clear the low BitsPerShort - 1 bits
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               BitsPerShort - 1, Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vasrI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVI src shift));
  ins_cost(VEC_COST);
  format %{ "vsra.vv $dst, $src, $shift\t#@vasrI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vasrL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (RShiftVL src shift));
  ins_cost(VEC_COST);
  format %{ "vsra.vv $dst, $src, $shift\t#@vasrL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vsra_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslB(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVB src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 7\t#@vlslB\n\t"
            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    // if shift > BitsPerByte - 1, clear the element
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($src$$reg), Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslS(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVS src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 15\t#@vlslS\n\t"
            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsll.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    // if shift > BitsPerShort - 1, clear the element
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($src$$reg), Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVI src shift));
  ins_cost(VEC_COST);
  format %{ "vsll.vv $dst, $src, $shift\t#@vlslI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (LShiftVL src shift));
  ins_cost(VEC_COST);
  format %{ "vsll.vv $dst, $src, $shift\t#@vlslL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vsll_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrB(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVB src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 7\t#@vlsrB\n\t"
            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsrl.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    // if shift > BitsPerByte - 1, clear the element
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerByte - 1);
    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($src$$reg), Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrS(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVS src shift));
  ins_cost(VEC_COST);
  effect(TEMP_DEF dst);
  format %{ "vmsgtu.vi v0, $shift, 15\t#@vlsrS\n\t"
            "vxor.vv $dst, $src, $src, Assembler::v0_t\n\t"
            "vmnot.m v0, v0\n\t"
            "vsrl.vv $dst, $src, $shift, Assembler::v0_t" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    // if shift > BitsPerShort - 1, clear the element
    __ vmsgtu_vi(v0, as_VectorRegister($shift$$reg), BitsPerShort - 1);
    __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($src$$reg), Assembler::v0_t);
    // otherwise, shift
    __ vmnot_m(v0, v0);
    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg), Assembler::v0_t);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrI(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVI src shift));
  ins_cost(VEC_COST);
  format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrL(vReg dst, vReg src, vReg shift) %{
  match(Set dst (URShiftVL src shift));
  ins_cost(VEC_COST);
  format %{ "vsrl.vv $dst, $src, $shift\t#@vlsrL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vsrl_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
               as_VectorRegister($shift$$reg));
  %}
  ins_pipe(pipe_slow);
%}
1454 
1455 instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
1456   match(Set dst (RShiftVB src (RShiftCntV shift)));
1457   ins_cost(VEC_COST);
1458   format %{ "vsra.vi $dst, $src, $shift\t#@vasrB_imm" %}
1459   ins_encode %{
1460     uint32_t con = (unsigned)$shift$$constant & 0x1f;
1461     __ vsetvli(t0, x0, Assembler::e8);
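    // A shift amount of zero degenerates to a copy: vor.vv of src with
    // itself moves src into dst unchanged.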
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
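    // An arithmetic right shift by >= element width just replicates the
    // sign bit, so clamp the count to BitsPerByte - 1.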
    if (con >= BitsPerByte) con = BitsPerByte - 1;
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsra.vi $dst, $src, $shift\t#@vasrS_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e16);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
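    // As above: clamp to the element width so the sign bit is replicated.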
    if (con >= BitsPerShort) con = BitsPerShort - 1;
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsra.vi $dst, $src, $shift\t#@vasrI_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e32);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
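  // vsra.vi encodes only a 5-bit unsigned immediate, so this rule is
  // limited to constant shifts below 32.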
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (RShiftVL src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsra.vi $dst, $src, $shift\t#@vasrL_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e64);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    __ vsra_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrB_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e8);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
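    // A logical shift by >= element width clears the element; vxor.vv of
    // src with itself produces the required zero.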
    if (con >= BitsPerByte) {
      __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                 as_VectorRegister($src$$reg));
      return;
    }
    __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrS_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e16);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    if (con >= BitsPerShort) {
      __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                 as_VectorRegister($src$$reg));
      return;
    }
    __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrI_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e32);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
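  // vsrl.vi encodes only a 5-bit unsigned immediate, so this rule is
  // limited to constant shifts below 32.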
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsrl.vi $dst, $src, $shift\t#@vlsrL_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e64);
    if (con == 0) {
      __ vor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                as_VectorRegister($src$$reg));
      return;
    }
    __ vsrl_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsll.vi $dst, $src, $shift\t#@vlslB_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e8);
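    // A left shift by >= element width also clears the element.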
    if (con >= BitsPerByte) {
      __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                 as_VectorRegister($src$$reg));
      return;
    }
    __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsll.vi $dst, $src, $shift\t#@vlslS_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e16);
    if (con >= BitsPerShort) {
      __ vxor_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg),
                 as_VectorRegister($src$$reg));
      return;
    }
    __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsll.vi $dst, $src, $shift\t#@vlslI_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e32);
    __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
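  // vsll.vi encodes only a 5-bit unsigned immediate, so this rule is
  // limited to constant shifts below 32.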
  predicate((n->in(2)->in(1)->get_int() & 0x3f) < 32);
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  ins_cost(VEC_COST);
  format %{ "vsll.vi $dst, $src, $shift\t#@vlslL_imm" %}
  ins_encode %{
    uint32_t con = (unsigned)$shift$$constant & 0x1f;
    __ vsetvli(t0, x0, Assembler::e64);
    __ vsll_vi(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}

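// vector shift count: broadcast the scalar shift amount into every element
// with vmv.v.x, at the element width of the type being shifted
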
instruct vshiftcntB(vReg dst, iRegIorL2I cnt) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vshiftcntS(vReg dst, iRegIorL2I cnt) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
            n->bottom_type()->is_vect()->element_basic_type() == T_CHAR);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vshiftcntI(vReg dst, iRegIorL2I cnt) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vshiftcntL(vReg dst, iRegIorL2I cnt) %{
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "vmv.v.x $dst, $cnt\t#@vshiftcntL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vmv_v_x(as_VectorRegister($dst$$reg), as_Register($cnt$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector sqrt

instruct vsqrtF(vReg dst, vReg src) %{
  match(Set dst (SqrtVF src));
  ins_cost(VEC_COST);
  format %{ "vfsqrt.v $dst, $src\t#@vsqrtF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsqrtD(vReg dst, vReg src) %{
  match(Set dst (SqrtVD src));
  ins_cost(VEC_COST);
  format %{ "vfsqrt.v $dst, $src\t#@vsqrtD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfsqrt_v(as_VectorRegister($dst$$reg), as_VectorRegister($src$$reg));
  %}
  ins_pipe(pipe_slow);
%}

// vector sub

instruct vsubB(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVB src1 src2));
  ins_cost(VEC_COST);
  format %{ "vsub.vv $dst, $src1, $src2\t#@vsubB" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e8);
    __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubS(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVS src1 src2));
  ins_cost(VEC_COST);
  format %{ "vsub.vv $dst, $src1, $src2\t#@vsubS" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e16);
    __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubI(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVI src1 src2));
  ins_cost(VEC_COST);
  format %{ "vsub.vv $dst, $src1, $src2\t#@vsubI" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubL(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVL src1 src2));
  ins_cost(VEC_COST);
  format %{ "vsub.vv $dst, $src1, $src2\t#@vsubL" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
               as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubF(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVF src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubF" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e32);
    __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vsubD(vReg dst, vReg src1, vReg src2) %{
  match(Set dst (SubVD src1 src2));
  ins_cost(VEC_COST);
  format %{ "vfsub.vv $dst, $src1, $src2\t#@vsubD" %}
  ins_encode %{
    __ vsetvli(t0, x0, Assembler::e64);
    __ vfsub_vv(as_VectorRegister($dst$$reg), as_VectorRegister($src1$$reg),
                as_VectorRegister($src2$$reg));
  %}
  ins_pipe(pipe_slow);
%}

instruct vstring_equalsL(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                         iRegI_R10 result, vReg_V1 v1,
                         vReg_V2 v2, vReg_V3 v3, iRegL_R6 r6)
%{
  predicate(UseVExt && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3);

  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsL" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
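    // The trailing argument is the element size in bytes: 1 for Latin-1,
    // 2 for UTF-16.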
    __ string_equals_v($str1$$Register, $str2$$Register,
                       $result$$Register, $cnt$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct vstring_equalsU(iRegP_R11 str1, iRegP_R13 str2, iRegI_R14 cnt,
                         iRegI_R10 result, vReg_V1 v1,
                         vReg_V2 v2, vReg_V3 v3, iRegL_R6 r6)
%{
  predicate(UseVExt && ((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL r6, TEMP v1, TEMP v2, TEMP v3);

  format %{ "String Equals $str1, $str2, $cnt -> $result\t#@string_equalsU" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_equals_v($str1$$Register, $str2$$Register,
                       $result$$Register, $cnt$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct varray_equalsB(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, iRegL_R6 r6)
%{
  predicate(UseVExt && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6);

  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsB // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals_v($ary1$$Register, $ary2$$Register,
                       $result$$Register, $tmp$$Register, 1);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct varray_equalsC(iRegP_R11 ary1, iRegP_R12 ary2, iRegI_R10 result,
                        vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegP_R28 tmp, iRegL_R6 r6)
%{
  predicate(UseVExt && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP v1, TEMP v2, TEMP v3, KILL r6);

  format %{ "Array Equals $ary1, $ary2 -> $result\t#@array_equalsC // KILL $tmp" %}
  ins_encode %{
    __ arrays_equals_v($ary1$$Register, $ary2$$Register,
                       $result$$Register, $tmp$$Register, 2);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct vstring_compareU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                          iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseVExt && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareU" %}
  ins_encode %{
    // Count is in 8-bit bytes; non-Compact chars are 16 bits.
    __ string_compare_v($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register, $result$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        StrIntrinsicNode::UU);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct vstring_compareL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                          iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                          iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseVExt && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareL" %}
  ins_encode %{
    __ string_compare_v($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register, $result$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        StrIntrinsicNode::LL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct vstring_compareUL(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                           iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseVExt && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareUL" %}
  ins_encode %{
    __ string_compare_v($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register, $result$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        StrIntrinsicNode::UL);
  %}
  ins_pipe(pipe_class_memory);
%}

instruct vstring_compareLU(iRegP_R11 str1, iRegI_R12 cnt1, iRegP_R13 str2, iRegI_R14 cnt2,
                           iRegI_R10 result, vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, vReg_V4 v4, vReg_V5 v5,
                           iRegP_R28 tmp1, iRegL_R29 tmp2)
%{
  predicate(UseVExt && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
         TEMP v1, TEMP v2, TEMP v3, TEMP v4, TEMP v5);

  format %{ "String Compare $str1, $cnt1, $str2, $cnt2 -> $result\t#@string_compareLU" %}
  ins_encode %{
    __ string_compare_v($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register, $result$$Register,
                        $tmp1$$Register, $tmp2$$Register,
                        StrIntrinsicNode::LU);
  %}
  ins_pipe(pipe_class_memory);
%}

// fast byte[] to char[] inflation
instruct vstring_inflate(Universe dummy, iRegP_R10 src, iRegP_R11 dst, iRegI_R12 len,
                         vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegI_R6 tmp)
%{
  predicate(UseVExt);
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP v1, TEMP v2, TEMP v3, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len);

  format %{ "String Inflate $src,$dst" %}
  ins_encode %{
    __ byte_array_inflate_v($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// encode char[] to byte[] in ISO_8859_1
instruct vencode_iso_array(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
                           vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegI_R6 tmp)
%{
  predicate(UseVExt);
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
         TEMP v1, TEMP v2, TEMP v3, KILL tmp);

  format %{ "Encode array $src,$dst,$len -> $result" %}
  ins_encode %{
    __ encode_iso_array_v($src$$Register, $dst$$Register, $len$$Register,
                          $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_class_memory);
%}

// fast char[] to byte[] compression
instruct vstring_compress(iRegP_R12 src, iRegP_R11 dst, iRegI_R13 len, iRegI_R10 result,
                          vReg_V1 v1, vReg_V2 v2, vReg_V3 v3, iRegI_R6 tmp)
%{
  predicate(UseVExt);
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP_DEF result, USE_KILL src, USE_KILL dst, USE_KILL len,
         TEMP v1, TEMP v2, TEMP v3, KILL tmp);

  format %{ "String Compress $src,$dst -> $result    // KILL R11, R12, R13" %}
  ins_encode %{
    __ char_array_compress_v($src$$Register, $dst$$Register, $len$$Register,
                             $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct vhas_negatives(iRegP_R11 ary1, iRegI_R12 len, iRegI_R10 result, iRegL_R6 tmp)
%{
  predicate(UseVExt);
  match(Set result (HasNegatives ary1 len));
  effect(USE_KILL ary1, USE_KILL len, KILL tmp);
  format %{ "has negatives byte[] $ary1,$len -> $result" %}
  ins_encode %{
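    // $result is set to a boolean indicating whether any byte in
    // [$ary1, $ary1 + $len) has its sign bit set.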
    __ has_negatives_v($ary1$$Register, $len$$Register, $result$$Register, $tmp$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct vstringU_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                               iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
%{
  predicate(UseVExt && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);

  format %{ "StringUTF16 IndexOf char[] $str1, $cnt1, $ch -> $result" %}

  ins_encode %{
    __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register,
                             $result$$Register, $tmp1$$Register, $tmp2$$Register,
                             false /* isL */);
  %}

  ins_pipe(pipe_class_memory);
%}

instruct vstringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
                               iRegI_R10 result, iRegINoSp tmp1, iRegINoSp tmp2,
                               vReg_V1 v1, vReg_V2 v2, vReg_V3 v3)
%{
  predicate(UseVExt && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
  effect(TEMP_DEF result, USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
         TEMP tmp1, TEMP tmp2, TEMP v1, TEMP v2, TEMP v3);

  format %{ "StringLatin1 IndexOf char[] $str1, $cnt1, $ch -> $result" %}

  ins_encode %{
    __ string_indexof_char_v($str1$$Register, $cnt1$$Register, $ch$$Register,
                             $result$$Register, $tmp1$$Register, $tmp2$$Register,
                             true /* isL */);
  %}

  ins_pipe(pipe_class_memory);
%}

// clearing of an array
instruct vclearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, Universe dummy,
                             vReg_V1 vReg1, vReg_V2 vReg2, vReg_V3 vReg3)
%{
  predicate(UseVExt);
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, TEMP vReg1, TEMP vReg2, TEMP vReg3);

  format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}

  ins_encode %{
    __ clear_array_v($base$$Register, $cnt$$Register);
  %}

  ins_pipe(pipe_class_memory);
%}