1 // Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
   2 // Copyright (c) 2020, 2022, Arm Limited. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
  26 
  27 // AArch64 NEON Architecture Description File
  28 
  29 // ====================VECTOR INSTRUCTIONS==================================
  30 
  31 // ------------------------------ Load/store/reinterpret -----------------------
  32 
  33 // Load Vector (16 bits)
     //
     // Selected for a LoadVector node whose memory_size() is 2 bytes, and only
     // when UseSVE == 0. The value lands in a vecD operand; the load itself is
     // emitted by the aarch64_enc_ldrvH encoding class, which is not defined in
     // this part of the file.
  34 instruct loadV2(vecD dst, vmem2 mem)
  35 %{
  36   predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 2);
  37   match(Set dst (LoadVector mem));
  38   ins_cost(4 * INSN_COST);
  39   format %{ "ldrh   $dst,$mem\t# vector (16 bits)" %}
  40   ins_encode( aarch64_enc_ldrvH(dst, mem) );
  41   ins_pipe(vload_reg_mem64);
  42 %}
  43 
  44 // Load Vector (32 bits)
     //
     // Selected for a LoadVector node whose memory_size() is 4 bytes, and only
     // when UseSVE == 0. The value lands in a vecD operand; the load itself is
     // emitted by the aarch64_enc_ldrvS encoding class, which is not defined in
     // this part of the file.
  45 instruct loadV4(vecD dst, vmem4 mem)
  46 %{
  47   predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 4);
  48   match(Set dst (LoadVector mem));
  49   ins_cost(4 * INSN_COST);
  50   format %{ "ldrs   $dst,$mem\t# vector (32 bits)" %}
  51   ins_encode( aarch64_enc_ldrvS(dst, mem) );
  52   ins_pipe(vload_reg_mem64);
  53 %}
  54 
  55 // Load Vector (64 bits)
     //
     // Selected for a LoadVector node whose memory_size() is 8 bytes, and only
     // when UseSVE == 0. The value lands in a vecD operand; the load itself is
     // emitted by the aarch64_enc_ldrvD encoding class, which is not defined in
     // this part of the file.
  56 instruct loadV8(vecD dst, vmem8 mem)
  57 %{
  58   predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 8);
  59   match(Set dst (LoadVector mem));
  60   ins_cost(4 * INSN_COST);
  61   format %{ "ldrd   $dst,$mem\t# vector (64 bits)" %}
  62   ins_encode( aarch64_enc_ldrvD(dst, mem) );
  63   ins_pipe(vload_reg_mem64);
  64 %}
  65 
  66 // Load Vector (128 bits)
     //
     // Selected for a LoadVector node whose memory_size() is 16 bytes, and only
     // when UseSVE == 0. This is the only load rule in this group that targets a
     // vecX operand and the vload_reg_mem128 pipeline class. The load itself is
     // emitted by the aarch64_enc_ldrvQ encoding class, which is not defined in
     // this part of the file.
  67 instruct loadV16(vecX dst, vmem16 mem)
  68 %{
  69   predicate(UseSVE == 0 && n->as_LoadVector()->memory_size() == 16);
  70   match(Set dst (LoadVector mem));
  71   ins_cost(4 * INSN_COST);
  72   format %{ "ldrq   $dst,$mem\t# vector (128 bits)" %}
  73   ins_encode( aarch64_enc_ldrvQ(dst, mem) );
  74   ins_pipe(vload_reg_mem128);
  75 %}
  76 
  77 // Store Vector (16 bits)
     //
     // Selected for a StoreVector node whose memory_size() is 2 bytes. The vecD
     // source is written through the aarch64_enc_strvH encoding class, which is
     // not defined in this part of the file.
     // NOTE(review): unlike loadV2, this predicate has no UseSVE == 0 term --
     // confirm the asymmetry is intentional.
  78 instruct storeV2(vecD src, vmem2 mem)
  79 %{
  80   predicate(n->as_StoreVector()->memory_size() == 2);
  81   match(Set mem (StoreVector mem src));
  82   ins_cost(4 * INSN_COST);
  83   format %{ "strh   $mem,$src\t# vector (16 bits)" %}
  84   ins_encode( aarch64_enc_strvH(src, mem) );
  85   ins_pipe(vstore_reg_mem64);
  86 %}
  87 
  88 // Store Vector (32 bits)
     //
     // Selected for a StoreVector node whose memory_size() is 4 bytes. The vecD
     // source is written through the aarch64_enc_strvS encoding class, which is
     // not defined in this part of the file.
     // NOTE(review): unlike loadV4, this predicate has no UseSVE == 0 term --
     // confirm the asymmetry is intentional.
  89 instruct storeV4(vecD src, vmem4 mem)
  90 %{
  91   predicate(n->as_StoreVector()->memory_size() == 4);
  92   match(Set mem (StoreVector mem src));
  93   ins_cost(4 * INSN_COST);
  94   format %{ "strs   $mem,$src\t# vector (32 bits)" %}
  95   ins_encode( aarch64_enc_strvS(src, mem) );
  96   ins_pipe(vstore_reg_mem64);
  97 %}
  98 
  99 // Store Vector (64 bits)
     //
     // Selected for a StoreVector node whose memory_size() is 8 bytes. The vecD
     // source is written through the aarch64_enc_strvD encoding class, which is
     // not defined in this part of the file.
     // NOTE(review): unlike loadV8, this predicate has no UseSVE == 0 term --
     // confirm the asymmetry is intentional.
 100 instruct storeV8(vecD src, vmem8 mem)
 101 %{
 102   predicate(n->as_StoreVector()->memory_size() == 8);
 103   match(Set mem (StoreVector mem src));
 104   ins_cost(4 * INSN_COST);
 105   format %{ "strd   $mem,$src\t# vector (64 bits)" %}
 106   ins_encode( aarch64_enc_strvD(src, mem) );
 107   ins_pipe(vstore_reg_mem64);
 108 %}
 109 
 110 // Store Vector (128 bits)
     //
     // Selected for a StoreVector node whose memory_size() is 16 bytes. The vecX
     // source is written through the aarch64_enc_strvQ encoding class, which is
     // not defined in this part of the file; the rule uses the
     // vstore_reg_mem128 pipeline class.
     // NOTE(review): unlike loadV16, this predicate has no UseSVE == 0 term --
     // confirm the asymmetry is intentional.
 111 instruct storeV16(vecX src, vmem16 mem)
 112 %{
 113   predicate(n->as_StoreVector()->memory_size() == 16);
 114   match(Set mem (StoreVector mem src));
 115   ins_cost(4 * INSN_COST);
 116   format %{ "strq   $mem,$src\t# vector (128 bits)" %}
 117   ins_encode( aarch64_enc_strvQ(src, mem) );
 118   ins_pipe(vstore_reg_mem128);
 119 %}
 120 
 121 instruct reinterpretD(vecD dst)
 122 %{
       // VectorReinterpret where both the result type and the input type are
       // 8 bytes long. The match rule names dst as both input and output, so
       // the encoding is deliberately empty and the cost is 0.
 123   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
 124             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
 125   match(Set dst (VectorReinterpret dst));
 126   ins_cost(0);
 127   format %{ " # reinterpret $dst" %}
 128   ins_encode %{
 129     // empty
 130   %}
 131   ins_pipe(pipe_class_empty);
 132 %}
 133 
 134 instruct reinterpretX(vecX dst)
 135 %{
       // VectorReinterpret where both the result type and the input type are
       // 16 bytes long. The match rule names dst as both input and output, so
       // the encoding is deliberately empty and the cost is 0.
 136   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
 137             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
 138   match(Set dst (VectorReinterpret dst));
 139   ins_cost(0);
 140   format %{ " # reinterpret $dst" %}
 141   ins_encode %{
 142     // empty
 143   %}
 144   ins_pipe(pipe_class_empty);
 145 %}
 146 
 147 instruct reinterpretD2X(vecX dst, vecD src)
 148 %{
       // VectorReinterpret from an 8-byte vector type to a 16-byte one. The
       // low 64 bits are copied with a T8B "orr" of src with itself.
       // NOTE(review): the in-code comment requires the upper 64 bits of dst to
       // be zero; this presumably relies on the 64-bit form of the instruction
       // clearing them -- confirm against the Arm architecture manual.
 149   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
 150             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
 151   match(Set dst (VectorReinterpret src));
 152   ins_cost(INSN_COST);
 153   format %{ " # reinterpret $dst,$src\t# D to X" %}
 154   ins_encode %{
 155     // The higher 64-bits of the "dst" register must be cleared to zero.
 156     __ orr(as_FloatRegister($dst$$reg), __ T8B,
 157            as_FloatRegister($src$$reg),
 158            as_FloatRegister($src$$reg));
 159   %}
 160   ins_pipe(vlogical64);
 161 %}
 162 
 163 instruct reinterpretX2D(vecD dst, vecX src)
 164 %{
       // VectorReinterpret from a 16-byte vector type to an 8-byte one. Emits
       // the same T8B "orr" of src with itself as reinterpretD2X, i.e. only the
       // low 64 bits of src are carried over.
       // NOTE(review): the in-code comment requires the upper 64 bits of dst to
       // be zero; this presumably relies on the 64-bit form of the instruction
       // clearing them -- confirm against the Arm architecture manual.
 165   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
 166             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
 167   match(Set dst (VectorReinterpret src));
 168   ins_cost(INSN_COST);
 169   format %{ " # reinterpret $dst,$src\t# X to D" %}
 170   ins_encode %{
 171     // The higher 64-bits of the "dst" register must be cleared to zero.
 172     __ orr(as_FloatRegister($dst$$reg), __ T8B,
 173            as_FloatRegister($src$$reg),
 174            as_FloatRegister($src$$reg));
 175   %}
 176   ins_pipe(vlogical64);
 177 %}
 178 
 179 instruct reinterpretS2X(vecX dst, vecD src)
 180 %{
       // VectorReinterpret from a 4-byte vector type to a 16-byte one. src is
       // declared vecD even though the predicate pins its type to 4 bytes.
       // NOTE(review): the three-argument dup(dst, S, src) is presumably the
       // scalar form that keeps the low 32-bit lane and zeroes the rest, which
       // is what the in-code comment requires -- confirm in the assembler.
 181   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
 182             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
 183   match(Set dst (VectorReinterpret src));
 184   ins_cost(INSN_COST);
 185   format %{ " # reinterpret $dst,$src\t# S to X" %}
 186   ins_encode %{
 187     // The higher bits of the "dst" register must be cleared to zero.
 188     __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
 189   %}
 190   ins_pipe(pipe_slow);
 191 %}
 192 
 193 instruct reinterpretX2S(vecD dst, vecX src)
 194 %{
       // VectorReinterpret from a 16-byte vector type to a 4-byte one. dst is
       // declared vecD even though the predicate pins the result to 4 bytes.
       // NOTE(review): the three-argument dup(dst, S, src) is presumably the
       // scalar form that keeps the low 32-bit lane and zeroes the rest, which
       // is what the in-code comment requires -- confirm in the assembler.
 195   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 &&
 196             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
 197   match(Set dst (VectorReinterpret src));
 198   ins_cost(INSN_COST);
 199   format %{ " # reinterpret $dst,$src\t# X to S" %}
 200   ins_encode %{
 201     // The higher bits of the "dst" register must be cleared to zero.
 202     __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
 203   %}
 204   ins_pipe(pipe_slow);
 205 %}
 206 
 207 instruct reinterpretS2D(vecD dst, vecD src)
 208 %{
       // VectorReinterpret from a 4-byte vector type to an 8-byte one; both
       // operands are declared vecD.
       // NOTE(review): the three-argument dup(dst, S, src) is presumably the
       // scalar form that keeps the low 32-bit lane and zeroes the rest, which
       // is what the in-code comment requires -- confirm in the assembler.
 209   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
 210             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 4);
 211   match(Set dst (VectorReinterpret src));
 212   ins_cost(INSN_COST);
 213   format %{ " # reinterpret $dst,$src\t# S to D" %}
 214   ins_encode %{
 215     // The higher bits of the "dst" register must be cleared to zero.
 216     __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
 217   %}
 218   ins_pipe(pipe_slow);
 219 %}
 220 
 221 instruct reinterpretD2S(vecD dst, vecD src)
 222 %{
       // VectorReinterpret from an 8-byte vector type to a 4-byte one; both
       // operands are declared vecD.
       // NOTE(review): the three-argument dup(dst, S, src) is presumably the
       // scalar form that keeps the low 32-bit lane and zeroes the rest, which
       // is what the in-code comment requires -- confirm in the assembler.
 223   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 4 &&
 224             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
 225   match(Set dst (VectorReinterpret src));
 226   ins_cost(INSN_COST);
 227   format %{ " # reinterpret $dst,$src\t# D to S" %}
 228   ins_encode %{
 229     // The higher bits of the "dst" register must be cleared to zero.
 230     __ dup(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($src$$reg));
 231   %}
 232   ins_pipe(pipe_slow);
 233 %}
 234 
 235 // ------------------------------ Vector cast -------------------------------
 236 
 237 instruct vcvt8Bto8S(vecX dst, vecD src)
 238 %{
       // VectorCastB2X producing 8 T_SHORT lanes: a single sxtl widens the
       // T8B source in a vecD register to T8H in a vecX register.
 239   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 240   match(Set dst (VectorCastB2X src));
 241   format %{ "sxtl  $dst, T8H, $src, T8B\t# convert 8B to 8S vector" %}
 242   ins_encode %{
 243     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
 244   %}
 245   ins_pipe(pipe_class_default);
 246 %}
 247 
 248 instruct vcvt4Bto4S(vecD dst, vecD src)
 249 %{
       // VectorCastB2X producing 4 T_SHORT lanes. Emits the same T8B -> T8H
       // sxtl as vcvt8Bto8S, but with a vecD destination.
       // NOTE(review): presumably only the low four result lanes are
       // meaningful for this vector length -- confirm.
 250   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 251   match(Set dst (VectorCastB2X src));
 252   format %{ "sxtl  $dst, T8H, $src, T8B\t# convert 4B to 4S vector" %}
 253   ins_encode %{
 254     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
 255   %}
 256   ins_pipe(pipe_class_default);
 257 %}
 258 
 259 instruct vcvt8Sto8B(vecD dst, vecX src)
 260 %{
       // VectorCastS2X producing 8 T_BYTE lanes: a single xtn narrows the T8H
       // source in a vecX register to T8B in a vecD register.
 261   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 262   match(Set dst (VectorCastS2X src));
 263   format %{ "xtn  $dst, T8B, $src, T8H\t# convert 8S to 8B vector" %}
 264   ins_encode %{
 265     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
 266   %}
 267   ins_pipe(pipe_class_default);
 268 %}
 269 
 270 instruct vcvt4Sto4B(vecD dst, vecD src)
 271 %{
       // VectorCastS2X producing 4 T_BYTE lanes. Emits the same T8H -> T8B xtn
       // as vcvt8Sto8B, but with a vecD source.
       // NOTE(review): presumably only the low four result lanes are
       // meaningful for this vector length -- confirm.
 272   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 273   match(Set dst (VectorCastS2X src));
 274   format %{ "xtn  $dst, T8B, $src, T8H\t# convert 4S to 4B vector" %}
 275   ins_encode %{
 276     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
 277   %}
 278   ins_pipe(pipe_class_default);
 279 %}
 280 
 281 instruct vcvt4Sto4I(vecX dst, vecD src)
 282 %{
       // VectorCastS2X producing 4 T_INT lanes: a single sxtl widens the T4H
       // source in a vecD register to T4S in a vecX register.
 283   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 284   match(Set dst (VectorCastS2X src));
 285   format %{ "sxtl  $dst, T4S, $src, T4H\t# convert 4S to 4I vector" %}
 286   ins_encode %{
 287     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
 288   %}
 289   ins_pipe(pipe_class_default);
 290 %}
 291 
 292 instruct vcvt4Ito4S(vecD dst, vecX src)
 293 %{
       // VectorCastI2X producing 4 T_SHORT lanes: a single xtn narrows the T4S
       // source in a vecX register to T4H in a vecD register.
 294   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 295   match(Set dst (VectorCastI2X src));
 296   format %{ "xtn  $dst, T4H, $src, T4S\t# convert 4I to 4S vector" %}
 297   ins_encode %{
 298     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
 299   %}
 300   ins_pipe(pipe_class_default);
 301 %}
 302 
 303 instruct vcvt2Ito2L(vecX dst, vecD src)
 304 %{
       // VectorCastI2X producing 2 T_LONG lanes: a single sxtl widens the T2S
       // source in a vecD register to T2D in a vecX register.
 305   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 306   match(Set dst (VectorCastI2X src));
 307   format %{ "sxtl  $dst, T2D, $src, T2S\t# convert 2I to 2L vector" %}
 308   ins_encode %{
 309     __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
 310   %}
 311   ins_pipe(pipe_class_default);
 312 %}
 313 
 314 instruct vcvt2Lto2I(vecD dst, vecX src)
 315 %{
       // VectorCastL2X producing 2 T_INT lanes: a single xtn narrows the T2D
       // source in a vecX register to T2S in a vecD register.
 316   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 317   match(Set dst (VectorCastL2X src));
 318   format %{ "xtn  $dst, T2S, $src, T2D\t# convert 2L to 2I vector" %}
 319   ins_encode %{
 320     __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
 321   %}
 322   ins_pipe(pipe_class_default);
 323 %}
 324 
 325 instruct vcvt4Ito4B(vecD dst, vecX src)
 326 %{
       // VectorCastI2X producing 4 T_BYTE lanes. Narrowing is done in two xtn
       // steps: T4S -> T4H from src into dst, then T8H -> T8B on dst in place.
 327   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 328   match(Set dst (VectorCastI2X src));
 329   format %{ "xtn  $dst, T4H, $src, T4S\n\t"
 330             "xtn  $dst, T8B, $dst, T8H\t# convert 4I to 4B vector"
 331   %}
 332   ins_encode %{
 333     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
 334     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
 335   %}
 336   ins_pipe(pipe_class_default);
 337 %}
 338 
 339 instruct vcvt4Bto4I(vecX dst, vecD src)
 340 %{
       // VectorCastB2X producing 4 T_INT lanes. Widening is done in two sxtl
       // steps: T8B -> T8H from src into dst, then T4H -> T4S on dst in place.
 341   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 342   match(Set dst (VectorCastB2X src));
 343   format %{ "sxtl  $dst, T8H, $src, T8B\n\t"
 344             "sxtl  $dst, T4S, $dst, T4H\t# convert 4B to 4I vector"
 345   %}
 346   ins_encode %{
 347     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
 348     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
 349   %}
 350   ins_pipe(pipe_class_default);
 351 %}
 352 
 353 instruct vcvt2Lto2F(vecD dst, vecX src, vRegF tmp)
 354 %{
       // VectorCastL2X producing 2 T_FLOAT lanes, converted one lane at a time
       // through rscratch1:
       //   1. D lane 0 of src -> rscratch1 -> scvtfs into dst
       //   2. D lane 1 of src -> rscratch1 -> scvtfs into tmp
       //   3. ins merges the value held in tmp into dst as an S element
       // dst is written in step 1 before src is read again in step 2, so dst
       // must not share a register with src; the TEMP_DEF effect on dst is
       // presumably what guarantees that -- confirm against the ADLC docs.
       // NOTE(review): the trailing "1, 0" of ins is presumably (destination
       // lane, source lane) -- confirm in the assembler.
 355   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 356   match(Set dst (VectorCastL2X src));
 357   effect(TEMP_DEF dst, TEMP tmp);
 358   format %{ "umov   rscratch1, $src, D, 0\n\t"
 359             "scvtfs $dst, rscratch1\n\t"
 360             "umov   rscratch1, $src, D, 1\n\t"
 361             "scvtfs $tmp, rscratch1\n\t"
 362             "ins    $dst, S, $tmp, 1, 0\t# convert 2L to 2F vector"
 363   %}
 364   ins_encode %{
 365     __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
 366     __ scvtfs(as_FloatRegister($dst$$reg), rscratch1);
 367     __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
 368     __ scvtfs(as_FloatRegister($tmp$$reg), rscratch1);
 369     __ ins(as_FloatRegister($dst$$reg), __ S, as_FloatRegister($tmp$$reg), 1, 0);
 370   %}
 371   ins_pipe(pipe_slow);
 372 %}
 373 
 374 instruct vcvt2Ito2F(vecD dst, vecD src)
 375 %{
       // VectorCastI2X producing 2 T_FLOAT lanes: one scvtfv with the T2S
       // arrangement, vecD to vecD.
 376   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 377   match(Set dst (VectorCastI2X src));
 378   format %{ "scvtfv  T2S, $dst, $src\t# convert 2I to 2F vector" %}
 379   ins_encode %{
 380     __ scvtfv(__ T2S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 381   %}
 382   ins_pipe(pipe_class_default);
 383 %}
 384 
 385 instruct vcvt4Ito4F(vecX dst, vecX src)
 386 %{
       // VectorCastI2X producing 4 T_FLOAT lanes: one scvtfv with the T4S
       // arrangement, vecX to vecX.
 387   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 388   match(Set dst (VectorCastI2X src));
 389   format %{ "scvtfv  T4S, $dst, $src\t# convert 4I to 4F vector" %}
 390   ins_encode %{
 391     __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 392   %}
 393   ins_pipe(pipe_class_default);
 394 %}
 395 
 396 instruct vcvt2Lto2D(vecX dst, vecX src)
 397 %{
       // VectorCastL2X producing 2 T_DOUBLE lanes: one scvtfv with the T2D
       // arrangement, vecX to vecX.
 398   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
 399   match(Set dst (VectorCastL2X src));
 400   format %{ "scvtfv  T2D, $dst, $src\t# convert 2L to 2D vector" %}
 401   ins_encode %{
 402     __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
 403   %}
 404   ins_pipe(pipe_class_default);
 405 %}
 406 
 407 instruct vcvt4Sto4F(vecX dst, vecD src)
 408 %{
       // VectorCastS2X producing 4 T_FLOAT lanes. Two steps: sxtl widens
       // T4H -> T4S from src into dst, then scvtfv (T4S) converts dst in place.
 409   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 410   match(Set dst (VectorCastS2X src));
 411   format %{ "sxtl    $dst, T4S, $src, T4H\n\t"
 412             "scvtfv  T4S, $dst, $dst\t# convert 4S to 4F vector"
 413   %}
 414   ins_encode %{
 415     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
 416     __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
 417   %}
 418   ins_pipe(pipe_slow);
 419 %}
 420 
 421 instruct vcvt2Ito2D(vecX dst, vecD src)
 422 %{
       // VectorCastI2X producing 2 T_DOUBLE lanes. Two steps: sxtl widens
       // T2S -> T2D from src into dst, then scvtfv (T2D) converts dst in place.
 423   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
 424   match(Set dst (VectorCastI2X src));
 425   format %{ "sxtl    $dst, T2D, $src, T2S\n\t"
 426             "scvtfv  T2D, $dst, $dst\t# convert 2I to 2D vector"
 427   %}
 428   ins_encode %{
 429     __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
 430     __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
 431   %}
 432   ins_pipe(pipe_slow);
 433 %}
 434 
 435 instruct vcvt4Bto4F(vecX dst, vecD src)
 436 %{
       // VectorCastB2X producing 4 T_FLOAT lanes. Three steps, all landing in
       // dst: sxtl T8B -> T8H from src, sxtl T4H -> T4S in place, then scvtfv
       // (T4S) in place.
 437   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 438   match(Set dst (VectorCastB2X src));
 439   format %{ "sxtl    $dst, T8H, $src, T8B\n\t"
 440             "sxtl    $dst, T4S, $dst, T4H\n\t"
 441             "scvtfv  T4S, $dst, $dst\t# convert 4B to 4F vector"
 442   %}
 443   ins_encode %{
 444     __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
 445     __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
 446     __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
 447   %}
 448   ins_pipe(pipe_slow);
 449 %}
 450 
 451 instruct vcvt2Fto2L(vecX dst, vecD src)
 452 %{
       // VectorCastF2X producing 2 T_LONG lanes. Two steps: fcvtl widens
       // T2S -> T2D from src into dst, then fcvtzs (T2D) converts dst in place.
 453   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 454   match(Set dst (VectorCastF2X src));
 455   format %{ "fcvtl   $dst, T2D, $src, T2S\n\t"
 456             "fcvtzs  $dst, T2D, $dst\t# convert 2F to 2L vector"
 457   %}
 458   ins_encode %{
 459     __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
 460     __ fcvtzs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
 461   %}
 462   ins_pipe(pipe_slow);
 463 %}
 464 
 465 instruct vcvt2Fto2I(vecD dst, vecD src)
 466 %{
       // VectorCastF2X producing 2 T_INT lanes: one fcvtzs with the T2S
       // arrangement, vecD to vecD.
 467   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 468   match(Set dst (VectorCastF2X src));
 469   format %{ "fcvtzs  $dst, T2S, $src\t# convert 2F to 2I vector" %}
 470   ins_encode %{
 471     __ fcvtzs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
 472   %}
 473   ins_pipe(pipe_class_default);
 474 %}
 475 
 476 instruct vcvt4Fto4I(vecX dst, vecX src)
 477 %{
       // VectorCastF2X producing 4 T_INT lanes: one fcvtzs with the T4S
       // arrangement, vecX to vecX.
 478   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 479   match(Set dst (VectorCastF2X src));
 480   format %{ "fcvtzs  $dst, T4S, $src\t# convert 4F to 4I vector" %}
 481   ins_encode %{
 482     __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
 483   %}
 484   ins_pipe(pipe_class_default);
 485 %}
 486 
 487 instruct vcvt2Dto2L(vecX dst, vecX src)
 488 %{
       // VectorCastD2X producing 2 T_LONG lanes: one fcvtzs with the T2D
       // arrangement, vecX to vecX.
 489   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 490   match(Set dst (VectorCastD2X src));
 491   format %{ "fcvtzs  $dst, T2D, $src\t# convert 2D to 2L vector" %}
 492   ins_encode %{
 493     __ fcvtzs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
 494   %}
 495   ins_pipe(pipe_class_default);
 496 %}
 497 
 498 instruct vcvt4Fto4S(vecD dst, vecX src)
 499 %{
       // VectorCastF2X producing 4 T_SHORT lanes. Two steps: fcvtzs (T4S)
       // converts src into dst, then xtn narrows dst T4S -> T4H in place.
       // NOTE(review): dst is declared vecD but temporarily holds the T4S
       // intermediate -- confirm this is acceptable to the register allocator.
 500   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 501   match(Set dst (VectorCastF2X src));
 502   format %{ "fcvtzs  $dst, T4S, $src\n\t"
 503             "xtn     $dst, T4H, $dst, T4S\t# convert 4F to 4S vector"
 504   %}
 505   ins_encode %{
 506     __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
 507     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
 508   %}
 509   ins_pipe(pipe_slow);
 510 %}
 511 
 512 instruct vcvt2Dto2I(vecD dst, vecX src)
 513 %{
       // VectorCastD2X producing 2 T_INT lanes, converted one lane at a time
       // through the scalar fcvtzdw (see the JDK-8276151 note in the encoding):
       //   1. ins copies one D element of src into dst
       //   2. fcvtzdw converts src into rscratch1 and dst into rscratch2
       //   3. fmovs writes rscratch1 to dst, then mov inserts rscratch2 as
       //      S element 1 of dst
       // dst is written in step 1 while src is still needed in step 2, so dst
       // must not share a register with src; the TEMP_DEF effect on dst is
       // presumably what guarantees that -- confirm against the ADLC docs.
       // NOTE(review): the trailing "0, 1" of ins is presumably (destination
       // lane, source lane), i.e. the upper double of src moved to lane 0 of
       // dst -- confirm in the assembler.
 514   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 515   match(Set dst (VectorCastD2X src));
 516   effect(TEMP_DEF dst);
 517   format %{ "ins      $dst, D, $src, 0, 1\n\t"
 518             "fcvtzdw  rscratch1, $src\n\t"
 519             "fcvtzdw  rscratch2, $dst\n\t"
 520             "fmovs    $dst, rscratch1\n\t"
 521             "mov      $dst, S, 1, rscratch2\t#convert 2D to 2I vector"
 522   %}
 523   ins_encode %{
 524     __ ins(as_FloatRegister($dst$$reg), __ D, as_FloatRegister($src$$reg), 0, 1);
 525     // We can't use fcvtzs(vector, integer) instruction here because we need
 526     // saturation arithmetic. See JDK-8276151.
 527     __ fcvtzdw(rscratch1, as_FloatRegister($src$$reg));
 528     __ fcvtzdw(rscratch2, as_FloatRegister($dst$$reg));
 529     __ fmovs(as_FloatRegister($dst$$reg), rscratch1);
 530     __ mov(as_FloatRegister($dst$$reg), __ S, 1, rscratch2);
 531   %}
 532   ins_pipe(pipe_slow);
 533 %}
 534 
 535 instruct vcvt4Fto4B(vecD dst, vecX src)
 536 %{
       // VectorCastF2X producing 4 T_BYTE lanes. Three steps, all landing in
       // dst: fcvtzs (T4S) from src, xtn T4S -> T4H in place, then xtn
       // T8H -> T8B in place.
       // NOTE(review): dst is declared vecD but temporarily holds the T4S
       // intermediate -- confirm this is acceptable to the register allocator.
 537   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 538   match(Set dst (VectorCastF2X src));
 539   format %{ "fcvtzs  $dst, T4S, $src\n\t"
 540             "xtn     $dst, T4H, $dst, T4S\n\t"
 541             "xtn     $dst, T8B, $dst, T8H\t# convert 4F to 4B vector"
 542   %}
 543   ins_encode %{
 544     __ fcvtzs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
 545     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
 546     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
 547   %}
 548   ins_pipe(pipe_slow);
 549 %}
 550 
 551 instruct vcvt2Fto2D(vecX dst, vecD src)
 552 %{
       // VectorCastF2X producing 2 T_DOUBLE lanes: a single fcvtl widens the
       // T2S source in a vecD register to T2D in a vecX register.
 553   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
 554   match(Set dst (VectorCastF2X src));
 555   format %{ "fcvtl  $dst, T2D, $src, T2S\t# convert 2F to 2D vector" %}
 556   ins_encode %{
 557     __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
 558   %}
 559   ins_pipe(pipe_class_default);
 560 %}
 561 
 562 instruct vcvt2Dto2F(vecD dst, vecX src)
 563 %{
       // VectorCastD2X producing 2 T_FLOAT lanes: a single fcvtn narrows the
       // T2D source in a vecX register to T2S in a vecD register.
 564   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
 565   match(Set dst (VectorCastD2X src));
 566   format %{ "fcvtn  $dst, T2S, $src, T2D\t# convert 2D to 2F vector" %}
 567   ins_encode %{
 568     __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
 569   %}
 570   ins_pipe(pipe_class_default);
 571 %}
 572 
 573 
 574 instruct vroundvecD2Fto2I(vecD dst, vecD src, vecD tmp1, vecD tmp2, vecD tmp3)
 575 %{
       // RoundVF producing 2 T_INT lanes in vecD registers, only when
       // UseSVE == 0. All work is delegated to vector_round_neon with the T2S
       // arrangement; that helper is not defined in this part of the file.
       // dst is TEMP_DEF and tmp1..tmp3 are TEMP, i.e. the helper is given four
       // registers besides src to work with.
 576   predicate(UseSVE == 0 &&
 577             n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 578   match(Set dst (RoundVF src));
 579   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 580   format %{ "vround  $dst, T2S, $src\t# round vecD 2F to 2I vector" %}
 581   ins_encode %{
 582     __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
 583                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
 584                          as_FloatRegister($tmp3$$reg), __ T2S);
 585   %}
 586   ins_pipe(pipe_class_default);
 587 %}
 588 
 589 instruct vroundvecX4Fto4I(vecX dst, vecX src, vecX tmp1, vecX tmp2, vecX tmp3)
 590 %{
       // RoundVF producing 4 T_INT lanes in vecX registers, only when
       // UseSVE == 0. All work is delegated to vector_round_neon with the T4S
       // arrangement; that helper is not defined in this part of the file.
       // dst is TEMP_DEF and tmp1..tmp3 are TEMP, i.e. the helper is given four
       // registers besides src to work with.
 591   predicate(UseSVE == 0 &&
 592             n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
 593   match(Set dst (RoundVF src));
 594   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 595   format %{ "vround  $dst, T4S, $src\t# round vecX 4F to 4I vector" %}
 596   ins_encode %{
 597     __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
 598                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
 599                          as_FloatRegister($tmp3$$reg), __ T4S);
 600   %}
 601   ins_pipe(pipe_class_default);
 602 %}
 603 
 604 instruct vroundvecX2Dto2L(vecX dst, vecX src, vecX tmp1, vecX tmp2, vecX tmp3)
 605 %{
       // RoundVD producing 2 T_LONG lanes in vecX registers, only when
       // UseSVE == 0. All work is delegated to vector_round_neon with the T2D
       // arrangement; that helper is not defined in this part of the file.
       // dst is TEMP_DEF and tmp1..tmp3 are TEMP, i.e. the helper is given four
       // registers besides src to work with.
 606   predicate(UseSVE == 0 &&
 607             n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
 608   match(Set dst (RoundVD src));
 609   effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3);
 610   format %{ "vround  $dst, T2D, $src\t# round vecX 2D to 2L vector" %}
 611   ins_encode %{
 612     __ vector_round_neon(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
 613                          as_FloatRegister($tmp1$$reg), as_FloatRegister($tmp2$$reg),
 614                          as_FloatRegister($tmp3$$reg), __ T2D);
 615   %}
 616   ins_pipe(pipe_class_default);
 617 %}
 618 
 619 // ------------------------------ Reduction -------------------------------
 620 
 621 instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
 622 %{
       // AddReductionVI over a byte vector held in a vecD register (the
       // predicate checks that the element type of input 2 is T_BYTE).
       //   1. addv (T8B) sums the lanes of vsrc into tmp
       //   2. smov moves byte lane 0 of tmp into dst
       //   3. addw adds the scalar input isrc
       //   4. sxtb sign-extends the low byte of the sum, so the result is
       //      wrapped to byte range
       // reduce_add16B has the same predicate; the two rules differ in the
       // vecD vs. vecX operand class of vsrc.
 623   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 624   match(Set dst (AddReductionVI isrc vsrc));
 625   ins_cost(INSN_COST);
 626   effect(TEMP_DEF dst, TEMP tmp);
 627   format %{ "addv  $tmp, T8B, $vsrc\n\t"
 628             "smov  $dst, $tmp, B, 0\n\t"
 629             "addw  $dst, $dst, $isrc\n\t"
 630             "sxtb  $dst, $dst\t# add reduction8B"
 631   %}
 632   ins_encode %{
 633     __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
 634     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
 635     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
 636     __ sxtb($dst$$Register, $dst$$Register);
 637   %}
 638   ins_pipe(pipe_slow);
 639 %}
 640 
 641 instruct reduce_add16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
 642 %{
       // AddReductionVI over a byte vector held in a vecX register (the
       // predicate checks that the element type of input 2 is T_BYTE).
       //   1. addv (T16B) sums the lanes of vsrc into tmp
       //   2. smov moves byte lane 0 of tmp into dst
       //   3. addw adds the scalar input isrc
       //   4. sxtb sign-extends the low byte of the sum, so the result is
       //      wrapped to byte range
       // reduce_add8B has the same predicate; the two rules differ in the
       // vecD vs. vecX operand class of vsrc.
 643   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 644   match(Set dst (AddReductionVI isrc vsrc));
 645   ins_cost(INSN_COST);
 646   effect(TEMP_DEF dst, TEMP tmp);
 647   format %{ "addv  $tmp, T16B, $vsrc\n\t"
 648             "smov  $dst, $tmp, B, 0\n\t"
 649             "addw  $dst, $dst, $isrc\n\t"
 650             "sxtb  $dst, $dst\t# add reduction16B"
 651   %}
 652   ins_encode %{
 653     __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));
 654     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
 655     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
 656     __ sxtb($dst$$Register, $dst$$Register);
 657   %}
 658   ins_pipe(pipe_slow);
 659 %}
 660 
 661 instruct reduce_add4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
 662 %{
       // AddReductionVI over a short vector held in a vecD register (the
       // predicate checks that the element type of input 2 is T_SHORT).
       //   1. addv (T4H) sums the lanes of vsrc into tmp
       //   2. smov moves halfword lane 0 of tmp into dst
       //   3. addw adds the scalar input isrc
       //   4. sxth sign-extends the low halfword of the sum, so the result is
       //      wrapped to short range
       // reduce_add8S has the same predicate; the two rules differ in the
       // vecD vs. vecX operand class of vsrc.
 663   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 664   match(Set dst (AddReductionVI isrc vsrc));
 665   ins_cost(INSN_COST);
 666   effect(TEMP_DEF dst, TEMP tmp);
 667   format %{ "addv  $tmp, T4H, $vsrc\n\t"
 668             "smov  $dst, $tmp, H, 0\n\t"
 669             "addw  $dst, $dst, $isrc\n\t"
 670             "sxth  $dst, $dst\t# add reduction4S"
 671   %}
 672   ins_encode %{
 673     __ addv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));
 674     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);
 675     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
 676     __ sxth($dst$$Register, $dst$$Register);
 677   %}
 678   ins_pipe(pipe_slow);
 679 %}
 680 
 681 instruct reduce_add8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
 682 %{
       // AddReductionVI over a short vector held in a vecX register (the
       // predicate checks that the element type of input 2 is T_SHORT).
       //   1. addv (T8H) sums the lanes of vsrc into tmp
       //   2. smov moves halfword lane 0 of tmp into dst
       //   3. addw adds the scalar input isrc
       //   4. sxth sign-extends the low halfword of the sum, so the result is
       //      wrapped to short range
       // reduce_add4S has the same predicate; the two rules differ in the
       // vecD vs. vecX operand class of vsrc.
 683   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 684   match(Set dst (AddReductionVI isrc vsrc));
 685   ins_cost(INSN_COST);
 686   effect(TEMP_DEF dst, TEMP tmp);
 687   format %{ "addv  $tmp, T8H, $vsrc\n\t"
 688             "smov  $dst, $tmp, H, 0\n\t"
 689             "addw  $dst, $dst, $isrc\n\t"
 690             "sxth  $dst, $dst\t# add reduction8S"
 691   %}
 692   ins_encode %{
 693     __ addv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));
 694     __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);
 695     __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
 696     __ sxth($dst$$Register, $dst$$Register);
 697   %}
 698   ins_pipe(pipe_slow);
 699 %}
 700 
 701 instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
 702 %{
       // AddReductionVL over a vecX vector; this rule has no predicate.
       //   1. addpd combines the two 64-bit lanes of vsrc into tmp
       //   2. umov moves D lane 0 of tmp into dst
       //   3. a 64-bit add folds in the scalar input isrc
       // No narrowing step follows, unlike the byte/short add reductions.
 703   match(Set dst (AddReductionVL isrc vsrc));
 704   ins_cost(INSN_COST);
 705   effect(TEMP_DEF dst, TEMP tmp);
 706   format %{ "addpd $tmp, $vsrc\n\t"
 707             "umov  $dst, $tmp, D, 0\n\t"
 708             "add   $dst, $isrc, $dst\t# add reduction2L"
 709   %}
 710   ins_encode %{
 711     __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
 712     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
 713     __ add($dst$$Register, $isrc$$Register, $dst$$Register);
 714   %}
 715   ins_pipe(pipe_slow);
 716 %}
 717 
 718 instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
 719 %{
       // MulReductionVI over a byte vector held in a vecD register (the
       // predicate checks that the element type of input 2 is T_BYTE).
       // The vector is folded in halves with ins + mulv (T8B):
       //   1. an S element of vsrc is inserted into vtmp1, and vtmp1 is
       //      multiplied by vsrc
       //   2. an H element of vtmp1 is inserted into vtmp2, and vtmp2 is
       //      multiplied by vtmp1
       // Only byte lanes 0 and 1 of vtmp2 are read afterwards. Each is moved to
       // itmp with umov and multiplied into the running result (starting from
       // isrc), with an sxtb after each multiply to wrap to byte range.
       // NOTE(review): the trailing "0, 1" of each ins is presumably
       // (destination lane, source lane), i.e. the upper half is moved down
       // onto lane 0 -- confirm in the assembler.
 720   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 721   match(Set dst (MulReductionVI isrc vsrc));
 722   ins_cost(INSN_COST);
 723   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 724   format %{ "ins   $vtmp1, S, $vsrc, 0, 1\n\t"
 725             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
 726             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
 727             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
 728             "umov  $itmp, $vtmp2, B, 0\n\t"
 729             "mulw  $dst, $itmp, $isrc\n\t"
 730             "sxtb  $dst, $dst\n\t"
 731             "umov  $itmp, $vtmp2, B, 1\n\t"
 732             "mulw  $dst, $itmp, $dst\n\t"
 733             "sxtb  $dst, $dst\t# mul reduction8B"
 734   %}
 735   ins_encode %{
 736     __ ins(as_FloatRegister($vtmp1$$reg), __ S,
 737            as_FloatRegister($vsrc$$reg), 0, 1);
 738     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 739             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 740     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
 741            as_FloatRegister($vtmp1$$reg), 0, 1);
 742     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
 743             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 744     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
 745     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 746     __ sxtb($dst$$Register, $dst$$Register);
 747     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
 748     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 749     __ sxtb($dst$$Register, $dst$$Register);
 750   %}
 751   ins_pipe(pipe_slow);
 752 %}
 753 
 754 instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
 755 %{
       // MulReductionVI over a byte vector held in a vecX register (the
       // predicate checks that the element type of input 2 is T_BYTE).
       // Same scheme as reduce_mul8B with one extra folding step up front;
       // every mulv uses the T8B arrangement:
       //   1. a D element of vsrc is inserted into vtmp1; vtmp1 *= vsrc
       //   2. an S element of vtmp1 is inserted into vtmp2;
       //      vtmp1 = vtmp2 * vtmp1
       //   3. an H element of vtmp1 is inserted into vtmp2; vtmp2 *= vtmp1
       // Only byte lanes 0 and 1 of vtmp2 are read afterwards. Each is moved to
       // itmp with umov and multiplied into the running result (starting from
       // isrc), with an sxtb after each multiply to wrap to byte range.
       // NOTE(review): the trailing "0, 1" of each ins is presumably
       // (destination lane, source lane), i.e. the upper half is moved down
       // onto lane 0 -- confirm in the assembler.
 756   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 757   match(Set dst (MulReductionVI isrc vsrc));
 758   ins_cost(INSN_COST);
 759   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 760   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
 761             "mulv  $vtmp1, T8B, $vtmp1, $vsrc\n\t"
 762             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
 763             "mulv  $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
 764             "ins   $vtmp2, H, $vtmp1, 0, 1\n\t"
 765             "mulv  $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
 766             "umov  $itmp, $vtmp2, B, 0\n\t"
 767             "mulw  $dst, $itmp, $isrc\n\t"
 768             "sxtb  $dst, $dst\n\t"
 769             "umov  $itmp, $vtmp2, B, 1\n\t"
 770             "mulw  $dst, $itmp, $dst\n\t"
 771             "sxtb  $dst, $dst\t# mul reduction16B"
 772   %}
 773   ins_encode %{
 774     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
 775            as_FloatRegister($vsrc$$reg), 0, 1);
 776     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 777             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 778     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
 779            as_FloatRegister($vtmp1$$reg), 0, 1);
 780     __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
 781             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 782     __ ins(as_FloatRegister($vtmp2$$reg), __ H,
 783            as_FloatRegister($vtmp1$$reg), 0, 1);
 784     __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
 785             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 786     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
 787     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 788     __ sxtb($dst$$Register, $dst$$Register);
 789     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
 790     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 791     __ sxtb($dst$$Register, $dst$$Register);
 792   %}
 793   ins_pipe(pipe_slow);
 794 %}
 795 
 796 instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
 797 %{
 798   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 799   match(Set dst (MulReductionVI isrc vsrc));
 800   ins_cost(INSN_COST);
 801   effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
 802   format %{ "ins   $vtmp, S, $vsrc, 0, 1\n\t"
 803             "mulv  $vtmp, T4H, $vtmp, $vsrc\n\t"
 804             "umov  $itmp, $vtmp, H, 0\n\t"
 805             "mulw  $dst, $itmp, $isrc\n\t"
 806             "sxth  $dst, $dst\n\t"
 807             "umov  $itmp, $vtmp, H, 1\n\t"
 808             "mulw  $dst, $itmp, $dst\n\t"
 809             "sxth  $dst, $dst\t# mul reduction4S"
 810   %}
 811   ins_encode %{
 812     __ ins(as_FloatRegister($vtmp$$reg), __ S,
 813            as_FloatRegister($vsrc$$reg), 0, 1);
 814     __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
 815             as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
 816     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
 817     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 818     __ sxth($dst$$Register, $dst$$Register);
 819     __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
 820     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 821     __ sxth($dst$$Register, $dst$$Register);
 822   %}
 823   ins_pipe(pipe_slow);
 824 %}
 825 
 826 instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
 827 %{
 828   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 829   match(Set dst (MulReductionVI isrc vsrc));
 830   ins_cost(INSN_COST);
 831   effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
 832   format %{ "ins   $vtmp1, D, $vsrc, 0, 1\n\t"
 833             "mulv  $vtmp1, T4H, $vtmp1, $vsrc\n\t"
 834             "ins   $vtmp2, S, $vtmp1, 0, 1\n\t"
 835             "mulv  $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
 836             "umov  $itmp, $vtmp2, H, 0\n\t"
 837             "mulw  $dst, $itmp, $isrc\n\t"
 838             "sxth  $dst, $dst\n\t"
 839             "umov  $itmp, $vtmp2, H, 1\n\t"
 840             "mulw  $dst, $itmp, $dst\n\t"
 841             "sxth  $dst, $dst\t# mul reduction8S"
 842   %}
 843   ins_encode %{
 844     __ ins(as_FloatRegister($vtmp1$$reg), __ D,
 845            as_FloatRegister($vsrc$$reg), 0, 1);
 846     __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
 847             as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
 848     __ ins(as_FloatRegister($vtmp2$$reg), __ S,
 849            as_FloatRegister($vtmp1$$reg), 0, 1);
 850     __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
 851             as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
 852     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
 853     __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
 854     __ sxth($dst$$Register, $dst$$Register);
 855     __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
 856     __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
 857     __ sxth($dst$$Register, $dst$$Register);
 858   %}
 859   ins_pipe(pipe_slow);
 860 %}
 861 
 862 instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
 863 %{
 864   match(Set dst (MulReductionVL isrc vsrc));
 865   ins_cost(INSN_COST);
 866   effect(TEMP_DEF dst, TEMP tmp);
 867   format %{ "umov  $tmp, $vsrc, D, 0\n\t"
 868             "mul   $dst, $isrc, $tmp\n\t"
 869             "umov  $tmp, $vsrc, D, 1\n\t"
 870             "mul   $dst, $dst, $tmp\t# mul reduction2L"
 871   %}
 872   ins_encode %{
 873     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
 874     __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
 875     __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
 876     __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
 877   %}
 878   ins_pipe(pipe_slow);
 879 %}
 880 
 881 instruct reduce_max8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
 882 %{
 883   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 884   match(Set dst (MaxReductionV isrc vsrc));
 885   ins_cost(INSN_COST);
 886   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 887   format %{ "smaxv $tmp, T8B, $vsrc\n\t"
 888             "smov  $dst, $tmp, B, 0\n\t"
 889             "cmpw  $dst, $isrc\n\t"
 890             "cselw $dst, $dst, $isrc GT\t# max reduction8B"
 891   %}
 892   ins_encode %{
 893     __ smaxv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
 894     __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
 895     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 896     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
 897   %}
 898   ins_pipe(pipe_slow);
 899 %}
 900 
 901 instruct reduce_max16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
 902 %{
 903   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 904   match(Set dst (MaxReductionV isrc vsrc));
 905   ins_cost(INSN_COST);
 906   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 907   format %{ "smaxv $tmp, T16B, $vsrc\n\t"
 908             "smov  $dst, $tmp, B, 0\n\t"
 909             "cmpw  $dst, $isrc\n\t"
 910             "cselw $dst, $dst, $isrc GT\t# max reduction16B"
 911   %}
 912   ins_encode %{
 913     __ smaxv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));
 914     __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
 915     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 916     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
 917   %}
 918   ins_pipe(pipe_slow);
 919 %}
 920 
 921 instruct reduce_max4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
 922 %{
 923   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 924   match(Set dst (MaxReductionV isrc vsrc));
 925   ins_cost(INSN_COST);
 926   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 927   format %{ "smaxv $tmp, T4H, $vsrc\n\t"
 928             "smov  $dst, $tmp, H, 0\n\t"
 929             "cmpw  $dst, $isrc\n\t"
 930             "cselw $dst, $dst, $isrc GT\t# max reduction4S"
 931   %}
 932   ins_encode %{
 933     __ smaxv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));
 934     __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
 935     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 936     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
 937   %}
 938   ins_pipe(pipe_slow);
 939 %}
 940 
 941 instruct reduce_max8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
 942 %{
 943   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
 944   match(Set dst (MaxReductionV isrc vsrc));
 945   ins_cost(INSN_COST);
 946   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 947   format %{ "smaxv $tmp, T8H, $vsrc\n\t"
 948             "smov  $dst, $tmp, H, 0\n\t"
 949             "cmpw  $dst, $isrc\n\t"
 950             "cselw $dst, $dst, $isrc GT\t# max reduction8S"
 951   %}
 952   ins_encode %{
 953     __ smaxv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));
 954     __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
 955     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 956     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
 957   %}
 958   ins_pipe(pipe_slow);
 959 %}
 960 
 961 instruct reduce_max4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
 962 %{
 963   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
 964   match(Set dst (MaxReductionV isrc vsrc));
 965   ins_cost(INSN_COST);
 966   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 967   format %{ "smaxv $tmp, T4S, $vsrc\n\t"
 968             "umov  $dst, $tmp, S, 0\n\t"
 969             "cmpw  $dst, $isrc\n\t"
 970             "cselw $dst, $dst, $isrc GT\t# max reduction4I"
 971   %}
 972   ins_encode %{
 973     __ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg));
 974     __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
 975     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 976     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
 977   %}
 978   ins_pipe(pipe_slow);
 979 %}
 980 
 981 instruct reduce_min8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
 982 %{
 983   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
 984   match(Set dst (MinReductionV isrc vsrc));
 985   ins_cost(INSN_COST);
 986   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
 987   format %{ "sminv $tmp, T8B, $vsrc\n\t"
 988             "smov  $dst, $tmp, B, 0\n\t"
 989             "cmpw  $dst, $isrc\n\t"
 990             "cselw $dst, $dst, $isrc LT\t# min reduction8B"
 991   %}
 992   ins_encode %{
 993     __ sminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
 994     __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
 995     __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
 996     __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
 997   %}
 998   ins_pipe(pipe_slow);
 999 %}
1000 
// Signed min reduction of a 16-byte vector combined with the scalar $isrc.
// The across-lanes minimum is sign-extended into $dst and then compared
// with the scalar; the compare clobbers the flags (KILL cr).
instruct reduce_min16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T16B, $vsrc\n\t"
            "smov  $dst, $tmp, B, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction16B"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Vector minimum, read back (sign-extended) from byte element 0.
    __ sminv(reduced, __ T16B, as_FloatRegister($vsrc$$reg));
    __ smov(result, reduced, __ B, 0);
    // Keep the vector minimum only if it is less than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1020 
// Signed min reduction of a 4-short vector combined with the scalar $isrc.
// The across-lanes minimum is sign-extended into $dst and then compared
// with the scalar; the compare clobbers the flags (KILL cr).
instruct reduce_min4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T4H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction4S"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Vector minimum, read back (sign-extended) from halfword element 0.
    __ sminv(reduced, __ T4H, as_FloatRegister($vsrc$$reg));
    __ smov(result, reduced, __ H, 0);
    // Keep the vector minimum only if it is less than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1040 
// Signed min reduction of an 8-short vector combined with the scalar $isrc.
// The across-lanes minimum is sign-extended into $dst and then compared
// with the scalar; the compare clobbers the flags (KILL cr).
instruct reduce_min8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T8H, $vsrc\n\t"
            "smov  $dst, $tmp, H, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction8S"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Vector minimum, read back (sign-extended) from halfword element 0.
    __ sminv(reduced, __ T8H, as_FloatRegister($vsrc$$reg));
    __ smov(result, reduced, __ H, 0);
    // Keep the vector minimum only if it is less than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1060 
// Signed min reduction of a 4-int vector combined with the scalar $isrc.
// The across-lanes minimum is moved to $dst with umov (the element is
// already 32 bits wide) and compared with the scalar; the compare clobbers
// the flags (KILL cr).
instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminv $tmp, T4S, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction4I"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Vector minimum, read back from word element 0.
    __ sminv(reduced, __ T4S, as_FloatRegister($vsrc$$reg));
    __ umov(result, reduced, __ S, 0);
    // Keep the vector minimum only if it is less than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1080 
// Signed max reduction of a 2-int vector combined with the scalar $isrc.
// A pairwise maximum of the vector with itself is used instead of an
// across-lanes instruction; word element 0 of the temp is then compared
// with the scalar. The compare clobbers the flags (KILL cr).
instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MaxReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "smaxp $tmp, T2S, $vsrc, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc GT\t# max reduction2I"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister src     = as_FloatRegister($vsrc$$reg);
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Pairwise maximum of the two lanes, read back from word element 0.
    __ smaxp(reduced, __ T2S, src, src);
    __ umov(result, reduced, __ S, 0);
    // Keep the vector maximum only if it is greater than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
1100 
// Signed min reduction of a 2-int vector combined with the scalar $isrc.
// A pairwise minimum of the vector with itself is used instead of an
// across-lanes instruction; word element 0 of the temp is then compared
// with the scalar. The compare clobbers the flags (KILL cr).
instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "sminp $tmp, T2S, $vsrc, $vsrc\n\t"
            "umov  $dst, $tmp, S, 0\n\t"
            "cmpw  $dst, $isrc\n\t"
            "cselw $dst, $dst, $isrc LT\t# min reduction2I"
  %}
  ins_encode %{
    Register      result  = $dst$$Register;
    Register      scalar  = $isrc$$Register;
    FloatRegister src     = as_FloatRegister($vsrc$$reg);
    FloatRegister reduced = as_FloatRegister($tmp$$reg);

    // Pairwise minimum of the two lanes, read back from word element 0.
    __ sminp(reduced, __ T2S, src, src);
    __ umov(result, reduced, __ S, 0);
    // Keep the vector minimum only if it is less than the scalar.
    __ cmpw(result, scalar);
    __ cselw(result, result, scalar, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1120 
// Signed max reduction of a 2-long vector combined with the scalar $isrc.
// Each D lane is moved to a general register in turn and merged with a
// 64-bit compare/select; the compares clobber the flags (KILL cr).
instruct reduce_max2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MaxReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "cmp   $isrc,$tmp\n\t"
            "csel  $dst, $isrc, $tmp GT\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "cmp   $dst, $tmp\n\t"
            "csel  $dst, $dst, $tmp GT\t# max reduction2L"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister src    = as_FloatRegister($vsrc$$reg);

    // result = max(isrc, vsrc.D[0])
    __ umov(lane, src, __ D, 0);
    __ cmp(scalar, lane);
    __ csel(result, scalar, lane, Assembler::GT);
    // result = max(result, vsrc.D[1])
    __ umov(lane, src, __ D, 1);
    __ cmp(result, lane);
    __ csel(result, result, lane, Assembler::GT);
  %}
  ins_pipe(pipe_slow);
%}
1144 
// Signed min reduction of a 2-long vector combined with the scalar $isrc.
// Each D lane is moved to a general register in turn and merged with a
// 64-bit compare/select; the compares clobber the flags (KILL cr).
instruct reduce_min2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  match(Set dst (MinReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "cmp   $isrc,$tmp\n\t"
            "csel  $dst, $isrc, $tmp LT\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "cmp   $dst, $tmp\n\t"
            "csel  $dst, $dst, $tmp LT\t# min reduction2L"
  %}
  ins_encode %{
    Register      result = $dst$$Register;
    Register      scalar = $isrc$$Register;
    Register      lane   = $tmp$$Register;
    FloatRegister src    = as_FloatRegister($vsrc$$reg);

    // result = min(isrc, vsrc.D[0])
    __ umov(lane, src, __ D, 0);
    __ cmp(scalar, lane);
    __ csel(result, scalar, lane, Assembler::LT);
    // result = min(result, vsrc.D[1])
    __ umov(lane, src, __ D, 1);
    __ cmp(result, lane);
    __ csel(result, result, lane, Assembler::LT);
  %}
  ins_pipe(pipe_slow);
%}
1168 
// Max reduction of a 2-float vector combined with the scalar $fsrc.
// A scalar pairwise fmaxp collapses the two lanes into $dst, which is then
// merged with the scalar input by fmaxs.
instruct reduce_max2F(vRegF dst, vRegF fsrc, vecD vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxp $dst, $vsrc, S\n\t"
            "fmaxs $dst, $dst, $fsrc\t# max reduction2F" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fmaxp(result, as_FloatRegister($vsrc$$reg), __ S);
    __ fmaxs(result, result, as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1182 
// Max reduction of a 4-float vector combined with the scalar $fsrc.
// An across-lanes fmaxv collapses the vector into $dst, which is then
// merged with the scalar input by fmaxs.
instruct reduce_max4F(vRegF dst, vRegF fsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MaxReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxv $dst,  T4S, $vsrc\n\t"
            "fmaxs $dst, $dst, $fsrc\t# max reduction4F" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fmaxv(result, __ T4S, as_FloatRegister($vsrc$$reg));
    __ fmaxs(result, result, as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1196 
// Max reduction of a 2-double vector combined with the scalar $dsrc.
// A scalar pairwise fmaxp collapses the two lanes into $dst, which is then
// merged with the scalar input by fmaxd.
instruct reduce_max2D(vRegD dst, vRegD dsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MaxReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fmaxp $dst, $vsrc, D\n\t"
            "fmaxd $dst, $dst, $dsrc\t# max reduction2D" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fmaxp(result, as_FloatRegister($vsrc$$reg), __ D);
    __ fmaxd(result, result, as_FloatRegister($dsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1210 
// Min reduction of a 2-float vector combined with the scalar $fsrc.
// A scalar pairwise fminp collapses the two lanes into $dst, which is then
// merged with the scalar input by fmins.
instruct reduce_min2F(vRegF dst, vRegF fsrc, vecD vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminp $dst, $vsrc, S\n\t"
            "fmins $dst, $dst, $fsrc\t# min reduction2F" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fminp(result, as_FloatRegister($vsrc$$reg), __ S);
    __ fmins(result, result, as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1224 
// Min reduction of a 4-float vector combined with the scalar $fsrc.
// An across-lanes fminv collapses the vector into $dst, which is then
// merged with the scalar input by fmins.
instruct reduce_min4F(vRegF dst, vRegF fsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  match(Set dst (MinReductionV fsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminv $dst,  T4S, $vsrc\n\t"
            "fmins $dst, $dst, $fsrc\t# min reduction4F" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fminv(result, __ T4S, as_FloatRegister($vsrc$$reg));
    __ fmins(result, result, as_FloatRegister($fsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1238 
// Min reduction of a 2-double vector combined with the scalar $dsrc.
// A scalar pairwise fminp collapses the two lanes into $dst, which is then
// merged with the scalar input by fmind.
instruct reduce_min2D(vRegD dst, vRegD dsrc, vecX vsrc)
%{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  match(Set dst (MinReductionV dsrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "fminp $dst, $vsrc, D\n\t"
            "fmind $dst, $dst, $dsrc\t# min reduction2D" %}
  ins_encode %{
    FloatRegister result = as_FloatRegister($dst$$reg);

    // Reduce the vector first, then fold in the scalar.
    __ fminp(result, as_FloatRegister($vsrc$$reg), __ D);
    __ fmind(result, result, as_FloatRegister($dsrc$$reg));
  %}
  ins_pipe(pipe_class_default);
%}
1252 
// Bitwise AND reduction of an 8-byte vector combined with the scalar $isrc.
// The reduction is done entirely in general registers: the two 32-bit words
// of the vector are ANDed, the word is folded onto itself with shifts of 16
// and 8 so its low byte combines all eight lanes, and the result is ANDed
// with the scalar and sign-extended from 8 bits.
instruct reduce_and8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "andw   $dst, $dst, $tmp\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $dst, $dst, LSR #8\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# and reduction8B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 32-bit words of the vector.
    __ umov(low, src, __ S, 0);
    __ umov(acc, src, __ S, 1);
    __ andw(acc, acc, low);
    // Fold 4 bytes -> 2 -> 1 inside the word.
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    __ andw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ andw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1278 
// Bitwise OR reduction of an 8-byte vector combined with the scalar $isrc.
// The reduction is done entirely in general registers: the two 32-bit words
// of the vector are ORed, the word is folded onto itself with shifts of 16
// and 8 so its low byte combines all eight lanes, and the result is ORed
// with the scalar and sign-extended from 8 bits.
instruct reduce_orr8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "orrw   $dst, $dst, $tmp\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $dst, $dst, LSR #8\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# orr reduction8B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 32-bit words of the vector.
    __ umov(low, src, __ S, 0);
    __ umov(acc, src, __ S, 1);
    __ orrw(acc, acc, low);
    // Fold 4 bytes -> 2 -> 1 inside the word.
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    __ orrw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ orrw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1304 
// Bitwise XOR reduction of an 8-byte vector combined with the scalar $isrc.
// The reduction is done entirely in general registers: the two 32-bit words
// of the vector are XORed, the word is folded onto itself with shifts of 16
// and 8 so its low byte combines all eight lanes, and the result is XORed
// with the scalar and sign-extended from 8 bits.
instruct reduce_eor8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "eorw   $dst, $dst, $tmp\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $dst, $dst, LSR #8\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# eor reduction8B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 32-bit words of the vector.
    __ umov(low, src, __ S, 0);
    __ umov(acc, src, __ S, 1);
    __ eorw(acc, acc, low);
    // Fold 4 bytes -> 2 -> 1 inside the word.
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    __ eorw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ eorw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1330 
// Bitwise AND reduction of a 16-byte vector combined with the scalar $isrc.
// The two 64-bit halves are ANDed in general registers (64-bit ops), the
// value is folded onto itself with shifts of 32, 16 and 8 so its low byte
// combines all sixteen lanes, and the result is ANDed with the scalar and
// sign-extended from 8 bits.
instruct reduce_and16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $dst, $dst, LSR #8\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# and reduction16B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 64-bit halves of the vector.
    __ umov(low, src, __ D, 0);
    __ umov(acc, src, __ D, 1);
    __ andr(acc, acc, low);
    // Fold 8 bytes -> 4 (64-bit op), then 4 -> 2 -> 1 (32-bit ops).
    __ andr(acc, acc, acc, Assembler::LSR, 32);
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    __ andw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ andw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1358 
// Bitwise OR reduction of a 16-byte vector combined with the scalar $isrc.
// The two 64-bit halves are ORed in general registers (64-bit ops), the
// value is folded onto itself with shifts of 32, 16 and 8 so its low byte
// combines all sixteen lanes, and the result is ORed with the scalar and
// sign-extended from 8 bits.
instruct reduce_orr16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (OrReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $dst, $dst, LSR #8\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# orr reduction16B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 64-bit halves of the vector.
    __ umov(low, src, __ D, 0);
    __ umov(acc, src, __ D, 1);
    __ orr(acc, acc, low);
    // Fold 8 bytes -> 4 (64-bit op), then 4 -> 2 -> 1 (32-bit ops).
    __ orr(acc, acc, acc, Assembler::LSR, 32);
    __ orrw(acc, acc, acc, Assembler::LSR, 16);
    __ orrw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ orrw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1386 
// Bitwise XOR reduction of a 16-byte vector combined with the scalar $isrc.
// The two 64-bit halves are XORed in general registers (64-bit ops), the
// value is folded onto itself with shifts of 32, 16 and 8 so its low byte
// combines all sixteen lanes, and the result is XORed with the scalar and
// sign-extended from 8 bits.
instruct reduce_eor16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  match(Set dst (XorReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $dst, $dst, LSR #8\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxtb   $dst, $dst\t# eor reduction16B"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 64-bit halves of the vector.
    __ umov(low, src, __ D, 0);
    __ umov(acc, src, __ D, 1);
    __ eor(acc, acc, low);
    // Fold 8 bytes -> 4 (64-bit op), then 4 -> 2 -> 1 (32-bit ops).
    __ eor(acc, acc, acc, Assembler::LSR, 32);
    __ eorw(acc, acc, acc, Assembler::LSR, 16);
    __ eorw(acc, acc, acc, Assembler::LSR, 8);
    // Merge with the scalar input and narrow to a signed byte.
    __ eorw(acc, as_Register($isrc$$reg), acc);
    __ sxtb(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1414 
// Bitwise AND reduction of a 4-short vector combined with the scalar $isrc.
// The two 32-bit words of the vector are ANDed in general registers, the
// word is folded onto itself with a shift of 16 so its low halfword combines
// all four lanes, and the result is ANDed with the scalar and sign-extended
// from 16 bits.
instruct reduce_and4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (AndReductionV isrc vsrc));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "andw   $dst, $dst, $tmp\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# and reduction4S"
  %}
  ins_encode %{
    Register      acc = as_Register($dst$$reg);
    Register      low = as_Register($tmp$$reg);
    FloatRegister src = as_FloatRegister($vsrc$$reg);

    // Combine the two 32-bit words of the vector.
    __ umov(low, src, __ S, 0);
    __ umov(acc, src, __ S, 1);
    __ andw(acc, acc, low);
    // Fold 2 halfwords -> 1 inside the word.
    __ andw(acc, acc, acc, Assembler::LSR, 16);
    // Merge with the scalar input and narrow to a signed short.
    __ andw(acc, as_Register($isrc$$reg), acc);
    __ sxth(acc, acc);
  %}
  ins_pipe(pipe_slow);
%}
1438 
// OR reduction over the four short lanes of vsrc, combined with the scalar isrc.
// Both 32-bit words are ORed in general registers, the upper halfword is folded
// onto the lower one, isrc is mixed in and the low 16 bits are sign-extended.
instruct reduce_orr4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (OrReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "orrw   $dst, $dst, $tmp\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# orr reduction4S"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ S, 0);              // word 0 holds lanes 0 and 1
    __ umov(acc, vec, __ S, 1);                  // word 1 holds lanes 2 and 3
    __ orrw(acc, acc, scratch);
    __ orrw(acc, acc, acc, Assembler::LSR, 16);  // fold upper halfword onto lower
    __ orrw(acc, $isrc$$Register, acc);
    __ sxth(acc, acc);                           // result is a signed 16-bit value
  %}
  ins_pipe(pipe_slow);
%}
1462 
// XOR reduction over the four short lanes of vsrc, combined with the scalar isrc.
// Both 32-bit words are XORed in general registers, the upper halfword is folded
// onto the lower one, isrc is mixed in and the low 16 bits are sign-extended.
instruct reduce_eor4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (XorReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, S, 0\n\t"
            "umov   $dst, $vsrc, S, 1\n\t"
            "eorw   $dst, $dst, $tmp\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# eor reduction4S"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ S, 0);              // word 0 holds lanes 0 and 1
    __ umov(acc, vec, __ S, 1);                  // word 1 holds lanes 2 and 3
    __ eorw(acc, acc, scratch);
    __ eorw(acc, acc, acc, Assembler::LSR, 16);  // fold upper halfword onto lower
    __ eorw(acc, $isrc$$Register, acc);
    __ sxth(acc, acc);                           // result is a signed 16-bit value
  %}
  ins_pipe(pipe_slow);
%}
1486 
// AND reduction over the eight short lanes of vsrc, combined with the scalar isrc.
// Both 64-bit halves are moved to general registers and folded 64 -> 32 -> 16
// bits; the result is ANDed with isrc and sign-extended from 16 bits.
instruct reduce_and8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (AndReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $dst, $dst, LSR #16\n\t"
            "andw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# and reduction8S"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);              // low 64 bits: lanes 0..3
    __ umov(acc, vec, __ D, 1);                  // high 64 bits: lanes 4..7
    __ andr(acc, acc, scratch);
    __ andr(acc, acc, acc, Assembler::LSR, 32);  // fold upper word onto lower
    __ andw(acc, acc, acc, Assembler::LSR, 16);  // fold upper halfword onto lower
    __ andw(acc, $isrc$$Register, acc);
    __ sxth(acc, acc);                           // result is a signed 16-bit value
  %}
  ins_pipe(pipe_slow);
%}
1512 
// OR reduction over the eight short lanes of vsrc, combined with the scalar isrc.
// Both 64-bit halves are moved to general registers and folded 64 -> 32 -> 16
// bits; the result is ORed with isrc and sign-extended from 16 bits.
instruct reduce_orr8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (OrReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $dst, $dst, LSR #16\n\t"
            "orrw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# orr reduction8S"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);              // low 64 bits: lanes 0..3
    __ umov(acc, vec, __ D, 1);                  // high 64 bits: lanes 4..7
    __ orr(acc, acc, scratch);
    __ orr(acc, acc, acc, Assembler::LSR, 32);   // fold upper word onto lower
    __ orrw(acc, acc, acc, Assembler::LSR, 16);  // fold upper halfword onto lower
    __ orrw(acc, $isrc$$Register, acc);
    __ sxth(acc, acc);                           // result is a signed 16-bit value
  %}
  ins_pipe(pipe_slow);
%}
1538 
// XOR reduction over the eight short lanes of vsrc, combined with the scalar isrc.
// Both 64-bit halves are moved to general registers and folded 64 -> 32 -> 16
// bits; the result is XORed with isrc and sign-extended from 16 bits.
instruct reduce_eor8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (XorReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $dst, $dst, LSR #16\n\t"
            "eorw   $dst, $isrc, $dst\n\t"
            "sxth   $dst, $dst\t# eor reduction8S"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);              // low 64 bits: lanes 0..3
    __ umov(acc, vec, __ D, 1);                  // high 64 bits: lanes 4..7
    __ eor(acc, acc, scratch);
    __ eor(acc, acc, acc, Assembler::LSR, 32);   // fold upper word onto lower
    __ eorw(acc, acc, acc, Assembler::LSR, 16);  // fold upper halfword onto lower
    __ eorw(acc, $isrc$$Register, acc);
    __ sxth(acc, acc);                           // result is a signed 16-bit value
  %}
  ins_pipe(pipe_slow);
%}
1564 
// AND reduction over the two int lanes of vsrc, combined with the scalar isrc.
// Each lane is moved to tmp in turn and ANDed into the running result in dst.
instruct reduce_and2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (AndReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "andw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "andw  $dst, $tmp, $dst\t# and reduction2I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ S, 0);
    __ andw(acc, lane, $isrc$$Register);  // acc = lane0 & isrc
    __ umov(lane, vec, __ S, 1);
    __ andw(acc, lane, acc);              // acc = lane1 & acc
  %}
  ins_pipe(pipe_slow);
%}
1584 
// OR reduction over the two int lanes of vsrc, combined with the scalar isrc.
// Each lane is moved to tmp in turn and ORed into the running result in dst.
instruct reduce_orr2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (OrReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "orrw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "orrw  $dst, $tmp, $dst\t# orr reduction2I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ S, 0);
    __ orrw(acc, lane, $isrc$$Register);  // acc = lane0 | isrc
    __ umov(lane, vec, __ S, 1);
    __ orrw(acc, lane, acc);              // acc = lane1 | acc
  %}
  ins_pipe(pipe_slow);
%}
1604 
// XOR reduction over the two int lanes of vsrc, combined with the scalar isrc.
// Each lane is moved to tmp in turn and XORed into the running result in dst.
instruct reduce_eor2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
  match(Set dst (XorReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, S, 0\n\t"
            "eorw  $dst, $tmp, $isrc\n\t"
            "umov  $tmp, $vsrc, S, 1\n\t"
            "eorw  $dst, $tmp, $dst\t# eor reduction2I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ S, 0);
    __ eorw(acc, lane, $isrc$$Register);  // acc = lane0 ^ isrc
    __ umov(lane, vec, __ S, 1);
    __ eorw(acc, lane, acc);              // acc = lane1 ^ acc
  %}
  ins_pipe(pipe_slow);
%}
1624 
// AND reduction over the four int lanes of vsrc, combined with the scalar isrc.
// The two 64-bit halves are ANDed in general registers, the upper word is folded
// onto the lower one and the 32-bit result is ANDed with isrc.
instruct reduce_and4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (AndReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "andr   $dst, $dst, $tmp\n\t"
            "andr   $dst, $dst, $dst, LSR #32\n\t"
            "andw   $dst, $isrc, $dst\t# and reduction4I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);              // low 64 bits: lanes 0 and 1
    __ umov(acc, vec, __ D, 1);                  // high 64 bits: lanes 2 and 3
    __ andr(acc, acc, scratch);
    __ andr(acc, acc, acc, Assembler::LSR, 32);  // fold upper word onto lower
    __ andw(acc, $isrc$$Register, acc);
  %}
  ins_pipe(pipe_slow);
%}
1646 
// OR reduction over the four int lanes of vsrc, combined with the scalar isrc.
// The two 64-bit halves are ORed in general registers, the upper word is folded
// onto the lower one and the 32-bit result is ORed with isrc.
instruct reduce_orr4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (OrReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "orr    $dst, $dst, $tmp\n\t"
            "orr    $dst, $dst, $dst, LSR #32\n\t"
            "orrw   $dst, $isrc, $dst\t# orr reduction4I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);             // low 64 bits: lanes 0 and 1
    __ umov(acc, vec, __ D, 1);                 // high 64 bits: lanes 2 and 3
    __ orr(acc, acc, scratch);
    __ orr(acc, acc, acc, Assembler::LSR, 32);  // fold upper word onto lower
    __ orrw(acc, $isrc$$Register, acc);
  %}
  ins_pipe(pipe_slow);
%}
1668 
// XOR reduction over the four int lanes of vsrc, combined with the scalar isrc.
// The two 64-bit halves are XORed in general registers, the upper word is folded
// onto the lower one and the 32-bit result is XORed with isrc.
instruct reduce_eor4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
%{
  match(Set dst (XorReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp, $vsrc, D, 0\n\t"
            "umov   $dst, $vsrc, D, 1\n\t"
            "eor    $dst, $dst, $tmp\n\t"
            "eor    $dst, $dst, $dst, LSR #32\n\t"
            "eorw   $dst, $isrc, $dst\t# eor reduction4I"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register scratch = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(scratch, vec, __ D, 0);             // low 64 bits: lanes 0 and 1
    __ umov(acc, vec, __ D, 1);                 // high 64 bits: lanes 2 and 3
    __ eor(acc, acc, scratch);
    __ eor(acc, acc, acc, Assembler::LSR, 32);  // fold upper word onto lower
    __ eorw(acc, $isrc$$Register, acc);
  %}
  ins_pipe(pipe_slow);
%}
1690 
// AND reduction over the two long lanes of vsrc, combined with the scalar isrc.
// Each 64-bit lane is moved to tmp in turn and ANDed into the result in dst.
instruct reduce_and2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  match(Set dst (AndReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "andr  $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "andr  $dst, $dst, $tmp\t# and reduction2L"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ D, 0);
    __ andr(acc, $isrc$$Register, lane);  // acc = isrc & lane0
    __ umov(lane, vec, __ D, 1);
    __ andr(acc, acc, lane);              // acc = acc & lane1
  %}
  ins_pipe(pipe_slow);
%}
1710 
// OR reduction over the two long lanes of vsrc, combined with the scalar isrc.
// Each 64-bit lane is moved to tmp in turn and ORed into the result in dst.
instruct reduce_orr2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  match(Set dst (OrReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "orr   $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "orr   $dst, $dst, $tmp\t# orr reduction2L"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ D, 0);
    __ orr(acc, $isrc$$Register, lane);  // acc = isrc | lane0
    __ umov(lane, vec, __ D, 1);
    __ orr(acc, acc, lane);              // acc = acc | lane1
  %}
  ins_pipe(pipe_slow);
%}
1730 
// XOR reduction over the two long lanes of vsrc, combined with the scalar isrc.
// Each 64-bit lane is moved to tmp in turn and XORed into the result in dst.
instruct reduce_eor2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
%{
  match(Set dst (XorReductionV isrc vsrc));
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  effect(TEMP_DEF dst, TEMP tmp);
  ins_cost(INSN_COST);
  format %{ "umov  $tmp, $vsrc, D, 0\n\t"
            "eor   $dst, $isrc, $tmp\n\t"
            "umov  $tmp, $vsrc, D, 1\n\t"
            "eor   $dst, $dst, $tmp\t# eor reduction2L"
  %}
  ins_encode %{
    const Register acc = $dst$$Register;
    const Register lane = $tmp$$Register;
    const FloatRegister vec = as_FloatRegister($vsrc$$reg);
    __ umov(lane, vec, __ D, 0);
    __ eor(acc, $isrc$$Register, lane);  // acc = isrc ^ lane0
    __ umov(lane, vec, __ D, 1);
    __ eor(acc, acc, lane);              // acc = acc ^ lane1
  %}
  ins_pipe(pipe_slow);
%}
1750 
1751 // ------------------------------ Vector insert ---------------------------------
1752 
// Insert a general-register value into lane idx of a vecD vector whose element
// type is byte, short or int. src is first copied to dst (skipped when both are
// the same register); the lane width is derived from the node element type.
instruct insertID(vecD dst, vecD src, iRegIorL2I val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate((n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "mov    $dst, B/H/S, $idx, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    if (vd != vn) {
      // orr of a register with itself acts as a vector move
      __ orr(vd, __ T8B, vn, vn);
    }
    __ mov(vd, __ elemType_to_regVariant(elem_bt), $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1772 
// Insert a general-register value into lane idx of a vecX vector whose element
// type is byte, short or int. src is first copied to dst (skipped when both are
// the same register); the lane width is derived from the node element type.
instruct insertIX(vecX dst, vecX src, iRegIorL2I val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate((n->bottom_type()->is_vect()->element_basic_type() == T_BYTE ||
             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
             n->bottom_type()->is_vect()->element_basic_type() == T_INT));
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, B/H/S, $idx, $val\t# insert into vector (B/H/S)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    if (vd != vn) {
      // orr of a register with itself acts as a vector move
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ elemType_to_regVariant(elem_bt), $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1792 
// Insert a 64-bit general-register value into lane idx of a long vector.
// src is first copied to dst unless both are the same register.
instruct insert2L(vecX dst, vecX src, iRegL val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "mov    $dst, D, $idx, $val\t# insert into vector (D)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    if (vd != vn) {
      // orr of a register with itself acts as a vector move
      __ orr(vd, __ T16B, vn, vn);
    }
    __ mov(vd, __ D, $idx$$constant, $val$$Register);
  %}
  ins_pipe(pipe_slow);
%}
1810 
// Insert a scalar float (lane 0 of val) into lane idx of a vecD float vector.
// src is always copied to dst first; dst is declared TEMP_DEF and is written
// before val is read.
instruct insert2F(vecD dst, vecD src, vRegF val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  effect(TEMP_DEF dst);
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T8B, $src, $src\n\t"
            "ins    $dst, S, $val, $idx, 0\t# insert into vector(2F)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ orr(vd, __ T8B, vn, vn);  // vd = src
    __ ins(vd, __ S, as_FloatRegister($val$$reg), $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1827 
// Insert a scalar float (lane 0 of val) into lane idx of a vecX float vector.
// src is always copied to dst first; dst is declared TEMP_DEF and is written
// before val is read.
instruct insert4F(vecX dst, vecX src, vRegF val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
  effect(TEMP_DEF dst);
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "ins    $dst, S, $val, $idx, 0\t# insert into vector(4F)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ orr(vd, __ T16B, vn, vn);  // vd = src
    __ ins(vd, __ S, as_FloatRegister($val$$reg), $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1844 
// Insert a scalar double (lane 0 of val) into lane idx of a double vector.
// src is always copied to dst first; dst is declared TEMP_DEF and is written
// before val is read.
instruct insert2D(vecX dst, vecX src, vRegD val, immI idx)
%{
  match(Set dst (VectorInsert (Binary src val) idx));
  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
  effect(TEMP_DEF dst);
  ins_cost(2 * INSN_COST);
  format %{ "orr    $dst, T16B, $src, $src\n\t"
            "ins    $dst, D, $val, $idx, 0\t# insert into vector(2D)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ orr(vd, __ T16B, vn, vn);  // vd = src
    __ ins(vd, __ D, as_FloatRegister($val$$reg), $idx$$constant, 0);
  %}
  ins_pipe(pipe_slow);
%}
1861 
1862 // ------------------------------ Vector extract ---------------------------------
1863 
// Extract byte lane idx from an 8-element vector into a general register,
// using the sign-extending move (smov).
instruct extract8B(iRegINoSp dst, vecD src, immI idx)
%{
  match(Set dst (ExtractB src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, B, $idx\t# extract from vector(8B)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vn, __ B, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1875 
// Extract byte lane idx from a 16-element vector into a general register,
// using the sign-extending move (smov).
instruct extract16B(iRegINoSp dst, vecX src, immI idx)
%{
  match(Set dst (ExtractB src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 16);
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, B, $idx\t# extract from vector(16B)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vn, __ B, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1887 
// Extract short lane idx from a 4-element vector into a general register,
// using the sign-extending move (smov).
instruct extract4S(iRegINoSp dst, vecD src, immI idx)
%{
  match(Set dst (ExtractS src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, H, $idx\t# extract from vector(4S)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vn, __ H, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1899 
// Extract short lane idx from an 8-element vector into a general register,
// using the sign-extending move (smov).
instruct extract8S(iRegINoSp dst, vecX src, immI idx)
%{
  match(Set dst (ExtractS src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "smov    $dst, $src, H, $idx\t# extract from vector(8S)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ smov($dst$$Register, vn, __ H, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1911 
// Extract int lane idx from a 2-element vector into a general register (umov).
instruct extract2I(iRegINoSp dst, vecD src, immI idx)
%{
  match(Set dst (ExtractI src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, S, $idx\t# extract from vector(2I)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vn, __ S, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1923 
// Extract int lane idx from a 4-element vector into a general register (umov).
instruct extract4I(iRegINoSp dst, vecX src, immI idx)
%{
  match(Set dst (ExtractI src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, S, $idx\t# extract from vector(4I)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vn, __ S, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1935 
// Extract long lane idx from a 2-element vector into a general register (umov).
instruct extract2L(iRegLNoSp dst, vecX src, immI idx)
%{
  match(Set dst (ExtractL src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "umov    $dst, $src, D, $idx\t# extract from vector(2L)" %}
  ins_encode %{
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ umov($dst$$Register, vn, __ D, $idx$$constant);
  %}
  ins_pipe(pipe_class_default);
%}
1947 
// Extract float lane idx of a 2-element vector into a scalar FP register.
// A non-zero lane is copied into lane 0 of dst with ins; lane 0 needs only a
// scalar fmovs, and nothing at all when dst and src are the same register.
instruct extract2F(vRegF dst, vecD src, immI idx)
%{
  match(Set dst (ExtractF src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "ins   $dst, S, $src, 0, $idx\t# extract from vector(2F)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    if ($idx$$constant != 0) {
      __ ins(vd, __ S, vn, 0, $idx$$constant);
    } else if (vd != vn) {
      __ fmovs(vd, vn);
    }
    // otherwise lane 0 is already in place: emit nothing
  %}
  ins_pipe(pipe_class_default);
%}
1967 
// Extract float lane idx of a 4-element vector into a scalar FP register.
// A non-zero lane is copied into lane 0 of dst with ins; lane 0 needs only a
// scalar fmovs, and nothing at all when dst and src are the same register.
instruct extract4F(vRegF dst, vecX src, immI idx)
%{
  match(Set dst (ExtractF src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "ins   $dst, S, $src, 0, $idx\t# extract from vector(4F)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    if ($idx$$constant != 0) {
      __ ins(vd, __ S, vn, 0, $idx$$constant);
    } else if (vd != vn) {
      __ fmovs(vd, vn);
    }
    // otherwise lane 0 is already in place: emit nothing
  %}
  ins_pipe(pipe_class_default);
%}
1987 
// Extract double lane idx of a 2-element vector into a scalar FP register.
// A non-zero lane is copied into lane 0 of dst with ins; lane 0 needs only a
// scalar fmovd, and nothing at all when dst and src are the same register.
instruct extract2D(vRegD dst, vecX src, immI idx)
%{
  match(Set dst (ExtractD src idx));
  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "ins   $dst, D, $src, 0, $idx\t# extract from vector(2D)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    if ($idx$$constant != 0) {
      __ ins(vd, __ D, vn, 0, $idx$$constant);
    } else if (vd != vn) {
      __ fmovd(vd, vn);
    }
    // otherwise lane 0 is already in place: emit nothing
  %}
  ins_pipe(pipe_class_default);
%}
2007 
2008 // ------------------------------ Vector comparison ---------------------------------
2009 
// Vector mask compare for 8-byte vectors. Code generation is delegated to the
// neon_compare helper with isQ set to false; element types of 8 bytes are
// rejected by the assert.
instruct vcmpD(vecD dst, vecD src1, vecD src2, immI cond)
%{
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  predicate(n->as_Vector()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ "vcmpD  $dst, $src1, $src2\t# vector compare " %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(type2aelembytes(elem_bt) != 8, "not supported");
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ neon_compare(vd, elem_bt, vn, vm, (int)$cond$$constant, /*isQ*/ false);
  %}
  ins_pipe(vdop64);
%}
2024 
// Vector mask compare for 16-byte vectors. Code generation is delegated to the
// neon_compare helper with isQ set to true.
instruct vcmpX(vecX dst, vecX src1, vecX src2, immI cond)
%{
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "vcmpX  $dst, $src1, $src2\t# vector compare " %}
  ins_encode %{
    const BasicType elem_bt = Matcher::vector_element_basic_type(this);
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ neon_compare(vd, elem_bt, vn, vm, (int)$cond$$constant, /*isQ*/ true);
  %}
  ins_pipe(vdop128);
%}
2038 
2039 // ------------------------------ Vector mul -----------------------------------
2040 
// Multiply two vectors of two longs lane by lane. Each pair of lanes is moved
// to general registers, multiplied there with the scalar mul, and the product
// is inserted back into the matching lane of dst. Lane 0 is fully processed
// before lane 1 is read.
instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
%{
  match(Set dst (MulVL src1 src2));
  predicate(n->as_Vector()->length() == 2);
  effect(TEMP tmp1, TEMP tmp2);
  ins_cost(INSN_COST);
  format %{ "umov   $tmp1, $src1, D, 0\n\t"
            "umov   $tmp2, $src2, D, 0\n\t"
            "mul    $tmp2, $tmp2, $tmp1\n\t"
            "mov    $dst,  T2D,   0, $tmp2\t# insert into vector(2L)\n\t"
            "umov   $tmp1, $src1, D, 1\n\t"
            "umov   $tmp2, $src2, D, 1\n\t"
            "mul    $tmp2, $tmp2, $tmp1\n\t"
            "mov    $dst,  T2D,   1, $tmp2\t# insert into vector(2L)"
  %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    const Register lhs = $tmp1$$Register;
    const Register rhs = $tmp2$$Register;

    // lane 0
    __ umov(lhs, vn, __ D, 0);
    __ umov(rhs, vm, __ D, 0);
    __ mul(rhs, rhs, lhs);
    __ mov(vd, __ D, 0, rhs);

    // lane 1
    __ umov(lhs, vn, __ D, 1);
    __ umov(rhs, vm, __ D, 1);
    __ mul(rhs, rhs, lhs);
    __ mov(vd, __ D, 1, rhs);
  %}
  ins_pipe(pipe_slow);
%}
2068 
2069 // --------------------------------- Vector not --------------------------------
2070 
// Bitwise NOT of an 8-byte vector: matches an XorV against a replicated -1
// (byte, short or int replicate) and emits a single notr on all 8 bytes.
instruct vnot2I(vecD dst, vecD src, immI_M1 m1)
%{
  match(Set dst (XorV src (ReplicateB m1)));
  match(Set dst (XorV src (ReplicateS m1)));
  match(Set dst (XorV src (ReplicateI m1)));
  predicate(n->as_Vector()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ "not  $dst, T8B, $src\t# vector (8B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T8B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2085 
// Bitwise NOT of a 16-byte vector: matches an XorV against a replicated -1
// (byte, short or int replicate) and emits a single notr on all 16 bytes.
instruct vnot4I(vecX dst, vecX src, immI_M1 m1)
%{
  match(Set dst (XorV src (ReplicateB m1)));
  match(Set dst (XorV src (ReplicateS m1)));
  match(Set dst (XorV src (ReplicateI m1)));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "not  $dst, T16B, $src\t# vector (16B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2100 
// Bitwise NOT of a 16-byte vector of longs: matches an XorV against a
// replicated long -1 and emits a single notr on all 16 bytes.
instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
%{
  match(Set dst (XorV src (ReplicateL m1)));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "not  $dst, T16B, $src\t# vector (16B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src$$reg);
    __ notr(vd, __ T16B, vn);
  %}
  ins_pipe(pipe_class_default);
%}
2113 
2114 // ------------------------------ Vector and_not -------------------------------
2115 
// AND-NOT on 8-byte vectors: matches src1 AND (src2 XOR replicated -1), for
// byte, short or int replicates, and emits a single bic.
instruct vand_not2I(vecD dst, vecD src1, vecD src2, immI_M1 m1)
%{
  match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
  predicate(n->as_Vector()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T8B, $src1, $src2\t# vector (8B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T8B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
2130 
// AND-NOT on 16-byte vectors: matches src1 AND (src2 XOR replicated -1), for
// byte, short or int replicates, and emits a single bic.
instruct vand_not4I(vecX dst, vecX src1, vecX src2, immI_M1 m1)
%{
  match(Set dst (AndV src1 (XorV src2 (ReplicateB m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateS m1))));
  match(Set dst (AndV src1 (XorV src2 (ReplicateI m1))));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T16B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
2145 
// AND-NOT on 16-byte vectors of longs: matches src1 AND (src2 XOR replicated
// long -1) and emits a single bic.
instruct vand_not2L(vecX dst, vecX src1, vecX src2, immL_M1 m1)
%{
  match(Set dst (AndV src1 (XorV src2 (ReplicateL m1))));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "bic  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ bic(vd, __ T16B, vn, vm);
  %}
  ins_pipe(pipe_class_default);
%}
2158 
2159 // ------------------------------ Vector max/min -------------------------------
2160 
// Element-wise maximum of two byte vectors held in vecD registers. The
// predicate accepts vectors of 4 or 8 byte elements; both use the T8B form.
instruct vmax8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MaxV src1 src2));
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T8B, $src1, $src2\t# vector (8B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T8B, vn, vm);
  %}
  ins_pipe(vdop64);
%}
2175 
// Element-wise maximum of two vectors of 16 bytes (T16B arrangement).
instruct vmax16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MaxV src1 src2));
  predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
  ins_cost(INSN_COST);
  format %{ "maxv  $dst, T16B, $src1, $src2\t# vector (16B)" %}
  ins_encode %{
    const FloatRegister vd = as_FloatRegister($dst$$reg);
    const FloatRegister vn = as_FloatRegister($src1$$reg);
    const FloatRegister vm = as_FloatRegister($src2$$reg);
    __ maxv(vd, __ T16B, vn, vm);
  %}
  ins_pipe(vdop128);
%}
2189 
// MaxV for T_SHORT vectors of 4 elements held in a vecD register.
// Emits one maxv with the T4H arrangement.
2190 instruct vmax4S(vecD dst, vecD src1, vecD src2)
2191 %{
2192   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2193   match(Set dst (MaxV src1 src2));
2194   ins_cost(INSN_COST);
2195   format %{ "maxv  $dst, T4H, $src1, $src2\t# vector (4S)" %}
2196   ins_encode %{
2197     __ maxv(as_FloatRegister($dst$$reg), __ T4H,
2198             as_FloatRegister($src1$$reg),
2199             as_FloatRegister($src2$$reg));
2200   %}
2201   ins_pipe(vdop64);
2202 %}
2203 
// MaxV for T_SHORT vectors of 8 elements held in a vecX register.
// Emits one maxv with the T8H arrangement.
2204 instruct vmax8S(vecX dst, vecX src1, vecX src2)
2205 %{
2206   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2207   match(Set dst (MaxV src1 src2));
2208   ins_cost(INSN_COST);
2209   format %{ "maxv  $dst, T8H, $src1, $src2\t# vector (8S)" %}
2210   ins_encode %{
2211     __ maxv(as_FloatRegister($dst$$reg), __ T8H,
2212             as_FloatRegister($src1$$reg),
2213             as_FloatRegister($src2$$reg));
2214   %}
2215   ins_pipe(vdop128);
2216 %}
2217 
// MaxV for T_INT vectors of 2 elements held in a vecD register.
// Emits one maxv with the T2S arrangement.
2218 instruct vmax2I(vecD dst, vecD src1, vecD src2)
2219 %{
2220   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2221   match(Set dst (MaxV src1 src2));
2222   ins_cost(INSN_COST);
2223   format %{ "maxv  $dst, T2S, $src1, $src2\t# vector (2I)" %}
2224   ins_encode %{
2225     __ maxv(as_FloatRegister($dst$$reg), __ T2S,
2226             as_FloatRegister($src1$$reg),
2227             as_FloatRegister($src2$$reg));
2228   %}
2229   ins_pipe(vdop64);
2230 %}
2231 
// MaxV for T_INT vectors of 4 elements held in a vecX register.
// Emits one maxv with the T4S arrangement.
2232 instruct vmax4I(vecX dst, vecX src1, vecX src2)
2233 %{
2234   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2235   match(Set dst (MaxV src1 src2));
2236   ins_cost(INSN_COST);
2237   format %{ "maxv  $dst, T4S, $src1, $src2\t# vector (4I)" %}
2238   ins_encode %{
2239     __ maxv(as_FloatRegister($dst$$reg), __ T4S,
2240             as_FloatRegister($src1$$reg),
2241             as_FloatRegister($src2$$reg));
2242   %}
2243   ins_pipe(vdop128);
2244 %}
2245 
// MinV for T_BYTE vectors of 4 or 8 elements held in a vecD register.
// Emits one minv with the T8B arrangement; the same 8-byte arrangement is
// used for the 4-element case as well.
2246 instruct vmin8B(vecD dst, vecD src1, vecD src2)
2247 %{
2248   predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
2249              n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2250   match(Set dst (MinV src1 src2));
2251   ins_cost(INSN_COST);
2252   format %{ "minv  $dst, T8B, $src1, $src2\t# vector (8B)" %}
2253   ins_encode %{
2254     __ minv(as_FloatRegister($dst$$reg), __ T8B,
2255             as_FloatRegister($src1$$reg),
2256             as_FloatRegister($src2$$reg));
2257   %}
2258   ins_pipe(vdop64);
2259 %}
2260 
// MinV for T_BYTE vectors of 16 elements held in a vecX register.
// Emits one minv with the T16B arrangement.
2261 instruct vmin16B(vecX dst, vecX src1, vecX src2)
2262 %{
2263   predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2264   match(Set dst (MinV src1 src2));
2265   ins_cost(INSN_COST);
2266   format %{ "minv  $dst, T16B, $src1, $src2\t# vector (16B)" %}
2267   ins_encode %{
2268     __ minv(as_FloatRegister($dst$$reg), __ T16B,
2269             as_FloatRegister($src1$$reg),
2270             as_FloatRegister($src2$$reg));
2271   %}
2272   ins_pipe(vdop128);
2273 %}
2274 
// MinV for T_SHORT vectors of 4 elements held in a vecD register.
// Emits one minv with the T4H arrangement.
2275 instruct vmin4S(vecD dst, vecD src1, vecD src2)
2276 %{
2277   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2278   match(Set dst (MinV src1 src2));
2279   ins_cost(INSN_COST);
2280   format %{ "minv  $dst, T4H, $src1, $src2\t# vector (4S)" %}
2281   ins_encode %{
2282     __ minv(as_FloatRegister($dst$$reg), __ T4H,
2283             as_FloatRegister($src1$$reg),
2284             as_FloatRegister($src2$$reg));
2285   %}
2286   ins_pipe(vdop64);
2287 %}
2288 
// MinV for T_SHORT vectors of 8 elements held in a vecX register.
// Emits one minv with the T8H arrangement.
2289 instruct vmin8S(vecX dst, vecX src1, vecX src2)
2290 %{
2291   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2292   match(Set dst (MinV src1 src2));
2293   ins_cost(INSN_COST);
2294   format %{ "minv  $dst, T8H, $src1, $src2\t# vector (8S)" %}
2295   ins_encode %{
2296     __ minv(as_FloatRegister($dst$$reg), __ T8H,
2297             as_FloatRegister($src1$$reg),
2298             as_FloatRegister($src2$$reg));
2299   %}
2300   ins_pipe(vdop128);
2301 %}
2302 
// MinV for T_INT vectors of 2 elements held in a vecD register.
// Emits one minv with the T2S arrangement.
2303 instruct vmin2I(vecD dst, vecD src1, vecD src2)
2304 %{
2305   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2306   match(Set dst (MinV src1 src2));
2307   ins_cost(INSN_COST);
2308   format %{ "minv  $dst, T2S, $src1, $src2\t# vector (2I)" %}
2309   ins_encode %{
2310     __ minv(as_FloatRegister($dst$$reg), __ T2S,
2311             as_FloatRegister($src1$$reg),
2312             as_FloatRegister($src2$$reg));
2313   %}
2314   ins_pipe(vdop64);
2315 %}
2316 
// MinV for T_INT vectors of 4 elements held in a vecX register.
// Emits one minv with the T4S arrangement.
2317 instruct vmin4I(vecX dst, vecX src1, vecX src2)
2318 %{
2319   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
2320   match(Set dst (MinV src1 src2));
2321   ins_cost(INSN_COST);
2322   format %{ "minv  $dst, T4S, $src1, $src2\t# vector (4I)" %}
2323   ins_encode %{
2324     __ minv(as_FloatRegister($dst$$reg), __ T4S,
2325             as_FloatRegister($src1$$reg),
2326             as_FloatRegister($src2$$reg));
2327   %}
2328   ins_pipe(vdop128);
2329 %}
2330 
2331 
// MaxV for T_LONG vectors of 2 elements. No single maxv is used here; the
// rule emits two instructions instead:
//   1. cmgt writes the per-lane (T2D) comparison of src1 against src2 to dst.
//   2. bsl uses dst as the selector and is passed src1, src2 in that order.
// dst is declared TEMP: it is written by cmgt while src1/src2 are still
// needed by bsl, so presumably it must not share a register with them.
2332 instruct vmax2L(vecX dst, vecX src1, vecX src2)
2333 %{
2334   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
2335   match(Set dst (MaxV src1 src2));
2336   ins_cost(INSN_COST);
2337   effect(TEMP dst);
2338   format %{ "cmgt  $dst, T2D, $src1, $src2\t# vector (2L)\n\t"
2339             "bsl   $dst, T16B, $src1, $src2\t# vector (16B)" %}
2340   ins_encode %{
2341     __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
2342             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2343     __ bsl(as_FloatRegister($dst$$reg), __ T16B,
2344            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2345   %}
2346   ins_pipe(vdop128);
2347 %}
2348 
// MinV for T_LONG vectors of 2 elements, emitted as two instructions:
//   1. cmgt writes the per-lane (T2D) comparison of src1 against src2 to dst.
//   2. bsl uses dst as the selector; unlike vmax2L the sources are passed in
//      the order src2, src1, which picks the other operand in each lane.
// dst is declared TEMP: it is written by cmgt while src1/src2 are still
// needed by bsl, so presumably it must not share a register with them.
2349 instruct vmin2L(vecX dst, vecX src1, vecX src2)
2350 %{
2351   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
2352   match(Set dst (MinV src1 src2));
2353   ins_cost(INSN_COST);
2354   effect(TEMP dst);
2355   format %{ "cmgt  $dst, T2D, $src1, $src2\t# vector (2L)\n\t"
2356             "bsl   $dst, T16B, $src2, $src1\t# vector (16B)" %}
2357   ins_encode %{
2358     __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
2359             as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
2360     __ bsl(as_FloatRegister($dst$$reg), __ T16B,
2361            as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
2362   %}
2363   ins_pipe(vdop128);
2364 %}
2365 
2366 // --------------------------------- blend (bsl) ----------------------------
2367 
// VectorBlend for 8-byte vectors. dst appears both as the last input of the
// matched VectorBlend and as the result, so the rule works in place on dst.
// Emits one bsl over T8B; note the sources are passed as (src2, src1), the
// reverse of their order in the match rule.
// NOTE(review): dst presumably holds the blend mask on entry -- confirm
// against the VectorBlend node definition.
2368 instruct vbsl8B(vecD dst, vecD src1, vecD src2)
2369 %{
2370   predicate(n->as_Vector()->length_in_bytes() == 8);
2371   match(Set dst (VectorBlend (Binary src1 src2) dst));
2372   ins_cost(INSN_COST);
2373   format %{ "bsl  $dst, T8B, $src2, $src1\t# vector (8B)" %}
2374   ins_encode %{
2375     __ bsl(as_FloatRegister($dst$$reg), __ T8B,
2376            as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
2377   %}
2378   ins_pipe(vlogical64);
2379 %}
2380 
// VectorBlend for 16-byte vectors. dst appears both as the last input of the
// matched VectorBlend and as the result, so the rule works in place on dst.
// Emits one bsl over T16B; note the sources are passed as (src2, src1), the
// reverse of their order in the match rule.
// NOTE(review): dst presumably holds the blend mask on entry -- confirm
// against the VectorBlend node definition.
2381 instruct vbsl16B(vecX dst, vecX src1, vecX src2)
2382 %{
2383   predicate(n->as_Vector()->length_in_bytes() == 16);
2384   match(Set dst (VectorBlend (Binary src1 src2) dst));
2385   ins_cost(INSN_COST);
2386   format %{ "bsl  $dst, T16B, $src2, $src1\t# vector (16B)" %}
2387   ins_encode %{
2388     __ bsl(as_FloatRegister($dst$$reg), __ T16B,
2389            as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
2390   %}
2391   ins_pipe(vlogical128);
2392 %}
2393 
2394 // --------------------------------- Load/store Mask ----------------------------
2395 
// VectorLoadMask producing 8 x T_BYTE. Source and result have the same
// element size, so the only instruction emitted is a negr over T8B.
// NOTE(review): presumably this turns 0/1 boolean bytes into 0/-1 lane
// masks -- confirm against VectorLoadMask semantics.
2396 instruct loadmask8B(vecD dst, vecD src  )
2397 %{
2398   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2399   match(Set dst (VectorLoadMask src ));
2400   ins_cost(INSN_COST);
2401   format %{ "negr  $dst, T8B, $src\t# load mask (8B to 8B)" %}
2402   ins_encode %{
2403     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
2404   %}
2405   ins_pipe(pipe_class_default);
2406 %}
2407 
// VectorLoadMask producing 16 x T_BYTE. Source and result have the same
// element size, so the only instruction emitted is a negr over T16B.
// NOTE(review): presumably this turns 0/1 boolean bytes into 0/-1 lane
// masks -- confirm against VectorLoadMask semantics.
2408 instruct loadmask16B(vecX dst, vecX src  )
2409 %{
2410   predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2411   match(Set dst (VectorLoadMask src ));
2412   ins_cost(INSN_COST);
2413   format %{ "negr  $dst, T16B, $src\t# load mask (16B to 16B)" %}
2414   ins_encode %{
2415     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
2416   %}
2417   ins_pipe(pipe_class_default);
2418 %}
2419 
// VectorStoreMask for 8 elements where the size input matches the immI_1
// operand (presumably an element size of 1 byte -- the operand is declared
// elsewhere). No narrowing is needed; a single negr over T8B is emitted.
2420 instruct storemask8B(vecD dst, vecD src , immI_1 size)
2421 %{
2422   predicate(n->as_Vector()->length() == 8);
2423   match(Set dst (VectorStoreMask src size));
2424   ins_cost(INSN_COST);
2425   format %{ "negr  $dst, T8B, $src\t# store mask (8B to 8B)" %}
2426   ins_encode %{
2427     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
2428   %}
2429   ins_pipe(pipe_class_default);
2430 %}
2431 
// VectorStoreMask for 16 elements where the size input matches the immI_1
// operand (presumably an element size of 1 byte -- the operand is declared
// elsewhere). No narrowing is needed; a single negr over T16B is emitted.
2432 instruct storemask16B(vecX dst, vecX src , immI_1 size)
2433 %{
2434   predicate(n->as_Vector()->length() == 16);
2435   match(Set dst (VectorStoreMask src size));
2436   ins_cost(INSN_COST);
2437   format %{ "negr  $dst, T16B, $src\t# store mask (16B to 16B)" %}
2438   ins_encode %{
2439     __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
2440   %}
2441   ins_pipe(pipe_class_default);
2442 %}
2443 
// VectorLoadMask producing 4 x T_SHORT. Two instructions are emitted:
// uxtl widens the source bytes to halfwords (T8B -> T8H), then negr negates
// dst in place. Both use the T8H arrangement even though dst is a vecD and
// the predicate selects only 4 elements.
2444 instruct loadmask4S(vecD dst, vecD src  )
2445 %{
2446   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2447   match(Set dst (VectorLoadMask src ));
2448   ins_cost(INSN_COST);
2449   format %{ "uxtl  $dst, T8H, $src, T8B\n\t"
2450             "negr  $dst, T8H, $dst\t# load mask (4B to 4H)" %}
2451   ins_encode %{
2452     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2453     __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg));
2454   %}
2455   ins_pipe(pipe_slow);
2456 %}
2457 
// VectorLoadMask producing 8 x T_SHORT: the source is a vecD and the result
// a vecX. uxtl widens the source bytes to halfwords (T8B -> T8H), then negr
// negates dst in place over T8H.
2458 instruct loadmask8S(vecX dst, vecD src  )
2459 %{
2460   predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2461   match(Set dst (VectorLoadMask src ));
2462   ins_cost(INSN_COST);
2463   format %{ "uxtl  $dst, T8H, $src, T8B\n\t"
2464             "negr  $dst, T8H, $dst\t# load mask (8B to 8H)" %}
2465   ins_encode %{
2466     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2467     __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg));
2468   %}
2469   ins_pipe(pipe_slow);
2470 %}
2471 
// VectorStoreMask for 4 elements where the size input matches immI_2
// (presumably a 2-byte element size -- the operand is declared elsewhere).
// xtn narrows halfwords to bytes (T8H -> T8B), then negr negates dst in
// place over T8B.
2472 instruct storemask4S(vecD dst, vecD src , immI_2 size)
2473 %{
2474   predicate(n->as_Vector()->length() == 4);
2475   match(Set dst (VectorStoreMask src size));
2476   ins_cost(INSN_COST);
2477   format %{ "xtn  $dst, T8B, $src, T8H\n\t"
2478             "negr  $dst, T8B, $dst\t# store mask (4H to 4B)" %}
2479   ins_encode %{
2480     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
2481     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2482   %}
2483   ins_pipe(pipe_slow);
2484 %}
2485 
// VectorStoreMask for 8 elements where the size input matches immI_2
// (presumably a 2-byte element size -- the operand is declared elsewhere).
// The source is a vecX and the result a vecD: xtn narrows halfwords to
// bytes (T8H -> T8B), then negr negates dst in place over T8B.
2486 instruct storemask8S(vecD dst, vecX src , immI_2 size)
2487 %{
2488   predicate(n->as_Vector()->length() == 8);
2489   match(Set dst (VectorStoreMask src size));
2490   ins_cost(INSN_COST);
2491   format %{ "xtn  $dst, T8B, $src, T8H\n\t"
2492             "negr  $dst, T8B, $dst\t# store mask (8H to 8B)" %}
2493   ins_encode %{
2494     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
2495     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2496   %}
2497   ins_pipe(pipe_slow);
2498 %}
2499 
// VectorLoadMask producing 2 elements of T_INT or T_FLOAT. Three
// instructions are emitted: two uxtl steps widen bytes to halfwords and then
// halfwords to words (T8B -> T8H, T4H -> T4S), and a final negr negates dst
// in place over T4S. The second and third steps read and write dst.
2500 instruct loadmask2I(vecD dst, vecD src  )
2501 %{
2502   predicate(n->as_Vector()->length() == 2 &&
2503             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2504              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2505   match(Set dst (VectorLoadMask src ));
2506   ins_cost(INSN_COST);
2507   format %{ "uxtl  $dst, T8H, $src, T8B\t# 2B to 2H\n\t"
2508             "uxtl  $dst, T4S, $dst, T4H\t# 2H to 2S\n\t"
2509             "negr   $dst, T4S, $dst\t# load mask (2B to 2S)" %}
2510   ins_encode %{
2511     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2512     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2513     __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
2514   %}
2515   ins_pipe(pipe_slow);
2516 %}
2517 
// VectorLoadMask producing 4 elements of T_INT or T_FLOAT; the source is a
// vecD and the result a vecX. Two uxtl steps widen bytes to halfwords and
// then halfwords to words (T8B -> T8H, T4H -> T4S), and a final negr negates
// dst in place over T4S.
2518 instruct loadmask4I(vecX dst, vecD src  )
2519 %{
2520   predicate(n->as_Vector()->length() == 4 &&
2521             (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2522              n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2523   match(Set dst (VectorLoadMask src ));
2524   ins_cost(INSN_COST);
2525   format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H\n\t"
2526             "uxtl  $dst, T4S, $dst, T4H\t# 4H to 4S\n\t"
2527             "negr   $dst, T4S, $dst\t# load mask (4B to 4S)" %}
2528   ins_encode %{
2529     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2530     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2531     __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
2532   %}
2533   ins_pipe(pipe_slow);
2534 %}
2535 
// VectorStoreMask for 2 elements where the size input matches immI_4
// (presumably a 4-byte element size -- the operand is declared elsewhere).
// Two xtn steps narrow words to halfwords and then halfwords to bytes
// (T4S -> T4H, T8H -> T8B), and a final negr negates dst in place over T8B.
2536 instruct storemask2I(vecD dst, vecD src , immI_4 size)
2537 %{
2538   predicate(n->as_Vector()->length() == 2);
2539   match(Set dst (VectorStoreMask src size));
2540   ins_cost(INSN_COST);
2541   format %{ "xtn  $dst, T4H, $src, T4S\t# 2S to 2H\n\t"
2542             "xtn  $dst, T8B, $dst, T8H\t# 2H to 2B\n\t"
2543             "negr   $dst, T8B, $dst\t# store mask (2S to 2B)" %}
2544   ins_encode %{
2545     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
2546     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2547     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2548   %}
2549   ins_pipe(pipe_slow);
2550 %}
2551 
// VectorStoreMask for 4 elements where the size input matches immI_4
// (presumably a 4-byte element size -- the operand is declared elsewhere).
// The source is a vecX and the result a vecD. Two xtn steps narrow words to
// halfwords and then halfwords to bytes (T4S -> T4H, T8H -> T8B), and a
// final negr negates dst in place over T8B.
2552 instruct storemask4I(vecD dst, vecX src , immI_4 size)
2553 %{
2554   predicate(n->as_Vector()->length() == 4);
2555   match(Set dst (VectorStoreMask src size));
2556   ins_cost(INSN_COST);
2557   format %{ "xtn  $dst, T4H, $src, T4S\t# 4S to 4H\n\t"
2558             "xtn  $dst, T8B, $dst, T8H\t# 4H to 4B\n\t"
2559             "negr   $dst, T8B, $dst\t# store mask (4S to 4B)" %}
2560   ins_encode %{
2561     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
2562     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2563     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2564   %}
2565   ins_pipe(pipe_slow);
2566 %}
2567 
// VectorLoadMask producing 2 elements of T_LONG or T_DOUBLE; the source is a
// vecD and the result a vecX. Three uxtl steps widen the bytes stepwise
// (T8B -> T8H, T4H -> T4S, T2S -> T2D) and a final negr negates dst in place
// over T2D.
// NOTE(review): the format text prints "neg" for the last step while the
// encoder calls negr, and the size notes in the format use Java type letters
// (B/S/I/L) rather than the arrangement letters used by loadmask2I.
2568 instruct loadmask2L(vecX dst, vecD src)
2569 %{
2570   predicate(n->as_Vector()->length() == 2 &&
2571             (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
2572              n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
2573   match(Set dst (VectorLoadMask src));
2574   ins_cost(INSN_COST);
2575   format %{ "uxtl  $dst, T8H, $src, T8B\t# 2B to 2S\n\t"
2576             "uxtl  $dst, T4S, $dst, T4H\t# 2S to 2I\n\t"
2577             "uxtl  $dst, T2D, $dst, T2S\t# 2I to 2L\n\t"
2578             "neg   $dst, T2D, $dst\t# load mask (2B to 2L)" %}
2579   ins_encode %{
2580     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2581     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2582     __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
2583     __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
2584   %}
2585   ins_pipe(pipe_slow);
2586 %}
2587 
// VectorStoreMask for 2 elements where the size input matches immI_8
// (presumably an 8-byte element size -- the operand is declared elsewhere).
// The source is a vecX and the result a vecD. Three xtn steps narrow the
// lanes stepwise (T2D -> T2S, T4S -> T4H, T8H -> T8B) and a final negr
// negates dst in place over T8B.
// NOTE(review): the format text prints "neg" for the last step while the
// encoder calls negr.
2588 instruct storemask2L(vecD dst, vecX src, immI_8 size)
2589 %{
2590   predicate(n->as_Vector()->length() == 2);
2591   match(Set dst (VectorStoreMask src size));
2592   ins_cost(INSN_COST);
2593   format %{ "xtn  $dst, T2S, $src, T2D\t# 2L to 2I\n\t"
2594             "xtn  $dst, T4H, $dst, T4S\t# 2I to 2S\n\t"
2595             "xtn  $dst, T8B, $dst, T8H\t# 2S to 2B\n\t"
2596             "neg  $dst, T8B, $dst\t# store mask (2L to 2B)" %}
2597   ins_encode %{
2598     __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
2599     __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
2600     __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
2601     __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
2602   %}
2603   ins_pipe(pipe_slow);
2604 %}
2605 
2606 // vector mask cast
2607 
// VectorMaskCast between two 8-byte vector types with the same element
// count. dst is both the input and the result of the match, the cost is 0
// and the encoding is empty: no instruction is emitted for this cast.
2608 instruct vmaskcastD(vecD dst)
2609 %{
2610   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
2611             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8 &&
2612             n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
2613   match(Set dst (VectorMaskCast dst));
2614   ins_cost(0);
2615   format %{ "vmaskcast $dst\t# empty" %}
2616   ins_encode %{
2617     // empty
2618   %}
2619   ins_pipe(pipe_class_empty);
2620 %}
2621 
// VectorMaskCast between two 16-byte vector types with the same element
// count. dst is both the input and the result of the match, the cost is 0
// and the encoding is empty: no instruction is emitted for this cast.
2622 instruct vmaskcastX(vecX dst)
2623 %{
2624   predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
2625             n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16 &&
2626             n->bottom_type()->is_vect()->length() == n->in(1)->bottom_type()->is_vect()->length());
2627   match(Set dst (VectorMaskCast dst));
2628   ins_cost(0);
2629   format %{ "vmaskcast $dst\t# empty" %}
2630   ins_encode %{
2631     // empty
2632   %}
2633   ins_pipe(pipe_class_empty);
2634 %}
2635 
2636 //-------------------------------- LOAD_IOTA_INDICES----------------------------------
2637 
// VectorLoadConst of an immI0 operand for T_BYTE vectors of 2, 4 or 8
// elements, only when UseSVE == 0. The address returned by
// StubRoutines::aarch64::vector_iota_indices() is materialised in rscratch1
// and the vector is loaded from it with ldrd into a vecD register.
2638 instruct loadcon8B(vecD dst, immI0 src)
2639 %{
2640   predicate(UseSVE == 0 &&
2641            (n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
2642             n->as_Vector()->length() == 8) &&
2643             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2644   match(Set dst (VectorLoadConst src));
2645   ins_cost(INSN_COST);
2646   format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
2647   ins_encode %{
2648     __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
2649     __ ldrd(as_FloatRegister($dst$$reg), rscratch1);
2650   %}
2651   ins_pipe(pipe_class_memory);
2652 %}
2653 
// VectorLoadConst of an immI0 operand for T_BYTE vectors of 16 elements,
// only when UseSVE == 0. The address returned by
// StubRoutines::aarch64::vector_iota_indices() is materialised in rscratch1
// and the vector is loaded from it with ldrq into a vecX register.
2654 instruct loadcon16B(vecX dst, immI0 src)
2655 %{
2656   predicate(UseSVE == 0 && n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2657   match(Set dst (VectorLoadConst src));
2658   ins_cost(INSN_COST);
2659   format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
2660   ins_encode %{
2661     __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
2662     __ ldrq(as_FloatRegister($dst$$reg), rscratch1);
2663   %}
2664   ins_pipe(pipe_class_memory);
2665 %}
2666 
2667 //-------------------------------- LOAD_SHUFFLE ----------------------------------
2668 
// VectorLoadShuffle producing 8 x T_BYTE. The shuffle indices need no
// widening, so the rule is a plain register copy: it is formatted as "mov"
// and encoded as orr of src with itself over T8B.
2669 instruct loadshuffle8B(vecD dst, vecD src)
2670 %{
2671   predicate(n->as_Vector()->length() == 8 &&
2672             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2673   match(Set dst (VectorLoadShuffle src));
2674   ins_cost(INSN_COST);
2675   format %{ "mov  $dst, T8B, $src\t# get 8B shuffle" %}
2676   ins_encode %{
2677     __ orr(as_FloatRegister($dst$$reg), __ T8B,
2678            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2679   %}
2680   ins_pipe(pipe_class_default);
2681 %}
2682 
// VectorLoadShuffle producing 16 x T_BYTE. The shuffle indices need no
// widening, so the rule is a plain register copy: it is formatted as "mov"
// and encoded as orr of src with itself over T16B.
2683 instruct loadshuffle16B(vecX dst, vecX src)
2684 %{
2685   predicate(n->as_Vector()->length() == 16 &&
2686             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2687   match(Set dst (VectorLoadShuffle src));
2688   ins_cost(INSN_COST);
2689   format %{ "mov  $dst, T16B, $src\t# get 16B shuffle" %}
2690   ins_encode %{
2691     __ orr(as_FloatRegister($dst$$reg), __ T16B,
2692            as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
2693   %}
2694   ins_pipe(pipe_class_default);
2695 %}
2696 
// VectorLoadShuffle producing 4 x T_SHORT. A single uxtl widens the byte
// indices in src to halfwords (T8B -> T8H) in dst.
2697 instruct loadshuffle4S(vecD dst, vecD src)
2698 %{
2699   predicate(n->as_Vector()->length() == 4 &&
2700             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2701   match(Set dst (VectorLoadShuffle src));
2702   ins_cost(INSN_COST);
2703   format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H" %}
2704   ins_encode %{
2705     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2706   %}
2707   ins_pipe(pipe_class_default);
2708 %}
2709 
// VectorLoadShuffle producing 8 x T_SHORT; the source is a vecD and the
// result a vecX. A single uxtl widens the byte indices in src to halfwords
// (T8B -> T8H) in dst.
2710 instruct loadshuffle8S(vecX dst, vecD src)
2711 %{
2712   predicate(n->as_Vector()->length() == 8 &&
2713             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2714   match(Set dst (VectorLoadShuffle src));
2715   ins_cost(INSN_COST);
2716   format %{ "uxtl  $dst, T8H, $src, T8B\t# 8B to 8H" %}
2717   ins_encode %{
2718     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2719   %}
2720   ins_pipe(pipe_class_default);
2721 %}
2722 
// VectorLoadShuffle producing 4 elements of T_INT or T_FLOAT; the source is
// a vecD and the result a vecX. Two uxtl steps widen the byte indices to
// halfwords and then to words (T8B -> T8H, T4H -> T4S); the second step
// reads and writes dst.
2723 instruct loadshuffle4I(vecX dst, vecD src)
2724 %{
2725   predicate(n->as_Vector()->length() == 4 &&
2726            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2727             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2728   match(Set dst (VectorLoadShuffle src));
2729   ins_cost(INSN_COST);
2730   format %{ "uxtl  $dst, T8H, $src, T8B\t# 4B to 4H \n\t"
2731             "uxtl  $dst, T4S, $dst, T4H\t# 4H to 4S" %}
2732   ins_encode %{
2733     __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
2734     __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
2735   %}
2736   ins_pipe(pipe_slow);
2737 %}
2738 
2739 //-------------------------------- Rearrange -------------------------------------
2740 // Here is an example that rearranges a NEON vector with 4 ints:
2741 // Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
2742 //   1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
2743 //   2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
2744 //   3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
2745 //   4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
2746 //      and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
2747 //   5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
2748 //      and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
2749 //   6. Use Vm as index register, and use V1 as table register.
2750 //      Then get V2 as the result by tbl NEON instructions.
2751 // Notes:
2752 //   Step 1 matches VectorLoadConst.
2753 //   Step 3 matches VectorLoadShuffle.
2754 //   Step 4, 5, 6 match VectorRearrange.
2755 //   VectorRearrange for short/int needs this more complex calculation because
2756 //   NEON tbl only supports byte tables, so for short/int we need to look up
2757 //   2/4 bytes as a group. For VectorRearrange long, we use bsl to implement
2758 //   rearrange.
2759 
// VectorRearrange for 8 x T_BYTE. The shuffle already holds byte indices, so
// a single tbl is emitted with src as the one-register table and shuffle as
// the index vector. dst is declared TEMP_DEF.
// The format text names $src as the table register and prints the register
// count, matching the operands actually passed to tbl below.
2760 instruct rearrange8B(vecD dst, vecD src, vecD shuffle)
2761 %{
2762   predicate(n->as_Vector()->length() == 8 &&
2763             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2764   match(Set dst (VectorRearrange src shuffle));
2765   ins_cost(INSN_COST);
2766   effect(TEMP_DEF dst);
2767   format %{ "tbl $dst, T8B, {$src}, 1, $shuffle\t# rearrange 8B" %}
2768   ins_encode %{
2769     __ tbl(as_FloatRegister($dst$$reg), __ T8B,
2770            as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
2771   %}
2772   ins_pipe(pipe_slow);
2773 %}
2774 
// VectorRearrange for 16 x T_BYTE. The shuffle already holds byte indices,
// so a single tbl is emitted with src as the one-register table and shuffle
// as the index vector. dst is declared TEMP_DEF.
// The format text names $src as the table register and prints the register
// count, matching the operands actually passed to tbl below.
2775 instruct rearrange16B(vecX dst, vecX src, vecX shuffle)
2776 %{
2777   predicate(n->as_Vector()->length() == 16 &&
2778             n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
2779   match(Set dst (VectorRearrange src shuffle));
2780   ins_cost(INSN_COST);
2781   effect(TEMP_DEF dst);
2782   format %{ "tbl $dst, T16B, {$src}, 1, $shuffle\t# rearrange 16B" %}
2783   ins_encode %{
2784     __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2785            as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
2786   %}
2787   ins_pipe(pipe_slow);
2788 %}
2789 
// VectorRearrange for 4 x T_SHORT. tbl is given byte indices here, so the
// halfword shuffle indices are first turned into byte-pair indices:
//   tmp0 = 0x02 in every byte, tmp1 = 0x0100 in every halfword
//   dst  = shuffle * tmp0   (mulv over T4H)
//   dst  = dst + tmp1       (addv over T8B)
//   dst  = tbl({src}, dst)  (one-register table lookup over T8B)
// This mirrors steps 4-6 of the int example in the "Rearrange" section
// comment, with 2-byte instead of 4-byte groups.
// dst is TEMP_DEF and tmp0/tmp1 are TEMP scratch vector registers.
// The format text prints each arrangement once, as rearrange4I does.
2790 instruct rearrange4S(vecD dst, vecD src, vecD shuffle, vecD tmp0, vecD tmp1)
2791 %{
2792   predicate(n->as_Vector()->length() == 4 &&
2793             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2794   match(Set dst (VectorRearrange src shuffle));
2795   ins_cost(INSN_COST);
2796   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2797   format %{ "mov   $tmp0, T8B, CONSTANT\t# constant 0x0202020202020202\n\t"
2798             "mov   $tmp1, T4H, CONSTANT\t# constant 0x0100010001000100\n\t"
2799             "mulv  $dst, T4H, $shuffle, $tmp0\n\t"
2800             "addv  $dst, T8B, $dst, $tmp1\n\t"
2801             "tbl   $dst, T8B, {$src}, 1, $dst\t# rearrange 4S" %}
2802   ins_encode %{
2803     __ mov(as_FloatRegister($tmp0$$reg), __ T8B, 0x02);
2804     __ mov(as_FloatRegister($tmp1$$reg), __ T4H, 0x0100);
2805     __ mulv(as_FloatRegister($dst$$reg), __ T4H,
2806             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2807     __ addv(as_FloatRegister($dst$$reg), __ T8B,
2808             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2809     __ tbl(as_FloatRegister($dst$$reg), __ T8B,
2810            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2811   %}
2812   ins_pipe(pipe_slow);
2813 %}
2814 
// VectorRearrange for 8 x T_SHORT. tbl is given byte indices here, so the
// halfword shuffle indices are first turned into byte-pair indices:
//   tmp0 = 0x02 in every byte, tmp1 = 0x0100 in every halfword
//   dst  = shuffle * tmp0   (mulv over T8H)
//   dst  = dst + tmp1       (addv over T16B)
//   dst  = tbl({src}, dst)  (one-register table lookup over T16B)
// This mirrors steps 4-6 of the int example in the "Rearrange" section
// comment, with 2-byte instead of 4-byte groups.
// dst is TEMP_DEF and tmp0/tmp1 are TEMP scratch vector registers.
// The format text prints each arrangement once, as rearrange4I does.
2815 instruct rearrange8S(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
2816 %{
2817   predicate(n->as_Vector()->length() == 8 &&
2818             n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
2819   match(Set dst (VectorRearrange src shuffle));
2820   ins_cost(INSN_COST);
2821   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2822   format %{ "mov   $tmp0, T16B, CONSTANT\t# constant 0x0202020202020202\n\t"
2823             "mov   $tmp1, T8H, CONSTANT\t# constant 0x0100010001000100\n\t"
2824             "mulv  $dst, T8H, $shuffle, $tmp0\n\t"
2825             "addv  $dst, T16B, $dst, $tmp1\n\t"
2826             "tbl   $dst, T16B, {$src}, 1, $dst\t# rearrange 8S" %}
2827   ins_encode %{
2828     __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x02);
2829     __ mov(as_FloatRegister($tmp1$$reg), __ T8H, 0x0100);
2830     __ mulv(as_FloatRegister($dst$$reg), __ T8H,
2831             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2832     __ addv(as_FloatRegister($dst$$reg), __ T16B,
2833             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2834     __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2835            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2836   %}
2837   ins_pipe(pipe_slow);
2838 %}
2839 
// VectorRearrange for 4 elements of T_INT or T_FLOAT. This is steps 4-6 of
// the example in the "Rearrange" section comment:
//   tmp0 = 0x04 in every byte, tmp1 = 0x03020100 in every word
//   dst  = shuffle * tmp0   (mulv over T4S)
//   dst  = dst + tmp1       (addv over T16B)
//   dst  = tbl({src}, dst)  (one-register table lookup over T16B)
// dst is TEMP_DEF and tmp0/tmp1 are TEMP scratch vector registers.
2840 instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
2841 %{
2842   predicate(n->as_Vector()->length() == 4 &&
2843            (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
2844             n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
2845   match(Set dst (VectorRearrange src shuffle));
2846   ins_cost(INSN_COST);
2847   effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
2848   format %{ "mov   $tmp0, T16B, CONSTANT\t# constant 0x0404040404040404\n\t"
2849             "mov   $tmp1, T4S, CONSTANT\t# constant 0x0302010003020100\n\t"
2850             "mulv  $dst, T4S, $shuffle, $tmp0\n\t"
2851             "addv  $dst, T16B, $dst, $tmp1\n\t"
2852             "tbl   $dst, T16B, {$src}, 1, $dst\t# rearrange 4I" %}
2853   ins_encode %{
2854     __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
2855     __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
2856     __ mulv(as_FloatRegister($dst$$reg), __ T4S,
2857             as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
2858     __ addv(as_FloatRegister($dst$$reg), __ T16B,
2859             as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
2860     __ tbl(as_FloatRegister($dst$$reg), __ T16B,
2861            as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
2862   %}
2863   ins_pipe(pipe_slow);
2864 %}
2865 
2866 //-------------------------------- Anytrue/alltrue -----------------------------
2867 
// VectorTest with predicate BoolTest::ne on vecD operands ("anytrue").
// Sequence: the three-operand addv folds the bytes of src1 into tmp, umov
// copies byte 0 of tmp to the integer dst, cmpw compares it with zr, and
// csetw materialises the NE condition in dst.
// src2 is part of the match but is never read by the encoding.
// cr is declared KILL because cmpw overwrites the flags; tmp is a TEMP
// vector register.
2868 instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
2869 %{
2870   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
2871   match(Set dst (VectorTest src1 src2 ));
2872   ins_cost(INSN_COST);
2873   effect(TEMP tmp, KILL cr);
2874   format %{ "addv  $tmp, T8B, $src1\n\t"
2875             "umov  $dst, $tmp, B, 0\n\t"
2876             "cmp   $dst, 0\n\t"
2877             "cset  $dst\t# anytrue 8B" %}
2878   ins_encode %{
2879     // No need to use src2.
2880     __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
2881     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2882     __ cmpw($dst$$Register, zr);
2883     __ csetw($dst$$Register, Assembler::NE);
2884   %}
2885   ins_pipe(pipe_slow);
2886 %}
2887 
// VectorTest with predicate BoolTest::ne on vecX operands ("anytrue").
// Sequence: the three-operand addv folds the bytes of src1 into tmp, umov
// copies byte 0 of tmp to the integer dst, cmpw compares it with zr, and
// csetw materialises the NE condition in dst.
// src2 is part of the match but is never read by the encoding.
// cr is declared KILL because cmpw overwrites the flags; tmp is a TEMP
// vector register.
2888 instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
2889 %{
2890   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
2891   match(Set dst (VectorTest src1 src2 ));
2892   ins_cost(INSN_COST);
2893   effect(TEMP tmp, KILL cr);
2894   format %{ "addv  $tmp, T16B, $src1\n\t"
2895             "umov  $dst, $tmp, B, 0\n\t"
2896             "cmp   $dst, 0\n\t"
2897             "cset  $dst\t# anytrue 16B" %}
2898   ins_encode %{
2899     // No need to use src2.
2900     __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
2901     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2902     __ cmpw($dst$$Register, zr);
2903     __ csetw($dst$$Register, Assembler::NE);
2904   %}
2905   ins_pipe(pipe_slow);
2906 %}
2907 
// VectorTest with predicate BoolTest::overflow on vecD operands ("alltrue").
// Sequence: uminv reduces the bytes of src1 into tmp, umov copies byte 0 of
// tmp to the integer dst, cmpw compares it with 0xff, and csetw materialises
// the EQ condition in dst.
// src2 is part of the match but is never read by the encoding.
// cr is declared KILL because cmpw overwrites the flags; tmp is a TEMP
// vector register.
2908 instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
2909 %{
2910   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
2911   match(Set dst (VectorTest src1 src2 ));
2912   ins_cost(INSN_COST);
2913   effect(TEMP tmp, KILL cr);
2914   format %{ "uminv $tmp, T8B, $src1\n\t"
2915             "umov  $dst, $tmp, B, 0\n\t"
2916             "cmp   $dst, 0xff\n\t"
2917             "cset  $dst\t# alltrue 8B" %}
2918   ins_encode %{
2919     // No need to use src2.
2920     __ uminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
2921     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2922     __ cmpw($dst$$Register, 0xff);
2923     __ csetw($dst$$Register, Assembler::EQ);
2924   %}
2925   ins_pipe(pipe_slow);
2926 %}
2927 
// VectorTest with predicate BoolTest::overflow on vecX operands ("alltrue").
// Sequence: uminv reduces the bytes of src1 into tmp, umov copies byte 0 of
// tmp to the integer dst, cmpw compares it with 0xff, and csetw materialises
// the EQ condition in dst.
// src2 is part of the match but is never read by the encoding.
// cr is declared KILL because cmpw overwrites the flags; tmp is a TEMP
// vector register.
2928 instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
2929 %{
2930   predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
2931   match(Set dst (VectorTest src1 src2 ));
2932   ins_cost(INSN_COST);
2933   effect(TEMP tmp, KILL cr);
2934   format %{ "uminv $tmp, T16B, $src1\n\t"
2935             "umov  $dst, $tmp, B, 0\n\t"
2936             "cmp   $dst, 0xff\n\t"
2937             "cset  $dst\t# alltrue 16B" %}
2938   ins_encode %{
2939     // No need to use src2.
2940     __ uminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
2941     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
2942     __ cmpw($dst$$Register, 0xff);
2943     __ csetw($dst$$Register, Assembler::EQ);
2944   %}
2945   ins_pipe(pipe_slow);
2946 %}
2947 
2948 // --------------------------------- ABS --------------------------------------
2949 
// Lane-wise integer absolute value for AbsVB with 4 or 8 lanes, held in a
// 64-bit vector register: one ABS with the 8B arrangement.
2950 instruct vabs8B(vecD dst, vecD src)
2951 %{
2952   predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
2953   match(Set dst (AbsVB src));
2954   ins_cost(INSN_COST);
2955   format %{ "abs  $dst, T8B, $src\t# vector (8B)" %}
2956   ins_encode %{
2957     __ absr($dst$$FloatRegister, __ T8B, $src$$FloatRegister);
2958   %}
2959   ins_pipe(vlogical64);
2960 %}
2961 
// Lane-wise integer absolute value for AbsVB with 16 lanes (128-bit
// register): one ABS with the 16B arrangement.
2962 instruct vabs16B(vecX dst, vecX src)
2963 %{
2964   predicate(n->as_Vector()->length() == 16);
2965   match(Set dst (AbsVB src));
2966   ins_cost(INSN_COST);
2967   format %{ "abs  $dst, T16B, $src\t# vector (16B)" %}
2968   ins_encode %{
2969     __ absr($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
2970   %}
2971   ins_pipe(vlogical128);
2972 %}
2973 
// Lane-wise integer absolute value for AbsVS with exactly 4 lanes (64-bit
// register): one ABS with the 4H arrangement.
2974 instruct vabs4S(vecD dst, vecD src)
2975 %{
2976   predicate(n->as_Vector()->length() == 4);
2977   match(Set dst (AbsVS src));
2978   ins_cost(INSN_COST);
2979   format %{ "abs  $dst, T4H, $src\t# vector (4H)" %}
2980   ins_encode %{
2981     __ absr($dst$$FloatRegister, __ T4H, $src$$FloatRegister);
2982   %}
2983   ins_pipe(vlogical64);
2984 %}
2985 
// Lane-wise integer absolute value for AbsVS with 8 lanes (128-bit
// register): one ABS with the 8H arrangement.
2986 instruct vabs8S(vecX dst, vecX src)
2987 %{
2988   predicate(n->as_Vector()->length() == 8);
2989   match(Set dst (AbsVS src));
2990   ins_cost(INSN_COST);
2991   format %{ "abs  $dst, T8H, $src\t# vector (8H)" %}
2992   ins_encode %{
2993     __ absr($dst$$FloatRegister, __ T8H, $src$$FloatRegister);
2994   %}
2995   ins_pipe(vlogical128);
2996 %}
2997 
// Lane-wise integer absolute value for AbsVI with 2 lanes (64-bit
// register): one ABS with the 2S arrangement.
2998 instruct vabs2I(vecD dst, vecD src)
2999 %{
3000   predicate(n->as_Vector()->length() == 2);
3001   match(Set dst (AbsVI src));
3002   ins_cost(INSN_COST);
3003   format %{ "abs  $dst, T2S, $src\t# vector (2S)" %}
3004   ins_encode %{
3005     __ absr($dst$$FloatRegister, __ T2S, $src$$FloatRegister);
3006   %}
3007   ins_pipe(vlogical64);
3008 %}
3009 
// Lane-wise integer absolute value for AbsVI with 4 lanes (128-bit
// register): one ABS with the 4S arrangement.
3010 instruct vabs4I(vecX dst, vecX src)
3011 %{
3012   predicate(n->as_Vector()->length() == 4);
3013   match(Set dst (AbsVI src));
3014   ins_cost(INSN_COST);
3015   format %{ "abs  $dst, T4S, $src\t# vector (4S)" %}
3016   ins_encode %{
3017     __ absr($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
3018   %}
3019   ins_pipe(vlogical128);
3020 %}
3021 
// Lane-wise integer absolute value for AbsVL with 2 lanes (128-bit
// register): one ABS with the 2D arrangement.
3022 instruct vabs2L(vecX dst, vecX src)
3023 %{
3024   predicate(n->as_Vector()->length() == 2);
3025   match(Set dst (AbsVL src));
3026   ins_cost(INSN_COST);
3027   format %{ "abs  $dst, T2D, $src\t# vector (2D)" %}
3028   ins_encode %{
3029     __ absr($dst$$FloatRegister, __ T2D, $src$$FloatRegister);
3030   %}
3031   ins_pipe(vlogical128);
3032 %}
3033 
// Lane-wise floating-point absolute value for AbsVF with 2 lanes (64-bit
// register): one FABS with the 2S arrangement.
3034 instruct vabs2F(vecD dst, vecD src)
3035 %{
3036   predicate(n->as_Vector()->length() == 2);
3037   match(Set dst (AbsVF src));
3038   ins_cost(INSN_COST * 3);
3039   format %{ "fabs  $dst, T2S, $src\t# vector (2S)" %}
3040   ins_encode %{
3041     __ fabs($dst$$FloatRegister, __ T2S, $src$$FloatRegister);
3042   %}
3043   ins_pipe(vunop_fp64);
3044 %}
3045 
// Lane-wise floating-point absolute value for AbsVF with 4 lanes (128-bit
// register): one FABS with the 4S arrangement.
3046 instruct vabs4F(vecX dst, vecX src)
3047 %{
3048   predicate(n->as_Vector()->length() == 4);
3049   match(Set dst (AbsVF src));
3050   ins_cost(INSN_COST * 3);
3051   format %{ "fabs  $dst, T4S, $src\t# vector (4S)" %}
3052   ins_encode %{
3053     __ fabs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
3054   %}
3055   ins_pipe(vunop_fp128);
3056 %}
3057 
// Lane-wise floating-point absolute value for AbsVD with 2 lanes (128-bit
// register): one FABS with the 2D arrangement.
3058 instruct vabs2D(vecX dst, vecX src)
3059 %{
3060   predicate(n->as_Vector()->length() == 2);
3061   match(Set dst (AbsVD src));
3062   ins_cost(INSN_COST * 3);
3063   format %{ "fabs  $dst, T2D, $src\t# vector (2D)" %}
3064   ins_encode %{
3065     __ fabs($dst$$FloatRegister, __ T2D, $src$$FloatRegister);
3066   %}
3067   ins_pipe(vunop_fp128);
3068 %}
3069 
3070 // --------------------------------- FABS DIFF --------------------------------
3071 
// Fuses AbsVF(SubVF src1 src2) on a 2-lane float vector into a single FABD
// (2S arrangement) instead of a separate subtract and absolute value.
3072 instruct vabd2F(vecD dst, vecD src1, vecD src2)
3073 %{
3074   predicate(n->as_Vector()->length() == 2);
3075   match(Set dst (AbsVF (SubVF src1 src2)));
3076   ins_cost(INSN_COST * 3);
3077   format %{ "fabd  $dst, T2S, $src1, $src2\t# vector (2S)" %}
3078   ins_encode %{
3079     __ fabd($dst$$FloatRegister, __ T2S,
3080             $src1$$FloatRegister, $src2$$FloatRegister);
3081   %}
3082   ins_pipe(vunop_fp64);
3083 %}
3084 
// Fuses AbsVF(SubVF src1 src2) on a 4-lane float vector into a single FABD
// (4S arrangement) instead of a separate subtract and absolute value.
3085 instruct vabd4F(vecX dst, vecX src1, vecX src2)
3086 %{
3087   predicate(n->as_Vector()->length() == 4);
3088   match(Set dst (AbsVF (SubVF src1 src2)));
3089   ins_cost(INSN_COST * 3);
3090   format %{ "fabd  $dst, T4S, $src1, $src2\t# vector (4S)" %}
3091   ins_encode %{
3092     __ fabd($dst$$FloatRegister, __ T4S,
3093             $src1$$FloatRegister, $src2$$FloatRegister);
3094   %}
3095   ins_pipe(vunop_fp128);
3096 %}
3097 
// Fuses AbsVD(SubVD src1 src2) on a 2-lane double vector into a single FABD
// (2D arrangement) instead of a separate subtract and absolute value.
3098 instruct vabd2D(vecX dst, vecX src1, vecX src2)
3099 %{
3100   predicate(n->as_Vector()->length() == 2);
3101   match(Set dst (AbsVD (SubVD src1 src2)));
3102   ins_cost(INSN_COST * 3);
3103   format %{ "fabd  $dst, T2D, $src1, $src2\t# vector (2D)" %}
3104   ins_encode %{
3105     __ fabd($dst$$FloatRegister, __ T2D,
3106             $src1$$FloatRegister, $src2$$FloatRegister);
3107   %}
3108   ins_pipe(vunop_fp128);
3109 %}
3110 
// Broadcasts a general-purpose register into every byte lane of a 64-bit
// vector (ReplicateB with 4 or 8 lanes) using DUP with the 8B arrangement.
// Only selected when SVE is not in use.
3111 instruct replicate8B(vecD dst, iRegIorL2I src)
3112 %{
3113   predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 ||
3114                             n->as_Vector()->length() == 4));
3115   match(Set dst (ReplicateB src));
3116   ins_cost(INSN_COST);
3117   format %{ "dup  $dst, $src\t# vector (8B)" %}
3118   ins_encode %{
3119     __ dup($dst$$FloatRegister, __ T8B, $src$$Register);
3120   %}
3121   ins_pipe(vdup_reg_reg64);
3122 %}
3123 
// Broadcasts a general-purpose register into all 16 byte lanes of a 128-bit
// vector (ReplicateB) using DUP with the 16B arrangement. Only selected when
// SVE is not in use.
3124 instruct replicate16B(vecX dst, iRegIorL2I src)
3125 %{
3126   predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
3127   match(Set dst (ReplicateB src));
3128   ins_cost(INSN_COST);
3129   format %{ "dup  $dst, $src\t# vector (16B)" %}
3130   ins_encode %{
3131     __ dup($dst$$FloatRegister, __ T16B, $src$$Register);
3132   %}
3133   ins_pipe(vdup_reg_reg128);
3134 %}
3135 
// Broadcasts an int constant into the byte lanes of a 64-bit vector
// (ReplicateB with 4 or 8 lanes). Only the low 8 bits of the constant are
// handed to the vector-immediate mov. Only selected when SVE is not in use.
3136 instruct replicate8B_imm(vecD dst, immI con)
3137 %{
3138   predicate(UseSVE == 0 && (n->as_Vector()->length() == 8 ||
3139                             n->as_Vector()->length() == 4));
3140   match(Set dst (ReplicateB con));
3141   ins_cost(INSN_COST);
3142   format %{ "movi  $dst, $con\t# vector (8B)" %}
3143   ins_encode %{
3144     __ mov($dst$$FloatRegister, __ T8B, $con$$constant & 0xff);
3145   %}
3146   ins_pipe(vmovi_reg_imm64);
3147 %}
3148 
// Broadcasts an int constant into all 16 byte lanes of a 128-bit vector
// (ReplicateB). Only the low 8 bits of the constant are handed to the
// vector-immediate mov. Only selected when SVE is not in use.
3149 instruct replicate16B_imm(vecX dst, immI con)
3150 %{
3151   predicate(UseSVE == 0 && n->as_Vector()->length() == 16);
3152   match(Set dst (ReplicateB con));
3153   ins_cost(INSN_COST);
3154   format %{ "movi  $dst, $con\t# vector (16B)" %}
3155   ins_encode %{
3156     __ mov($dst$$FloatRegister, __ T16B, $con$$constant & 0xff);
3157   %}
3158   ins_pipe(vmovi_reg_imm128);
3159 %}
3160 
// Broadcasts a general-purpose register into the halfword lanes of a 64-bit
// vector (ReplicateS with 2 or 4 lanes) using DUP with the 4H arrangement.
// Only selected when SVE is not in use.
3161 instruct replicate4S(vecD dst, iRegIorL2I src)
3162 %{
3163   predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 ||
3164                             n->as_Vector()->length() == 2));
3165   match(Set dst (ReplicateS src));
3166   ins_cost(INSN_COST);
3167   format %{ "dup  $dst, $src\t# vector (4S)" %}
3168   ins_encode %{
3169     __ dup($dst$$FloatRegister, __ T4H, $src$$Register);
3170   %}
3171   ins_pipe(vdup_reg_reg64);
3172 %}
3173 
// Broadcasts a general-purpose register into all 8 halfword lanes of a
// 128-bit vector (ReplicateS) using DUP with the 8H arrangement. Only
// selected when SVE is not in use.
3174 instruct replicate8S(vecX dst, iRegIorL2I src)
3175 %{
3176   predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
3177   match(Set dst (ReplicateS src));
3178   ins_cost(INSN_COST);
3179   format %{ "dup  $dst, $src\t# vector (8S)" %}
3180   ins_encode %{
3181     __ dup($dst$$FloatRegister, __ T8H, $src$$Register);
3182   %}
3183   ins_pipe(vdup_reg_reg128);
3184 %}
3185 
// Broadcasts an int constant into the halfword lanes of a 64-bit vector
// (ReplicateS with 2 or 4 lanes). Only the low 16 bits of the constant are
// handed to the vector-immediate mov. Only selected when SVE is not in use.
3186 instruct replicate4S_imm(vecD dst, immI con)
3187 %{
3188   predicate(UseSVE == 0 && (n->as_Vector()->length() == 4 ||
3189                             n->as_Vector()->length() == 2));
3190   match(Set dst (ReplicateS con));
3191   ins_cost(INSN_COST);
3192   format %{ "movi  $dst, $con\t# vector (4H)" %}
3193   ins_encode %{
3194     __ mov($dst$$FloatRegister, __ T4H, $con$$constant & 0xffff);
3195   %}
3196   ins_pipe(vmovi_reg_imm64);
3197 %}
3198 
// Broadcasts an int constant into all 8 halfword lanes of a 128-bit vector
// (ReplicateS). Only the low 16 bits of the constant are handed to the
// vector-immediate mov. Only selected when SVE is not in use.
3199 instruct replicate8S_imm(vecX dst, immI con)
3200 %{
3201   predicate(UseSVE == 0 && n->as_Vector()->length() == 8);
3202   match(Set dst (ReplicateS con));
3203   ins_cost(INSN_COST);
3204   format %{ "movi  $dst, $con\t# vector (8H)" %}
3205   ins_encode %{
3206     __ mov($dst$$FloatRegister, __ T8H, $con$$constant & 0xffff);
3207   %}
3208   ins_pipe(vmovi_reg_imm128);
3209 %}
3210 
// Broadcasts a general-purpose register into both word lanes of a 64-bit
// vector (ReplicateI, 2 lanes) using DUP with the 2S arrangement. Only
// selected when SVE is not in use.
3211 instruct replicate2I(vecD dst, iRegIorL2I src)
3212 %{
3213   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3214   match(Set dst (ReplicateI src));
3215   ins_cost(INSN_COST);
3216   format %{ "dup  $dst, $src\t# vector (2I)" %}
3217   ins_encode %{
3218     __ dup($dst$$FloatRegister, __ T2S, $src$$Register);
3219   %}
3220   ins_pipe(vdup_reg_reg64);
3221 %}
3222 
// Broadcasts a general-purpose register into all 4 word lanes of a 128-bit
// vector (ReplicateI) using DUP with the 4S arrangement. Only selected when
// SVE is not in use.
3223 instruct replicate4I(vecX dst, iRegIorL2I src)
3224 %{
3225   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
3226   match(Set dst (ReplicateI src));
3227   ins_cost(INSN_COST);
3228   format %{ "dup  $dst, $src\t# vector (4I)" %}
3229   ins_encode %{
3230     __ dup($dst$$FloatRegister, __ T4S, $src$$Register);
3231   %}
3232   ins_pipe(vdup_reg_reg128);
3233 %}
3234 
// Broadcasts an int constant into both word lanes of a 64-bit vector
// (ReplicateI, 2 lanes). The constant is passed unmasked to the
// vector-immediate mov (2S arrangement). Only selected when SVE is not in use.
3235 instruct replicate2I_imm(vecD dst, immI con)
3236 %{
3237   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3238   match(Set dst (ReplicateI con));
3239   ins_cost(INSN_COST);
3240   format %{ "movi  $dst, $con\t# vector (2I)" %}
3241   ins_encode %{
3242     __ mov($dst$$FloatRegister, __ T2S, $con$$constant);
3243   %}
3244   ins_pipe(vmovi_reg_imm64);
3245 %}
3246 
// Broadcasts an int constant into all 4 word lanes of a 128-bit vector
// (ReplicateI). The constant is passed unmasked to the vector-immediate mov
// (4S arrangement). Only selected when SVE is not in use.
3247 instruct replicate4I_imm(vecX dst, immI con)
3248 %{
3249   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
3250   match(Set dst (ReplicateI con));
3251   ins_cost(INSN_COST);
3252   format %{ "movi  $dst, $con\t# vector (4I)" %}
3253   ins_encode %{
3254     __ mov($dst$$FloatRegister, __ T4S, $con$$constant);
3255   %}
3256   ins_pipe(vmovi_reg_imm128);
3257 %}
3258 
// Broadcasts a 64-bit general-purpose register into both doubleword lanes of
// a 128-bit vector (ReplicateL) using DUP with the 2D arrangement. Only
// selected when SVE is not in use.
3259 instruct replicate2L(vecX dst, iRegL src)
3260 %{
3261   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3262   match(Set dst (ReplicateL src));
3263   ins_cost(INSN_COST);
3264   format %{ "dup  $dst, $src\t# vector (2L)" %}
3265   ins_encode %{
3266     __ dup($dst$$FloatRegister, __ T2D, $src$$Register);
3267   %}
3268   ins_pipe(vdup_reg_reg128);
3269 %}
3270 
// Broadcasts a long constant into both doubleword lanes of a 128-bit vector
// (ReplicateL). The constant is passed unmasked to the vector-immediate mov
// (2D arrangement). Only selected when SVE is not in use.
3271 instruct replicate2L_imm(vecX dst, immL con)
3272 %{
3273   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3274   match(Set dst (ReplicateL con));
3275   ins_cost(INSN_COST);
3276   format %{ "movi  $dst, $con\t# vector (2L)" %}
3277   ins_encode %{
3278     __ mov($dst$$FloatRegister, __ T2D, $con$$constant);
3279   %}
3280   ins_pipe(vmovi_reg_imm128);
3281 %}
3282 
// Broadcasts a scalar float register into both lanes of a 64-bit vector
// (ReplicateF, 2 lanes) using the register-to-vector DUP with the 2S
// arrangement. Only selected when SVE is not in use.
3283 instruct replicate2F(vecD dst, vRegF src)
3284 %{
3285   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3286   match(Set dst (ReplicateF src));
3287   ins_cost(INSN_COST);
3288   format %{ "dup  $dst, $src\t# vector (2F)" %}
3289   ins_encode %{
3290     __ dup($dst$$FloatRegister, __ T2S, $src$$FloatRegister);
3291   %}
3292   ins_pipe(vdup_reg_freg64);
3293 %}
3294 
// Broadcasts a scalar float register into all 4 lanes of a 128-bit vector
// (ReplicateF) using the register-to-vector DUP with the 4S arrangement.
// Only selected when SVE is not in use.
3295 instruct replicate4F(vecX dst, vRegF src)
3296 %{
3297   predicate(UseSVE == 0 && n->as_Vector()->length() == 4);
3298   match(Set dst (ReplicateF src));
3299   ins_cost(INSN_COST);
3300   format %{ "dup  $dst, $src\t# vector (4F)" %}
3301   ins_encode %{
3302     __ dup($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
3303   %}
3304   ins_pipe(vdup_reg_freg128);
3305 %}
3306 
// Broadcasts a scalar double register into both lanes of a 128-bit vector
// (ReplicateD) using the register-to-vector DUP with the 2D arrangement.
// Only selected when SVE is not in use.
3307 instruct replicate2D(vecX dst, vRegD src)
3308 %{
3309   predicate(UseSVE == 0 && n->as_Vector()->length() == 2);
3310   match(Set dst (ReplicateD src));
3311   ins_cost(INSN_COST);
3312   format %{ "dup  $dst, $src\t# vector (2D)" %}
3313   ins_encode %{
3314     __ dup($dst$$FloatRegister, __ T2D, $src$$FloatRegister);
3315   %}
3316   ins_pipe(vdup_reg_dreg128);
3317 %}
3318 
3319 // ====================REDUCTION ARITHMETIC====================================
3320 
// Add-reduction of a 2-lane int vector plus the scalar isrc
// (AddReductionVI, element type T_INT). A pairwise add of vsrc with itself
// puts the lane sum in lane 0 of vtmp; that lane is moved to itmp and added
// to isrc with a 32-bit add.
3321 instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
3322 %{
3323   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
3324   match(Set dst (AddReductionVI isrc vsrc));
3325   ins_cost(INSN_COST);
3326   effect(TEMP vtmp, TEMP itmp);
3327   format %{ "addpv  $vtmp, T2S, $vsrc, $vsrc\n\t"
3328             "umov  $itmp, $vtmp, S, 0\n\t"
3329             "addw  $dst, $itmp, $isrc\t# add reduction2I"
3330   %}
3331   ins_encode %{
3332     __ addpv($vtmp$$FloatRegister, __ T2S,
3333              $vsrc$$FloatRegister, $vsrc$$FloatRegister);
3334     __ umov($itmp$$Register, $vtmp$$FloatRegister, __ S, 0);
3335     __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
3336   %}
3337   ins_pipe(pipe_class_default);
3338 %}
3339 
// Add-reduction of a 4-lane int vector plus the scalar isrc
// (AddReductionVI, element type T_INT). The two-operand addv sums across the
// four lanes into vtmp; lane 0 is moved to itmp and added to isrc with a
// 32-bit add.
3340 instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
3341 %{
3342   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
3343   match(Set dst (AddReductionVI isrc vsrc));
3344   ins_cost(INSN_COST);
3345   effect(TEMP vtmp, TEMP itmp);
3346   format %{ "addv  $vtmp, T4S, $vsrc\n\t"
3347             "umov  $itmp, $vtmp, S, 0\n\t"
3348             "addw  $dst, $itmp, $isrc\t# add reduction4I"
3349   %}
3350   ins_encode %{
3351     __ addv($vtmp$$FloatRegister, __ T4S,
3352             $vsrc$$FloatRegister);
3353     __ umov($itmp$$Register, $vtmp$$FloatRegister, __ S, 0);
3354     __ addw($dst$$Register, $itmp$$Register, $isrc$$Register);
3355   %}
3356   ins_pipe(pipe_class_default);
3357 %}
3358 
// Multiply-reduction of a 2-lane int vector times the scalar isrc
// (MulReductionVI, element type T_INT). Each lane is moved to tmp in turn
// and multiplied into dst. dst is declared TEMP because it is written by the
// first mul while vsrc is still read afterwards.
3359 instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
3360 %{
3361   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
3362   match(Set dst (MulReductionVI isrc vsrc));
3363   ins_cost(INSN_COST);
3364   effect(TEMP tmp, TEMP dst);
3365   format %{ "umov  $tmp, $vsrc, S, 0\n\t"
3366             "mul   $dst, $tmp, $isrc\n\t"
3367             "umov  $tmp, $vsrc, S, 1\n\t"
3368             "mul   $dst, $tmp, $dst\t# mul reduction2I"
3369   %}
3370   ins_encode %{
3371     __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);
3372     __ mul($dst$$Register, $tmp$$Register, $isrc$$Register);
3373     __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 1);
3374     __ mul($dst$$Register, $tmp$$Register, $dst$$Register);
3375   %}
3376   ins_pipe(pipe_class_default);
3377 %}
3378 
// Multiply-reduction of a 4-lane int vector times the scalar isrc
// (MulReductionVI, element type T_INT). The upper doubleword of vsrc is
// copied to the lower doubleword of vtmp and multiplied lane-wise (2S) with
// vsrc, leaving two partial products; these are then moved out one at a time
// and multiplied into dst, starting from isrc.
3379 instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
3380 %{
3381   predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
3382   match(Set dst (MulReductionVI isrc vsrc));
3383   ins_cost(INSN_COST);
3384   effect(TEMP vtmp, TEMP itmp, TEMP dst);
3385   format %{ "ins   $vtmp, D, $vsrc, 0, 1\n\t"
3386             "mulv  $vtmp, T2S, $vtmp, $vsrc\n\t"
3387             "umov  $itmp, $vtmp, S, 0\n\t"
3388             "mul   $dst, $itmp, $isrc\n\t"
3389             "umov  $itmp, $vtmp, S, 1\n\t"
3390             "mul   $dst, $itmp, $dst\t# mul reduction4I"
3391   %}
3392   ins_encode %{
3393     __ ins($vtmp$$FloatRegister, __ D,
3394            $vsrc$$FloatRegister, 0, 1);
3395     __ mulv($vtmp$$FloatRegister, __ T2S,
3396             $vtmp$$FloatRegister, $vsrc$$FloatRegister);
3397     __ umov($itmp$$Register, $vtmp$$FloatRegister, __ S, 0);
3398     __ mul($dst$$Register, $itmp$$Register, $isrc$$Register);
3399     __ umov($itmp$$Register, $vtmp$$FloatRegister, __ S, 1);
3400     __ mul($dst$$Register, $itmp$$Register, $dst$$Register);
3401   %}
3402   ins_pipe(pipe_class_default);
3403 %}
3404 
// Add-reduction of a 2-lane float vector plus the scalar fsrc
// (AddReductionVF). Lanes are accumulated one scalar fadds at a time, in
// lane order: the first add uses vsrc directly as a scalar operand, then
// lane 1 is copied to lane 0 of tmp and added.
3405 instruct reduce_add2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
3406 %{
3407   match(Set dst (AddReductionVF fsrc vsrc));
3408   ins_cost(INSN_COST);
3409   effect(TEMP tmp, TEMP dst);
3410   format %{ "fadds $dst, $fsrc, $vsrc\n\t"
3411             "ins   $tmp, S, $vsrc, 0, 1\n\t"
3412             "fadds $dst, $dst, $tmp\t# add reduction2F"
3413   %}
3414   ins_encode %{
3415     __ fadds($dst$$FloatRegister,
3416              $fsrc$$FloatRegister, $vsrc$$FloatRegister);
3417     __ ins($tmp$$FloatRegister, __ S,
3418            $vsrc$$FloatRegister, 0, 1);
3419     __ fadds($dst$$FloatRegister,
3420              $dst$$FloatRegister, $tmp$$FloatRegister);
3421   %}
3422   ins_pipe(pipe_class_default);
3423 %}
3424 
// Add-reduction of a 4-lane float vector plus the scalar fsrc
// (AddReductionVF). Lanes are accumulated one scalar fadds at a time, in
// lane order: the first add uses vsrc directly as a scalar operand, then
// lanes 1, 2 and 3 are each copied to lane 0 of tmp and added.
3425 instruct reduce_add4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
3426 %{
3427   match(Set dst (AddReductionVF fsrc vsrc));
3428   ins_cost(INSN_COST);
3429   effect(TEMP tmp, TEMP dst);
3430   format %{ "fadds $dst, $fsrc, $vsrc\n\t"
3431             "ins   $tmp, S, $vsrc, 0, 1\n\t"
3432             "fadds $dst, $dst, $tmp\n\t"
3433             "ins   $tmp, S, $vsrc, 0, 2\n\t"
3434             "fadds $dst, $dst, $tmp\n\t"
3435             "ins   $tmp, S, $vsrc, 0, 3\n\t"
3436             "fadds $dst, $dst, $tmp\t# add reduction4F"
3437   %}
3438   ins_encode %{
3439     __ fadds($dst$$FloatRegister,
3440              $fsrc$$FloatRegister, $vsrc$$FloatRegister);
3441     __ ins($tmp$$FloatRegister, __ S,
3442            $vsrc$$FloatRegister, 0, 1);
3443     __ fadds($dst$$FloatRegister,
3444              $dst$$FloatRegister, $tmp$$FloatRegister);
3445     __ ins($tmp$$FloatRegister, __ S,
3446            $vsrc$$FloatRegister, 0, 2);
3447     __ fadds($dst$$FloatRegister,
3448              $dst$$FloatRegister, $tmp$$FloatRegister);
3449     __ ins($tmp$$FloatRegister, __ S,
3450            $vsrc$$FloatRegister, 0, 3);
3451     __ fadds($dst$$FloatRegister,
3452              $dst$$FloatRegister, $tmp$$FloatRegister);
3453   %}
3454   ins_pipe(pipe_class_default);
3455 %}
3456 
// Multiply-reduction of a 2-lane float vector times the scalar fsrc
// (MulReductionVF). Lanes are folded in one scalar fmuls at a time, in lane
// order: the first multiply uses vsrc directly as a scalar operand, then
// lane 1 is copied to lane 0 of tmp and multiplied in.
3457 instruct reduce_mul2F(vRegF dst, vRegF fsrc, vecD vsrc, vecD tmp)
3458 %{
3459   match(Set dst (MulReductionVF fsrc vsrc));
3460   ins_cost(INSN_COST);
3461   effect(TEMP tmp, TEMP dst);
3462   format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
3463             "ins   $tmp, S, $vsrc, 0, 1\n\t"
3464             "fmuls $dst, $dst, $tmp\t# mul reduction2F"
3465   %}
3466   ins_encode %{
3467     __ fmuls($dst$$FloatRegister,
3468              $fsrc$$FloatRegister, $vsrc$$FloatRegister);
3469     __ ins($tmp$$FloatRegister, __ S,
3470            $vsrc$$FloatRegister, 0, 1);
3471     __ fmuls($dst$$FloatRegister,
3472              $dst$$FloatRegister, $tmp$$FloatRegister);
3473   %}
3474   ins_pipe(pipe_class_default);
3475 %}
3476 
// Multiply-reduction of a 4-lane float vector times the scalar fsrc
// (MulReductionVF). Lanes are folded in one scalar fmuls at a time, in lane
// order: the first multiply uses vsrc directly as a scalar operand, then
// lanes 1, 2 and 3 are each copied to lane 0 of tmp and multiplied in.
3477 instruct reduce_mul4F(vRegF dst, vRegF fsrc, vecX vsrc, vecX tmp)
3478 %{
3479   match(Set dst (MulReductionVF fsrc vsrc));
3480   ins_cost(INSN_COST);
3481   effect(TEMP tmp, TEMP dst);
3482   format %{ "fmuls $dst, $fsrc, $vsrc\n\t"
3483             "ins   $tmp, S, $vsrc, 0, 1\n\t"
3484             "fmuls $dst, $dst, $tmp\n\t"
3485             "ins   $tmp, S, $vsrc, 0, 2\n\t"
3486             "fmuls $dst, $dst, $tmp\n\t"
3487             "ins   $tmp, S, $vsrc, 0, 3\n\t"
3488             "fmuls $dst, $dst, $tmp\t# mul reduction4F"
3489   %}
3490   ins_encode %{
3491     __ fmuls($dst$$FloatRegister,
3492              $fsrc$$FloatRegister, $vsrc$$FloatRegister);
3493     __ ins($tmp$$FloatRegister, __ S,
3494            $vsrc$$FloatRegister, 0, 1);
3495     __ fmuls($dst$$FloatRegister,
3496              $dst$$FloatRegister, $tmp$$FloatRegister);
3497     __ ins($tmp$$FloatRegister, __ S,
3498            $vsrc$$FloatRegister, 0, 2);
3499     __ fmuls($dst$$FloatRegister,
3500              $dst$$FloatRegister, $tmp$$FloatRegister);
3501     __ ins($tmp$$FloatRegister, __ S,
3502            $vsrc$$FloatRegister, 0, 3);
3503     __ fmuls($dst$$FloatRegister,
3504              $dst$$FloatRegister, $tmp$$FloatRegister);
3505   %}
3506   ins_pipe(pipe_class_default);
3507 %}
3508 
// Add-reduction of a 2-lane double vector plus the scalar dsrc
// (AddReductionVD). The first faddd uses vsrc directly as a scalar operand;
// lane 1 is then copied to lane 0 of tmp and added.
3509 instruct reduce_add2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
3510 %{
3511   match(Set dst (AddReductionVD dsrc vsrc));
3512   ins_cost(INSN_COST);
3513   effect(TEMP tmp, TEMP dst);
3514   format %{ "faddd $dst, $dsrc, $vsrc\n\t"
3515             "ins   $tmp, D, $vsrc, 0, 1\n\t"
3516             "faddd $dst, $dst, $tmp\t# add reduction2D"
3517   %}
3518   ins_encode %{
3519     __ faddd($dst$$FloatRegister,
3520              $dsrc$$FloatRegister, $vsrc$$FloatRegister);
3521     __ ins($tmp$$FloatRegister, __ D,
3522            $vsrc$$FloatRegister, 0, 1);
3523     __ faddd($dst$$FloatRegister,
3524              $dst$$FloatRegister, $tmp$$FloatRegister);
3525   %}
3526   ins_pipe(pipe_class_default);
3527 %}
3528 
// Multiply-reduction of a 2-lane double vector times the scalar dsrc
// (MulReductionVD). The first fmuld uses vsrc directly as a scalar operand;
// lane 1 is then copied to lane 0 of tmp and multiplied in.
3529 instruct reduce_mul2D(vRegD dst, vRegD dsrc, vecX vsrc, vecX tmp)
3530 %{
3531   match(Set dst (MulReductionVD dsrc vsrc));
3532   ins_cost(INSN_COST);
3533   effect(TEMP tmp, TEMP dst);
3534   format %{ "fmuld $dst, $dsrc, $vsrc\n\t"
3535             "ins   $tmp, D, $vsrc, 0, 1\n\t"
3536             "fmuld $dst, $dst, $tmp\t# mul reduction2D"
3537   %}
3538   ins_encode %{
3539     __ fmuld($dst$$FloatRegister,
3540              $dsrc$$FloatRegister, $vsrc$$FloatRegister);
3541     __ ins($tmp$$FloatRegister, __ D,
3542            $vsrc$$FloatRegister, 0, 1);
3543     __ fmuld($dst$$FloatRegister,
3544              $dst$$FloatRegister, $tmp$$FloatRegister);
3545   %}
3546   ins_pipe(pipe_class_default);
3547 %}
3548 
3549 // ====================VECTOR ARITHMETIC=======================================
3550 
3551 // --------------------------------- ADD --------------------------------------
3552 
// Lane-wise byte add on a 64-bit vector (AddVB with 4 or 8 lanes). The
// three-register addv form with the 8B arrangement adds src1 and src2.
3553 instruct vadd8B(vecD dst, vecD src1, vecD src2)
3554 %{
3555   predicate(n->as_Vector()->length() == 4 ||
3556             n->as_Vector()->length() == 8);
3557   match(Set dst (AddVB src1 src2));
3558   ins_cost(INSN_COST);
3559   format %{ "addv  $dst,$src1,$src2\t# vector (8B)" %}
3560   ins_encode %{
3561     __ addv($dst$$FloatRegister, __ T8B,
3562             $src1$$FloatRegister,
3563             $src2$$FloatRegister);
3564   %}
3565   ins_pipe(vdop64);
3566 %}
3567 
// Lane-wise byte add on a 128-bit vector (AddVB with 16 lanes). The
// three-register addv form with the 16B arrangement adds src1 and src2.
3568 instruct vadd16B(vecX dst, vecX src1, vecX src2)
3569 %{
3570   predicate(n->as_Vector()->length() == 16);
3571   match(Set dst (AddVB src1 src2));
3572   ins_cost(INSN_COST);
3573   format %{ "addv  $dst,$src1,$src2\t# vector (16B)" %}
3574   ins_encode %{
3575     __ addv($dst$$FloatRegister, __ T16B,
3576             $src1$$FloatRegister,
3577             $src2$$FloatRegister);
3578   %}
3579   ins_pipe(vdop128);
3580 %}
3581 
// Lane-wise short add on a 64-bit vector (AddVS with 2 or 4 lanes). The
// three-register addv form with the 4H arrangement adds src1 and src2.
3582 instruct vadd4S(vecD dst, vecD src1, vecD src2)
3583 %{
3584   predicate(n->as_Vector()->length() == 2 ||
3585             n->as_Vector()->length() == 4);
3586   match(Set dst (AddVS src1 src2));
3587   ins_cost(INSN_COST);
3588   format %{ "addv  $dst,$src1,$src2\t# vector (4H)" %}
3589   ins_encode %{
3590     __ addv($dst$$FloatRegister, __ T4H,
3591             $src1$$FloatRegister,
3592             $src2$$FloatRegister);
3593   %}
3594   ins_pipe(vdop64);
3595 %}
3596 
// Lane-wise short add on a 128-bit vector (AddVS with 8 lanes). The
// three-register addv form with the 8H arrangement adds src1 and src2.
3597 instruct vadd8S(vecX dst, vecX src1, vecX src2)
3598 %{
3599   predicate(n->as_Vector()->length() == 8);
3600   match(Set dst (AddVS src1 src2));
3601   ins_cost(INSN_COST);
3602   format %{ "addv  $dst,$src1,$src2\t# vector (8H)" %}
3603   ins_encode %{
3604     __ addv($dst$$FloatRegister, __ T8H,
3605             $src1$$FloatRegister,
3606             $src2$$FloatRegister);
3607   %}
3608   ins_pipe(vdop128);
3609 %}
3610 
// Lane-wise int add on a 64-bit vector (AddVI with 2 lanes). The
// three-register addv form with the 2S arrangement adds src1 and src2.
3611 instruct vadd2I(vecD dst, vecD src1, vecD src2)
3612 %{
3613   predicate(n->as_Vector()->length() == 2);
3614   match(Set dst (AddVI src1 src2));
3615   ins_cost(INSN_COST);
3616   format %{ "addv  $dst,$src1,$src2\t# vector (2S)" %}
3617   ins_encode %{
3618     __ addv($dst$$FloatRegister, __ T2S,
3619             $src1$$FloatRegister,
3620             $src2$$FloatRegister);
3621   %}
3622   ins_pipe(vdop64);
3623 %}
3624 
// Lane-wise int add on a 128-bit vector (AddVI with 4 lanes). The
// three-register addv form with the 4S arrangement adds src1 and src2.
3625 instruct vadd4I(vecX dst, vecX src1, vecX src2)
3626 %{
3627   predicate(n->as_Vector()->length() == 4);
3628   match(Set dst (AddVI src1 src2));
3629   ins_cost(INSN_COST);
3630   format %{ "addv  $dst,$src1,$src2\t# vector (4S)" %}
3631   ins_encode %{
3632     __ addv($dst$$FloatRegister, __ T4S,
3633             $src1$$FloatRegister,
3634             $src2$$FloatRegister);
3635   %}
3636   ins_pipe(vdop128);
3637 %}
3638 
// Lane-wise long add on a 128-bit vector (AddVL with 2 lanes). The
// three-register addv form with the 2D arrangement adds src1 and src2.
3639 instruct vadd2L(vecX dst, vecX src1, vecX src2)
3640 %{
3641   predicate(n->as_Vector()->length() == 2);
3642   match(Set dst (AddVL src1 src2));
3643   ins_cost(INSN_COST);
3644   format %{ "addv  $dst,$src1,$src2\t# vector (2L)" %}
3645   ins_encode %{
3646     __ addv($dst$$FloatRegister, __ T2D,
3647             $src1$$FloatRegister,
3648             $src2$$FloatRegister);
3649   %}
3650   ins_pipe(vdop128);
3651 %}
3652 
// Lane-wise float add on a 64-bit vector (AddVF with 2 lanes): one FADD
// with the 2S arrangement.
3653 instruct vadd2F(vecD dst, vecD src1, vecD src2)
3654 %{
3655   predicate(n->as_Vector()->length() == 2);
3656   match(Set dst (AddVF src1 src2));
3657   ins_cost(INSN_COST);
3658   format %{ "fadd  $dst,$src1,$src2\t# vector (2S)" %}
3659   ins_encode %{
3660     __ fadd($dst$$FloatRegister, __ T2S,
3661             $src1$$FloatRegister,
3662             $src2$$FloatRegister);
3663   %}
3664   ins_pipe(vdop_fp64);
3665 %}
3666 
// Lane-wise float add on a 128-bit vector (AddVF with 4 lanes): one FADD
// with the 4S arrangement.
3667 instruct vadd4F(vecX dst, vecX src1, vecX src2)
3668 %{
3669   predicate(n->as_Vector()->length() == 4);
3670   match(Set dst (AddVF src1 src2));
3671   ins_cost(INSN_COST);
3672   format %{ "fadd  $dst,$src1,$src2\t# vector (4S)" %}
3673   ins_encode %{
3674     __ fadd($dst$$FloatRegister, __ T4S,
3675             $src1$$FloatRegister,
3676             $src2$$FloatRegister);
3677   %}
3678   ins_pipe(vdop_fp128);
3679 %}
3680 
// Lane-wise double add on a 128-bit vector (AddVD with 2 lanes): one FADD
// with the 2D arrangement. The explicit length predicate matches vsub2D and
// the other 128-bit arithmetic rules, which all state their lane count.
3681 instruct vadd2D(vecX dst, vecX src1, vecX src2)
3682 %{
       predicate(n->as_Vector()->length() == 2);
3683   match(Set dst (AddVD src1 src2));
3684   ins_cost(INSN_COST);
3685   format %{ "fadd  $dst,$src1,$src2\t# vector (2D)" %}
3686   ins_encode %{
3687     __ fadd(as_FloatRegister($dst$$reg), __ T2D,
3688             as_FloatRegister($src1$$reg),
3689             as_FloatRegister($src2$$reg));
3690   %}
3691   ins_pipe(vdop_fp128);
3692 %}
3693 
3694 // --------------------------------- SUB --------------------------------------
3695 
// Lane-wise byte subtract (src1 - src2) on a 64-bit vector (SubVB with 4 or
// 8 lanes): one subv with the 8B arrangement.
3696 instruct vsub8B(vecD dst, vecD src1, vecD src2)
3697 %{
3698   predicate(n->as_Vector()->length() == 4 ||
3699             n->as_Vector()->length() == 8);
3700   match(Set dst (SubVB src1 src2));
3701   ins_cost(INSN_COST);
3702   format %{ "subv  $dst,$src1,$src2\t# vector (8B)" %}
3703   ins_encode %{
3704     __ subv($dst$$FloatRegister, __ T8B,
3705             $src1$$FloatRegister,
3706             $src2$$FloatRegister);
3707   %}
3708   ins_pipe(vdop64);
3709 %}
3710 
// Lane-wise byte subtract (src1 - src2) on a 128-bit vector (SubVB with 16
// lanes): one subv with the 16B arrangement.
3711 instruct vsub16B(vecX dst, vecX src1, vecX src2)
3712 %{
3713   predicate(n->as_Vector()->length() == 16);
3714   match(Set dst (SubVB src1 src2));
3715   ins_cost(INSN_COST);
3716   format %{ "subv  $dst,$src1,$src2\t# vector (16B)" %}
3717   ins_encode %{
3718     __ subv($dst$$FloatRegister, __ T16B,
3719             $src1$$FloatRegister,
3720             $src2$$FloatRegister);
3721   %}
3722   ins_pipe(vdop128);
3723 %}
3724 
// Lane-wise short subtract (src1 - src2) on a 64-bit vector (SubVS with 2
// or 4 lanes): one subv with the 4H arrangement.
3725 instruct vsub4S(vecD dst, vecD src1, vecD src2)
3726 %{
3727   predicate(n->as_Vector()->length() == 2 ||
3728             n->as_Vector()->length() == 4);
3729   match(Set dst (SubVS src1 src2));
3730   ins_cost(INSN_COST);
3731   format %{ "subv  $dst,$src1,$src2\t# vector (4H)" %}
3732   ins_encode %{
3733     __ subv($dst$$FloatRegister, __ T4H,
3734             $src1$$FloatRegister,
3735             $src2$$FloatRegister);
3736   %}
3737   ins_pipe(vdop64);
3738 %}
3739 
// Lane-wise short subtract (src1 - src2) on a 128-bit vector (SubVS with 8
// lanes): one subv with the 8H arrangement.
3740 instruct vsub8S(vecX dst, vecX src1, vecX src2)
3741 %{
3742   predicate(n->as_Vector()->length() == 8);
3743   match(Set dst (SubVS src1 src2));
3744   ins_cost(INSN_COST);
3745   format %{ "subv  $dst,$src1,$src2\t# vector (8H)" %}
3746   ins_encode %{
3747     __ subv($dst$$FloatRegister, __ T8H,
3748             $src1$$FloatRegister,
3749             $src2$$FloatRegister);
3750   %}
3751   ins_pipe(vdop128);
3752 %}
3753 
// Lane-wise int subtract (src1 - src2) on a 64-bit vector (SubVI with 2
// lanes): one subv with the 2S arrangement.
3754 instruct vsub2I(vecD dst, vecD src1, vecD src2)
3755 %{
3756   predicate(n->as_Vector()->length() == 2);
3757   match(Set dst (SubVI src1 src2));
3758   ins_cost(INSN_COST);
3759   format %{ "subv  $dst,$src1,$src2\t# vector (2S)" %}
3760   ins_encode %{
3761     __ subv($dst$$FloatRegister, __ T2S,
3762             $src1$$FloatRegister,
3763             $src2$$FloatRegister);
3764   %}
3765   ins_pipe(vdop64);
3766 %}
3767 
// Lane-wise int subtract (src1 - src2) on a 128-bit vector (SubVI with 4
// lanes): one subv with the 4S arrangement.
3768 instruct vsub4I(vecX dst, vecX src1, vecX src2)
3769 %{
3770   predicate(n->as_Vector()->length() == 4);
3771   match(Set dst (SubVI src1 src2));
3772   ins_cost(INSN_COST);
3773   format %{ "subv  $dst,$src1,$src2\t# vector (4S)" %}
3774   ins_encode %{
3775     __ subv($dst$$FloatRegister, __ T4S,
3776             $src1$$FloatRegister,
3777             $src2$$FloatRegister);
3778   %}
3779   ins_pipe(vdop128);
3780 %}
3781 
// Lane-wise long subtract (src1 - src2) on a 128-bit vector (SubVL with 2
// lanes): one subv with the 2D arrangement.
3782 instruct vsub2L(vecX dst, vecX src1, vecX src2)
3783 %{
3784   predicate(n->as_Vector()->length() == 2);
3785   match(Set dst (SubVL src1 src2));
3786   ins_cost(INSN_COST);
3787   format %{ "subv  $dst,$src1,$src2\t# vector (2L)" %}
3788   ins_encode %{
3789     __ subv($dst$$FloatRegister, __ T2D,
3790             $src1$$FloatRegister,
3791             $src2$$FloatRegister);
3792   %}
3793   ins_pipe(vdop128);
3794 %}
3795 
// Lane-wise float subtract (src1 - src2) on a 64-bit vector (SubVF with 2
// lanes): one FSUB with the 2S arrangement.
3796 instruct vsub2F(vecD dst, vecD src1, vecD src2)
3797 %{
3798   predicate(n->as_Vector()->length() == 2);
3799   match(Set dst (SubVF src1 src2));
3800   ins_cost(INSN_COST);
3801   format %{ "fsub  $dst,$src1,$src2\t# vector (2S)" %}
3802   ins_encode %{
3803     __ fsub($dst$$FloatRegister, __ T2S,
3804             $src1$$FloatRegister,
3805             $src2$$FloatRegister);
3806   %}
3807   ins_pipe(vdop_fp64);
3808 %}
3809 
// Lane-wise float subtract (src1 - src2) on a 128-bit vector (SubVF with 4
// lanes): one FSUB with the 4S arrangement.
3810 instruct vsub4F(vecX dst, vecX src1, vecX src2)
3811 %{
3812   predicate(n->as_Vector()->length() == 4);
3813   match(Set dst (SubVF src1 src2));
3814   ins_cost(INSN_COST);
3815   format %{ "fsub  $dst,$src1,$src2\t# vector (4S)" %}
3816   ins_encode %{
3817     __ fsub($dst$$FloatRegister, __ T4S,
3818             $src1$$FloatRegister,
3819             $src2$$FloatRegister);
3820   %}
3821   ins_pipe(vdop_fp128);
3822 %}
3823 
// Floating-point vector subtract, 2D arrangement.
// Selected for a SubVD node whose vector length is 2; emits "fsub"
// so that dst = src1 - src2 per lane.
instruct vsub2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (SubVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fsub  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fsub(dst_reg, __ T2D, lhs_reg, rhs_reg);
  %}
  ins_pipe(vdop_fp128);
%}
3837 
3838 // --------------------------------- MUL --------------------------------------
3839 
// Vector multiply, 8B arrangement.
// Selected for a MulVB node of vector length 4 or 8; both lengths are
// encoded with the same 8B "mulv".
instruct vmul8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MulVB src1 src2));
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul64);
%}
3854 
// Vector multiply, 16B arrangement.
// Selected for a MulVB node whose vector length is 16.
instruct vmul16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVB src1 src2));
  predicate(n->as_Vector()->length() == 16);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul128);
%}
3868 
// Vector multiply, 4H arrangement.
// Selected for a MulVS node of vector length 2 or 4; both lengths are
// encoded with the same 4H "mulv".
instruct vmul4S(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MulVS src1 src2));
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T4H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul64);
%}
3883 
// Vector multiply, 8H arrangement.
// Selected for a MulVS node whose vector length is 8.
instruct vmul8S(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVS src1 src2));
  predicate(n->as_Vector()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T8H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul128);
%}
3897 
// Vector multiply, 2S arrangement.
// Selected for a MulVI node whose vector length is 2.
instruct vmul2I(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MulVI src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul64);
%}
3911 
// Vector multiply, 4S arrangement.
// Selected for a MulVI node whose vector length is 4.
instruct vmul4I(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVI src1 src2));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "mulv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mulv(dst_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmul128);
%}
3925 
// Floating-point vector multiply, 2S arrangement.
// Selected for a MulVF node whose vector length is 2; emits "fmul".
instruct vmul2F(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (MulVF src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmul(dst_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
3939 
// Floating-point vector multiply, 4S arrangement.
// Selected for a MulVF node whose vector length is 4; emits "fmul".
instruct vmul4F(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmul(dst_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3953 
// Floating-point vector multiply, 2D arrangement.
// Selected for a MulVD node whose vector length is 2; emits "fmul".
instruct vmul2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (MulVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmul  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmul(dst_reg, __ T2D, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
3967 
3968 // --------------------------------- MLA --------------------------------------
3969 
// Vector multiply-accumulate, 4H arrangement: dst += src1 * src2.
// Fuses AddVS(dst, MulVS(src1, src2)) into one "mlav" when the vector
// length is 2 or 4; dst is both the accumulator input and the result.
instruct vmla4S(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AddVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlav(acc_reg, __ T4H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla64);
%}
3984 
// Vector multiply-accumulate, 8H arrangement: dst += src1 * src2.
// Fuses AddVS(dst, MulVS(src1, src2)) into one "mlav" when the vector
// length is 8.
instruct vmla8S(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlav(acc_reg, __ T8H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla128);
%}
3998 
// Vector multiply-accumulate, 2S arrangement: dst += src1 * src2.
// Fuses AddVI(dst, MulVI(src1, src2)) into one "mlav" when the vector
// length is 2.
instruct vmla2I(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AddVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlav(acc_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla64);
%}
4012 
// Vector multiply-accumulate, 4S arrangement: dst += src1 * src2.
// Fuses AddVI(dst, MulVI(src1, src2)) into one "mlav" when the vector
// length is 4.
instruct vmla4I(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AddVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlav(acc_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla128);
%}
4026 
// Floating-point fused multiply-add, 2S arrangement: dst + src1 * src2.
// Only selected when UseFMA is set and the FmaVF node has vector length 2;
// dst carries the addend in and the result out.
instruct vmla2F(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmla(acc_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
4041 
// Floating-point fused multiply-add, 4S arrangement: dst + src1 * src2.
// Only selected when UseFMA is set and the FmaVF node has vector length 4;
// dst carries the addend in and the result out.
instruct vmla4F(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (FmaVF  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmla(acc_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4056 
// Floating-point fused multiply-add, 2D arrangement: dst + src1 * src2.
// Only selected when UseFMA is set and the FmaVD node has vector length 2;
// dst carries the addend in and the result out.
instruct vmla2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (FmaVD  dst (Binary src1 src2)));
  predicate(UseFMA && n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fmla  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmla(acc_reg, __ T2D, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4071 
4072 // --------------------------------- MLS --------------------------------------
4073 
// Vector multiply-subtract, 4H arrangement: dst -= src1 * src2.
// Fuses SubVS(dst, MulVS(src1, src2)) into one "mlsv" when the vector
// length is 2 or 4.
instruct vmls4S(vecD dst, vecD src1, vecD src2) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (SubVS dst (MulVS src1 src2)));
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlsv(acc_reg, __ T4H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla64);
%}
4087 
// Vector multiply-subtract, 8H arrangement: dst -= src1 * src2.
// Fuses SubVS(dst, MulVS(src1, src2)) into one "mlsv" when the vector
// length is 8.
instruct vmls8S(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVS dst (MulVS src1 src2)));
  predicate(n->as_Vector()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlsv(acc_reg, __ T8H, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla128);
%}
4100 
// Vector multiply-subtract, 2S arrangement: dst -= src1 * src2.
// Fuses SubVI(dst, MulVI(src1, src2)) into one "mlsv" when the vector
// length is 2.
instruct vmls2I(vecD dst, vecD src1, vecD src2) %{
  match(Set dst (SubVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlsv(acc_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla64);
%}
4113 
// Vector multiply-subtract, 4S arrangement: dst -= src1 * src2.
// Fuses SubVI(dst, MulVI(src1, src2)) into one "mlsv" when the vector
// length is 4.
instruct vmls4I(vecX dst, vecX src1, vecX src2) %{
  match(Set dst (SubVI dst (MulVI src1 src2)));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ mlsv(acc_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmla128);
%}
4126 
// Floating-point fused multiply-subtract, 2S arrangement: dst - src1 * src2.
// Two match rules cover the negation sitting on either multiplicand of the
// FmaVF node; both map onto the same "fmls". Requires UseFMA and a vector
// length of 2.
instruct vmls2F(vecD dst, vecD src1, vecD src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmls(acc_reg, __ T2S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
4141 
// Floating-point fused multiply-subtract, 4S arrangement: dst - src1 * src2.
// Two match rules cover the negation sitting on either multiplicand of the
// FmaVF node; both map onto the same "fmls". Requires UseFMA and a vector
// length of 4.
instruct vmls4F(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 4);
  match(Set dst (FmaVF  dst (Binary (NegVF src1) src2)));
  match(Set dst (FmaVF  dst (Binary src1 (NegVF src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmls(acc_reg, __ T4S, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4156 
// Floating-point fused multiply-subtract, 2D arrangement: dst - src1 * src2.
// Two match rules cover the negation sitting on either multiplicand of the
// FmaVD node; both map onto the same "fmls". Requires UseFMA and a vector
// length of 2.
instruct vmls2D(vecX dst, vecX src1, vecX src2) %{
  predicate(UseFMA && n->as_Vector()->length() == 2);
  match(Set dst (FmaVD  dst (Binary (NegVD src1) src2)));
  match(Set dst (FmaVD  dst (Binary src1 (NegVD src2))));
  ins_cost(INSN_COST);
  format %{ "fmls  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ fmls(acc_reg, __ T2D, lhs_reg, rhs_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4171 
4172 // --------------- Vector Multiply-Add Shorts into Integer --------------------
4173 
// Multiply-add of short vectors into an int vector (MulAddVS2VI).
// Selected when the first input's element type is T_SHORT. Three steps:
//   1. smullv (4H arrangement) of src1/src2 into tmp,
//   2. smullv (8H arrangement) of src1/src2 into dst,
//   3. addpv (4S) of tmp and dst into dst.
// NOTE(review): the 8H smullv presumably multiplies the upper halves of the
// inputs (smull2 form) - confirm against the assembler.
// tmp is a scratch vector register; dst is declared TEMP_DEF.
instruct vmuladdS2I(vecX dst, vecX src1, vecX src2, vecX tmp) %{
  predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
  match(Set dst (MulAddVS2VI src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst, TEMP tmp);
  format %{ "smullv  $tmp, $src1, $src2\t# vector (4H)\n\t"
            "smullv  $dst, $src1, $src2\t# vector (8H)\n\t"
            "addpv   $dst, $tmp, $dst\t# vector (4S)" %}
  ins_encode %{
    FloatRegister result_reg  = as_FloatRegister($dst$$reg);
    FloatRegister scratch_reg = as_FloatRegister($tmp$$reg);
    FloatRegister lhs_reg     = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg     = as_FloatRegister($src2$$reg);

    // Two widening multiplies, one per arrangement.
    __ smullv(scratch_reg, __ T4H, lhs_reg, rhs_reg);
    __ smullv(result_reg, __ T8H, lhs_reg, rhs_reg);
    // Pairwise add of both product vectors.
    __ addpv(result_reg, __ T4S, scratch_reg, result_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4195 
4196 // --------------------------------- DIV --------------------------------------
4197 
// Floating-point vector divide, 2S arrangement.
// Selected for a DivVF node whose vector length is 2; emits "fdiv"
// so that dst = src1 / src2 per lane.
instruct vdiv2F(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (DivVF src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg      = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(dst_reg, __ T2S, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp64);
%}
4211 
// Floating-point vector divide, 4S arrangement.
// Selected for a DivVF node whose vector length is 4; emits "fdiv"
// so that dst = src1 / src2 per lane.
instruct vdiv4F(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (DivVF src1 src2));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg      = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(dst_reg, __ T4S, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4225 
// Floating-point vector divide, 2D arrangement.
// Selected for a DivVD node whose vector length is 2; emits "fdiv"
// so that dst = src1 / src2 per lane.
instruct vdiv2D(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (DivVD src1 src2));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "fdiv  $dst,$src1,$src2\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg      = as_FloatRegister($dst$$reg);
    FloatRegister dividend_reg = as_FloatRegister($src1$$reg);
    FloatRegister divisor_reg  = as_FloatRegister($src2$$reg);
    __ fdiv(dst_reg, __ T2D, dividend_reg, divisor_reg);
  %}
  ins_pipe(vmuldiv_fp128);
%}
4239 
4240 // --------------------------------- SQRT -------------------------------------
4241 
// Floating-point vector square root, encoded with the 2S arrangement.
// Selected for a SqrtVF node whose vector length is 2. No explicit
// ins_cost is given for this rule.
instruct vsqrt2F(vecD dst, vecD src)
%{
  match(Set dst (SqrtVF src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fsqrt  $dst, $src\t# vector (2F)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T2S, src_reg);
  %}
  ins_pipe(vunop_fp64);
%}
4252 
// Floating-point vector square root, encoded with the 4S arrangement.
// Selected for a SqrtVF node whose vector length is 4. No explicit
// ins_cost is given for this rule.
instruct vsqrt4F(vecX dst, vecX src)
%{
  match(Set dst (SqrtVF src));
  predicate(n->as_Vector()->length() == 4);
  format %{ "fsqrt  $dst, $src\t# vector (4F)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T4S, src_reg);
  %}
  ins_pipe(vsqrt_fp128);
%}
4263 
// Floating-point vector square root, 2D arrangement.
// Selected for a SqrtVD node whose vector length is 2. No explicit
// ins_cost is given for this rule.
instruct vsqrt2D(vecX dst, vecX src)
%{
  match(Set dst (SqrtVD src));
  predicate(n->as_Vector()->length() == 2);
  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fsqrt(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vsqrt_fp128);
%}
4274 
4275 // --------------------------------- NEG --------------------------------------
4276 
// Vector negate for NegVI nodes shorter than 16 bytes.
// One rule serves the 8B, 4H and 2S cases: the arrangement is derived at
// encode time from the node's element type (esize2arrangement with the
// second argument false).
instruct vnegID(vecD dst, vecD src)
%{
  match(Set dst (NegVI src));
  predicate(n->as_Vector()->length_in_bytes() < 16);
  ins_cost(INSN_COST);
  format %{ "negr  $dst, $src\t# vector (8B/4H/2S)" %}
  ins_encode %{
    // Element size in bytes of this node's vector element type.
    const unsigned elem_bytes =
        (unsigned)type2aelembytes(Matcher::vector_element_basic_type(this));
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, false);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ negr(dst_reg, arrangement, src_reg);
  %}
  ins_pipe(vunop_fp64);
%}
4290 
// Vector negate for NegVI nodes that are exactly 16 bytes long.
// One rule serves the 16B, 8H and 4S cases: the arrangement is derived at
// encode time from the node's element type (esize2arrangement with the
// second argument true).
instruct vnegIX(vecX dst, vecX src)
%{
  match(Set dst (NegVI src));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "negr  $dst, $src\t# vector (16B/8H/4S)" %}
  ins_encode %{
    // Element size in bytes of this node's vector element type.
    const unsigned elem_bytes =
        (unsigned)type2aelembytes(Matcher::vector_element_basic_type(this));
    Assembler::SIMD_Arrangement arrangement = __ esize2arrangement(elem_bytes, true);
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ negr(dst_reg, arrangement, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4304 
// Vector negate, 2D arrangement.
// Selected for a NegVL node whose vector length is 2; emits "negr".
instruct vneg2L(vecX dst, vecX src)
%{
  match(Set dst (NegVL src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST);
  format %{ "negr  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ negr(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4317 
// Floating-point vector negate, 2S arrangement.
// Selected for a NegVF node whose vector length is 2. Costed at three
// times INSN_COST.
instruct vneg2F(vecD dst, vecD src)
%{
  match(Set dst (NegVF src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T2S, src_reg);
  %}
  ins_pipe(vunop_fp64);
%}
4330 
// Floating-point vector negate, 4S arrangement.
// Selected for a NegVF node whose vector length is 4. Costed at three
// times INSN_COST.
instruct vneg4F(vecX dst, vecX src)
%{
  match(Set dst (NegVF src));
  predicate(n->as_Vector()->length() == 4);
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T4S, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4343 
// Floating-point vector negate, 2D arrangement.
// Selected for a NegVD node whose vector length is 2. Costed at three
// times INSN_COST.
instruct vneg2D(vecX dst, vecX src)
%{
  match(Set dst (NegVD src));
  predicate(n->as_Vector()->length() == 2);
  ins_cost(INSN_COST * 3);
  format %{ "fneg  $dst,$src\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    __ fneg(dst_reg, __ T2D, src_reg);
  %}
  ins_pipe(vunop_fp128);
%}
4356 
4357 // --------------------------------- AND --------------------------------------
4358 
// Bitwise AND of two vectors, 8B arrangement.
// The predicate tests the size in bytes (4 or 8), not the element count,
// so this one rule serves every AndV node of that size.
instruct vand8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (AndV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 4 || n->as_Vector()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ andr(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical64);
%}
4373 
// Bitwise AND of two vectors, 16B arrangement.
// Selected for any AndV node whose size is 16 bytes.
instruct vand16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (AndV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "and  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ andr(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical128);
%}
4387 
4388 // --------------------------------- OR ---------------------------------------
4389 
// Bitwise OR of two vectors, 8B arrangement.
// The predicate tests the size in bytes (4 or 8), not the element count,
// so this one rule serves every OrV node of that size.
instruct vor8B(vecD dst, vecD src1, vecD src2)
%{
  match(Set dst (OrV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 4 || n->as_Vector()->length_in_bytes() == 8);
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ orr(dst_reg, __ T8B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical64);
%}
4404 
// Bitwise OR of two vectors, 16B arrangement.
// Selected for any OrV node whose size is 16 bytes.
instruct vor16B(vecX dst, vecX src1, vecX src2)
%{
  match(Set dst (OrV src1 src2));
  predicate(n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "orr  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister lhs_reg = as_FloatRegister($src1$$reg);
    FloatRegister rhs_reg = as_FloatRegister($src2$$reg);
    __ orr(dst_reg, __ T16B, lhs_reg, rhs_reg);
  %}
  ins_pipe(vlogical128);
%}
4418 
4419 // --------------------------------- XOR --------------------------------------
4420 
// Bitwise exclusive-OR of two vectors, 8B arrangement.
// The predicate tests the size in bytes (4 or 8), not the element count,
// so this one rule serves every XorV node of that size.
// The format prints "eor", the mnemonic the encoder actually emits, in line
// with the AND/OR rules which also print their real mnemonics.
instruct vxor8B(vecD dst, vecD src1, vecD src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 4 ||
            n->as_Vector()->length_in_bytes() == 8);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (8B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical64);
%}
4435 
// Bitwise exclusive-OR of two vectors, 16B arrangement.
// Selected for any XorV node whose size is 16 bytes.
// The format prints "eor", the mnemonic the encoder actually emits, in line
// with the AND/OR rules which also print their real mnemonics.
instruct vxor16B(vecX dst, vecX src1, vecX src2)
%{
  predicate(n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (XorV src1 src2));
  ins_cost(INSN_COST);
  format %{ "eor  $dst,$src1,$src2\t# vector (16B)" %}
  ins_encode %{
    __ eor(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src1$$reg),
            as_FloatRegister($src2$$reg));
  %}
  ins_pipe(vlogical128);
%}
4449 
4450 // ------------------------------ Shift ---------------------------------------
4451 
4452 // Vector shift count
4453 // Note-1: Low 8 bits of each element are used, so it doesn't matter if we
4454 //         treat it as ints or bytes here.
4455 // Note-2: Shift value is negated for RShiftCntV additionally. See the comments
4456 //         on vsra8B rule for more details.
4457 
// Left-shift count vector (LShiftCntV), 8B arrangement.
// Broadcasts the scalar count register into every byte lane with "dup".
// Only used when SVE is off and the vector is 4 or 8 bytes long.
instruct vslcnt8B(vecD dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 4 ||
                            n->as_Vector()->length_in_bytes() == 8));
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $cnt\t# shift count vector (8B)" %}
  ins_encode %{
    FloatRegister count_vec = as_FloatRegister($dst$$reg);
    Register      count_gpr = as_Register($cnt$$reg);
    __ dup(count_vec, __ T8B, count_gpr);
  %}
  ins_pipe(vdup_reg_reg64);
%}
4469 
// Left-shift count vector (LShiftCntV), 16B arrangement.
// Broadcasts the scalar count register into every byte lane with "dup".
// Only used when SVE is off and the vector is 16 bytes long.
instruct vslcnt16B(vecX dst, iRegIorL2I cnt) %{
  match(Set dst (LShiftCntV cnt));
  predicate(UseSVE == 0 && n->as_Vector()->length_in_bytes() == 16);
  ins_cost(INSN_COST);
  format %{ "dup  $dst, $cnt\t# shift count vector (16B)" %}
  ins_encode %{
    FloatRegister count_vec = as_FloatRegister($dst$$reg);
    Register      count_gpr = as_Register($cnt$$reg);
    __ dup(count_vec, __ T16B, count_gpr);
  %}
  ins_pipe(vdup_reg_reg128);
%}
4480 
// Right-shift count vector (RShiftCntV), 8B arrangement.
// The scalar count is negated into rscratch1 and then broadcast into every
// byte lane, so the right-shift rules that consume this vector can use the
// left-shift-by-register instructions directly.
// Only used when SVE is off and the vector is 4 or 8 bytes long.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsrcnt8B(vecD dst, iRegIorL2I cnt) %{
  predicate(UseSVE == 0 && (n->as_Vector()->length_in_bytes() == 4 ||
                            n->as_Vector()->length_in_bytes() == 8));
  match(Set dst (RShiftCntV cnt));
  ins_cost(INSN_COST * 2);
  format %{ "negw  rscratch1, $cnt\n\t"
            "dup   $dst, rscratch1\t# shift count vector (8B)" %}
  ins_encode %{
    // rscratch1 = -cnt (32-bit negate)
    __ negw(rscratch1, as_Register($cnt$$reg));
    __ dup(as_FloatRegister($dst$$reg), __ T8B, rscratch1);
  %}
  ins_pipe(vdup_reg_reg64);
%}
4494 
// Right-shift count vector (RShiftCntV), 16B arrangement.
// The scalar count is negated into rscratch1 and then broadcast into every
// byte lane, so the right-shift rules that consume this vector can use the
// left-shift-by-register instructions directly.
// Only used when SVE is off and the vector is 16 bytes long.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsrcnt16B(vecX dst, iRegIorL2I cnt) %{
  predicate(UseSVE == 0 && n->as_Vector()->length_in_bytes() == 16);
  match(Set dst (RShiftCntV cnt));
  ins_cost(INSN_COST * 2);
  format %{ "negw  rscratch1, $cnt\n\t"
            "dup   $dst, rscratch1\t# shift count vector (16B)" %}
  ins_encode %{
    // rscratch1 = -cnt (32-bit negate)
    __ negw(rscratch1, as_Register($cnt$$reg));
    __ dup(as_FloatRegister($dst$$reg), __ T16B, rscratch1);
  %}
  ins_pipe(vdup_reg_reg128);
%}
4507 
// Vector left shift by a vector count, 8B arrangement.
// Selected for an LShiftVB node of vector length 4 or 8; emits "sshl"
// with the count vector as the shift operand.
instruct vsll8B(vecD dst, vecD src, vecD shift) %{
  match(Set dst (LShiftVB src shift));
  predicate(n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8);
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8B, src_reg, count_reg);
  %}
  ins_pipe(vshift64);
%}
4520 
// Vector left shift by a vector count, 16B arrangement.
// Selected for an LShiftVB node of vector length 16; emits "sshl"
// with the count vector as the shift operand.
instruct vsll16B(vecX dst, vecX src, vecX shift) %{
  match(Set dst (LShiftVB src shift));
  predicate(n->as_Vector()->length() == 16);
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T16B, src_reg, count_reg);
  %}
  ins_pipe(vshift128);
%}
4533 
4534 // Right shifts with vector shift count on aarch64 SIMD are implemented
4535 // as left shift by negative shift count.
4536 // There are two cases for vector shift count.
4537 //
4538 // Case 1: The vector shift count is from replication.
4539 //        |            |
4540 //    LoadVector  RShiftCntV
4541 //        |       /
4542 //     RShiftVI
4543 //
4544 // Case 2: The vector shift count is from loading.
4545 // This case isn't supported by middle-end now. But it's supported by
4546 // panama/vectorIntrinsics(JEP 338: Vector API).
4547 //        |            |
4548 //    LoadVector  LoadVector
4549 //        |       /
4550 //     RShiftVI
4551 //
4552 // The negate is conducted in RShiftCntV rule for case 1, whereas it's done in
4553 // RShiftV* rules for case 2. Because there exists an optimization opportunity
4554 // for case 1, that is, multiple neg instructions in inner loop can be hoisted
4555 // to outer loop and merged into one neg instruction.
4556 //
4557 // Note that ShiftVNode::is_var_shift() indicates whether the vector shift
4558 // count is a variable vector(case 2) or not(a vector generated by RShiftCntV,
4559 // i.e. case 1).
4560 
// Arithmetic right shift, 8B arrangement, non-variable shift count.
// Applies when the RShiftVB node has vector length 4 or 8 and
// is_var_shift() is false, i.e. the count vector comes from RShiftCntV and
// is already negated. A plain "sshl" with that count therefore performs
// the right shift.
instruct vsra8B(vecD dst, vecD src, vecD shift) %{
  match(Set dst (RShiftVB src shift));
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            !n->as_ShiftV()->is_var_shift());
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8B, src_reg, count_reg);
  %}
  ins_pipe(vshift64);
%}
4574 
// Arithmetic right shift, 8B arrangement, variable shift count.
// Applies when the RShiftVB node has vector length 4 or 8 and
// is_var_shift() is true, i.e. the count vector was not produced by
// RShiftCntV and has not been negated yet. The rule negates the count into
// dst and then shifts with "sshl".
// dst is written before src is read, hence TEMP_DEF on dst.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsra8B_var(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\n\t"
            "sshl  $dst,$src,$dst\t# vector (8B)" %}
  ins_encode %{
    // dst = -shift, used as the (negative) left-shift amount below.
    __ negr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift64);
%}
4592 
// Arithmetic right shift, 16B arrangement, non-variable shift count.
// Applies when the RShiftVB node has vector length 16 and is_var_shift()
// is false, i.e. the count vector comes from RShiftCntV and is already
// negated. A plain "sshl" with that count therefore performs the right
// shift.
instruct vsra16B(vecX dst, vecX src, vecX shift) %{
  match(Set dst (RShiftVB src shift));
  predicate(n->as_Vector()->length() == 16 && !n->as_ShiftV()->is_var_shift());
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T16B, src_reg, count_reg);
  %}
  ins_pipe(vshift128);
%}
4605 
// Arithmetic right shift, 16B arrangement, variable shift count.
// Applies when the RShiftVB node has vector length 16 and is_var_shift()
// is true, i.e. the count vector was not produced by RShiftCntV and has
// not been negated yet. The rule negates the count into dst and then
// shifts with "sshl".
// dst is written before src is read, hence TEMP_DEF on dst.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsra16B_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16 && n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVB src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\n\t"
            "sshl  $dst,$src,$dst\t# vector (16B)" %}
  ins_encode %{
    // dst = -shift, used as the (negative) left-shift amount below.
    __ negr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ sshl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift128);
%}
4622 
// Logical right shift, 8B arrangement, non-variable shift count.
// Applies when the URShiftVB node has vector length 4 or 8 and
// is_var_shift() is false, i.e. the count vector comes from RShiftCntV and
// is already negated. A plain "ushl" with that count therefore performs
// the right shift.
instruct vsrl8B(vecD dst, vecD src, vecD shift) %{
  match(Set dst (URShiftVB src shift));
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            !n->as_ShiftV()->is_var_shift());
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T8B, src_reg, count_reg);
  %}
  ins_pipe(vshift64);
%}
4636 
// Logical right shift, 8B arrangement, variable shift count.
// Applies when the URShiftVB node has vector length 4 or 8 and
// is_var_shift() is true, i.e. the count vector was not produced by
// RShiftCntV and has not been negated yet. The rule negates the count into
// dst and then shifts with "ushl".
// dst is written before src is read, hence TEMP_DEF on dst.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsrl8B_var(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\n\t"
            "ushl  $dst,$src,$dst\t# vector (8B)" %}
  ins_encode %{
    // dst = -shift, used as the (negative) left-shift amount below.
    __ negr(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T8B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift64);
%}
4654 
// Logical right shift, 16B arrangement, non-variable shift count.
// Applies when the URShiftVB node has vector length 16 and is_var_shift()
// is false, i.e. the count vector comes from RShiftCntV and is already
// negated. A plain "ushl" with that count therefore performs the right
// shift.
instruct vsrl16B(vecX dst, vecX src, vecX shift) %{
  match(Set dst (URShiftVB src shift));
  predicate(n->as_Vector()->length() == 16 && !n->as_ShiftV()->is_var_shift());
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg   = as_FloatRegister($dst$$reg);
    FloatRegister src_reg   = as_FloatRegister($src$$reg);
    FloatRegister count_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T16B, src_reg, count_reg);
  %}
  ins_pipe(vshift128);
%}
4667 
// Logical right shift, 16B arrangement, variable shift count.
// Applies when the URShiftVB node has vector length 16 and is_var_shift()
// is true, i.e. the count vector was not produced by RShiftCntV and has
// not been negated yet. The rule negates the count into dst and then
// shifts with "ushl".
// dst is written before src is read, hence TEMP_DEF on dst.
// The two format lines are joined with "\n\t" so each emitted instruction
// is printed on its own line.
instruct vsrl16B_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 16 && n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVB src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\n\t"
            "ushl  $dst,$src,$dst\t# vector (16B)" %}
  ins_encode %{
    // dst = -shift, used as the (negative) left-shift amount below.
    __ negr(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($shift$$reg));
    __ ushl(as_FloatRegister($dst$$reg), __ T16B,
            as_FloatRegister($src$$reg),
            as_FloatRegister($dst$$reg));
  %}
  ins_pipe(vshift128);
%}
4684 
// Left shift by an immediate count, 8B arrangement.
// Matches LShiftVB whose count is LShiftCntV of a constant, for vector
// length 4 or 8. A count below 8 is encoded as "shl" with that immediate;
// a count of 8 or more shifts every bit out of a byte lane, so the result
// is produced as zero by XOR-ing src with itself.
instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            assert_not_var_shift(n));
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int shift_amount = (int)$shift$$constant;
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    if (shift_amount < 8) {
      __ shl(dst_reg, __ T8B, src_reg, shift_amount);
    } else {
      // Whole lane shifted out: dst = src ^ src = 0.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4704 
// Left shift of 16 byte lanes by an immediate count.
// Counts below 8 use SHL; a count of 8 or more zeroes dst via EOR of src with itself.
instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16 && assert_not_var_shift(n));
  match(Set dst (LShiftVB src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 8) {
      __ shl(dst_reg, __ T16B, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4723 
// Arithmetic (signed) right shift of 4 or 8 byte lanes by an immediate count.
// Counts of 8 or more are clamped to 7 before SSHR is emitted.
instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            assert_not_var_shift(n));
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 8) ? 7 : raw;
    __ sshr(dst_reg, __ T8B, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4738 
// Arithmetic (signed) right shift of 16 byte lanes by an immediate count.
// Counts of 8 or more are clamped to 7 before SSHR is emitted.
instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16 && assert_not_var_shift(n));
  match(Set dst (RShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 8) ? 7 : raw;
    __ sshr(dst_reg, __ T16B, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
4752 
// Logical (unsigned) right shift of 4 or 8 byte lanes by an immediate count.
// Counts below 8 use USHR; a count of 8 or more zeroes dst via EOR of src with itself.
instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
            assert_not_var_shift(n));
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 8) {
      __ ushr(dst_reg, __ T8B, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4772 
// Logical (unsigned) right shift of 16 byte lanes by an immediate count.
// Counts below 8 use USHR; a count of 8 or more zeroes dst via EOR of src with itself.
instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16 && assert_not_var_shift(n));
  match(Set dst (URShiftVB src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 8) {
      __ ushr(dst_reg, __ T16B, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4791 
// Left shift of 2 or 4 halfword lanes by a count vector (SSHL, 4H arrangement).
instruct vsll4S(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4804 
// Left shift of 8 halfword lanes by a count vector (SSHL, 8H arrangement).
instruct vsll8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (LShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4817 
// Arithmetic right shift of 2 or 4 halfword lanes by a non-variable count vector.
// NOTE(review): SSHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsra4S(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4831 
// Arithmetic right shift of 2 or 4 halfword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T8B),
// then the SSHL result (T4H).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsra4S_var(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "sshl  $dst,$src,$dst\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T8B, cnt_reg);
    __ sshl(dst_reg, __ T4H, src_reg, dst_reg);
  %}
  ins_pipe(vshift64);
%}
4849 
// Arithmetic right shift of 8 halfword lanes by a non-variable count vector.
// NOTE(review): SSHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsra8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T8H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4862 
// Arithmetic right shift of 8 halfword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the SSHL result (T8H).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsra8S_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8 && n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "sshl  $dst,$src,$dst\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T8H, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
4879 
// Logical right shift of 2 or 4 halfword lanes by a non-variable count vector.
// NOTE(review): USHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsrl4S(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            !n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T4H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
4893 
// Logical right shift of 2 or 4 halfword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T8B),
// then the USHL result (T4H).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsrl4S_var(vecD dst, vecD src, vecD shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "ushl  $dst,$src,$dst\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T8B, cnt_reg);
    __ ushl(dst_reg, __ T4H, src_reg, dst_reg);
  %}
  ins_pipe(vshift64);
%}
4911 
// Logical right shift of 8 halfword lanes by a non-variable count vector.
// NOTE(review): USHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsrl8S(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T8H, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
4924 
// Logical right shift of 8 halfword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the USHL result (T8H).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsrl8S_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 8 && n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "ushl  $dst,$src,$dst\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T8H, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
4941 
// Left shift of 2 or 4 halfword lanes by an immediate count.
// Counts below 16 use SHL (4H); a count of 16 or more zeroes dst via an
// 8B-arranged EOR of src with itself.
instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            assert_not_var_shift(n));
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 16) {
      __ shl(dst_reg, __ T4H, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
4961 
// Left shift of 8 halfword lanes by an immediate count.
// Counts below 16 use SHL (8H); a count of 16 or more zeroes dst via a
// 16B-arranged EOR of src with itself.
instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8 && assert_not_var_shift(n));
  match(Set dst (LShiftVS src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 16) {
      __ shl(dst_reg, __ T8H, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
4980 
// Arithmetic right shift of 2 or 4 halfword lanes by an immediate count.
// Counts of 16 or more are clamped to 15 before SSHR is emitted.
instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            assert_not_var_shift(n));
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 16) ? 15 : raw;
    __ sshr(dst_reg, __ T4H, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
4995 
// Arithmetic right shift of 8 halfword lanes by an immediate count.
// Counts of 16 or more are clamped to 15 before SSHR is emitted.
instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8 && assert_not_var_shift(n));
  match(Set dst (RShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 16) ? 15 : raw;
    __ sshr(dst_reg, __ T8H, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5009 
// Logical right shift of 2 or 4 halfword lanes by an immediate count.
// Counts below 16 use USHR (4H); a count of 16 or more zeroes dst via an
// 8B-arranged EOR of src with itself.
instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{
  predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4) &&
            assert_not_var_shift(n));
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 16) {
      __ ushr(dst_reg, __ T4H, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T8B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift64_imm);
%}
5029 
// Logical right shift of 8 halfword lanes by an immediate count.
// Counts below 16 use USHR (8H); a count of 16 or more zeroes dst via a
// 16B-arranged EOR of src with itself.
instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8 && assert_not_var_shift(n));
  match(Set dst (URShiftVS src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    if (count < 16) {
      __ ushr(dst_reg, __ T8H, src_reg, count);
    } else {
      // src ^ src == 0 in every lane.
      __ eor(dst_reg, __ T16B, src_reg, src_reg);
    }
  %}
  ins_pipe(vshift128_imm);
%}
5048 
// Left shift of 2 word lanes by a count vector (SSHL, 2S arrangement).
instruct vsll2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
5061 
// Left shift of 4 word lanes by a count vector (SSHL, 4S arrangement).
instruct vsll4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (LShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5074 
// Arithmetic right shift of 2 word lanes by a non-variable count vector.
// NOTE(review): SSHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsra2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
5087 
// Arithmetic right shift of 2 word lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T8B),
// then the SSHL result (T2S).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsra2I_var(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 && n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "sshl  $dst,$src,$dst\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T8B, cnt_reg);
    __ sshl(dst_reg, __ T2S, src_reg, dst_reg);
  %}
  ins_pipe(vshift64);
%}
5104 
// Arithmetic right shift of 4 word lanes by a non-variable count vector.
// NOTE(review): SSHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsra4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T4S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5117 
// Arithmetic right shift of 4 word lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the SSHL result (T4S).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsra4I_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 && n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVI src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "sshl  $dst,$src,$dst\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T4S, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
5134 
// Logical right shift of 2 word lanes by a non-variable count vector.
// NOTE(review): USHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsrl2I(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T2S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift64);
%}
5147 
// Logical right shift of 2 word lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T8B),
// then the USHL result (T2S).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsrl2I_var(vecD dst, vecD src, vecD shift) %{
  predicate(n->as_Vector()->length() == 2 && n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "ushl  $dst,$src,$dst\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T8B, cnt_reg);
    __ ushl(dst_reg, __ T2S, src_reg, dst_reg);
  %}
  ins_pipe(vshift64);
%}
5164 
// Logical right shift of 4 word lanes by a non-variable count vector.
// NOTE(review): USHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsrl4I(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T4S, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5177 
// Logical right shift of 4 word lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the USHL result (T4S).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsrl4I_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 4 && n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVI src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "ushl  $dst,$src,$dst\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T4S, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
5194 
// Left shift of 2 word lanes by an immediate count (SHL, 2S arrangement).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ shl(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5207 
// Left shift of 4 word lanes by an immediate count (SHL, 4S arrangement).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 && assert_not_var_shift(n));
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ shl(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5220 
// Arithmetic right shift of 2 word lanes by an immediate count (SSHR, 2S).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ sshr(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5233 
// Arithmetic right shift of 4 word lanes by an immediate count (SSHR, 4S).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 && assert_not_var_shift(n));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ sshr(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5246 
// Logical right shift of 2 word lanes by an immediate count (USHR, 2S).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ushr(dst_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5259 
// Logical right shift of 4 word lanes by an immediate count (USHR, 4S).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4 && assert_not_var_shift(n));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ushr(dst_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5272 
// Left shift of 2 doubleword lanes by a count vector (SSHL, 2D arrangement).
instruct vsll2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (LShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2D, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5285 
// Arithmetic right shift of 2 doubleword lanes by a non-variable count vector.
// NOTE(review): SSHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsra2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "sshl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ sshl(dst_reg, __ T2D, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5298 
// Arithmetic right shift of 2 doubleword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the SSHL result (T2D).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsra2L_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 && n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVL src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "sshl  $dst,$src,$dst\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ sshl(dst_reg, __ T2D, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
5315 
// Logical right shift of 2 doubleword lanes by a non-variable count vector.
// NOTE(review): USHL shifts right only when the per-lane count is negative;
// presumably $shift is already negated for this case — confirm.
instruct vsrl2L(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 && !n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST);
  format %{ "ushl  $dst,$src,$shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    __ ushl(dst_reg, __ T2D, src_reg, cnt_reg);
  %}
  ins_pipe(vshift128);
%}
5328 
// Logical right shift of 2 doubleword lanes by per-lane variable counts.
// $dst doubles as scratch: it receives the byte-wise negated counts (T16B),
// then the USHL result (T2D).
// NOTE(review): format joins the two mnemonics with "\t", not "\n\t" — intended?
instruct vsrl2L_var(vecX dst, vecX src, vecX shift) %{
  predicate(n->as_Vector()->length() == 2 && n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVL src shift));
  ins_cost(INSN_COST * 2);
  effect(TEMP_DEF dst);
  format %{ "negr  $dst,$shift\t"
            "ushl  $dst,$src,$dst\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    FloatRegister cnt_reg = as_FloatRegister($shift$$reg);
    // dst = -shift (byte-wise), then dst = src shifted by dst.
    __ negr(dst_reg, __ T16B, cnt_reg);
    __ ushl(dst_reg, __ T2D, src_reg, dst_reg);
  %}
  ins_pipe(vshift128);
%}
5345 
// Left shift of 2 doubleword lanes by an immediate count (SHL, 2D arrangement).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "shl  $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ shl(dst_reg, __ T2D, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5358 
// Arithmetic right shift of 2 doubleword lanes by an immediate count (SSHR, 2D).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (RShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "sshr  $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ sshr(dst_reg, __ T2D, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5371 
// Logical right shift of 2 doubleword lanes by an immediate count (USHR, 2D).
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2 && assert_not_var_shift(n));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  ins_cost(INSN_COST);
  format %{ "ushr  $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister dst_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ushr(dst_reg, __ T2D, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5384 
// Fused arithmetic shift-right-and-accumulate over 8 byte lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// Counts of 8 or more are clamped to 7.
instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 8) ? 7 : raw;
    __ ssra(acc_reg, __ T8B, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5398 
// Fused arithmetic shift-right-and-accumulate over 16 byte lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// Counts of 8 or more are clamped to 7.
instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 8) ? 7 : raw;
    __ ssra(acc_reg, __ T16B, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5412 
// Fused arithmetic shift-right-and-accumulate over 4 halfword lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// Counts of 16 or more are clamped to 15.
instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 16) ? 15 : raw;
    __ ssra(acc_reg, __ T4H, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5426 
// Fused arithmetic shift-right-and-accumulate over 8 halfword lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// Counts of 16 or more are clamped to 15.
instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int raw = (int)$shift$$constant;
    const int count = (raw >= 16) ? 15 : raw;
    __ ssra(acc_reg, __ T8H, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5440 
// Fused arithmetic shift-right-and-accumulate over 2 word lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ssra(acc_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5453 
// Fused arithmetic shift-right-and-accumulate over 4 word lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (4S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ssra(acc_reg, __ T4S, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5466 
// Fused arithmetic shift-right-and-accumulate over 2 doubleword lanes:
// matches dst = dst + (src >> shift) and emits a single SSRA.
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "ssra  $dst, $src, $shift\t# vector (2D)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ ssra(acc_reg, __ T2D, src_reg, count);
  %}
  ins_pipe(vshift128_imm);
%}
5479 
// Fused logical shift-right-and-accumulate over 8 byte lanes:
// matches dst = dst + (src >>> shift) and emits USRA.
// For a count of 8 or more no instruction is emitted and dst is left unchanged.
instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector (8B)" %}
  ins_encode %{
    const int count = (int)$shift$$constant;
    if (count < 8) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T8B, src_reg, count);
    }
  %}
  ins_pipe(vshift64_imm);
%}
5494 
// Fused logical shift-right-and-accumulate over 16 byte lanes:
// matches dst = dst + (src >>> shift) and emits USRA.
// For a count of 8 or more no instruction is emitted and dst is left unchanged.
instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 16);
  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector (16B)" %}
  ins_encode %{
    const int count = (int)$shift$$constant;
    if (count < 8) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T16B, src_reg, count);
    }
  %}
  ins_pipe(vshift128_imm);
%}
5509 
// Fused logical shift-right-and-accumulate over 4 halfword lanes:
// matches dst = dst + (src >>> shift) and emits USRA.
// For a count of 16 or more no instruction is emitted and dst is left unchanged.
instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector (4H)" %}
  ins_encode %{
    const int count = (int)$shift$$constant;
    if (count < 16) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T4H, src_reg, count);
    }
  %}
  ins_pipe(vshift64_imm);
%}
5524 
// Fused logical shift-right-and-accumulate over 8 halfword lanes:
// matches dst = dst + (src >>> shift) and emits USRA.
// For a count of 16 or more no instruction is emitted and dst is left unchanged.
instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector (8H)" %}
  ins_encode %{
    const int count = (int)$shift$$constant;
    if (count < 16) {
      FloatRegister acc_reg = as_FloatRegister($dst$$reg);
      FloatRegister src_reg = as_FloatRegister($src$$reg);
      __ usra(acc_reg, __ T8H, src_reg, count);
    }
  %}
  ins_pipe(vshift128_imm);
%}
5539 
// Fused logical shift-right-and-accumulate over 2 word lanes:
// matches dst = dst + (src >>> shift) and emits a single USRA.
// The constant is forwarded to the assembler unchanged.
// NOTE(review): assumes the constant is already in the encodable range — confirm.
instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{
  predicate(n->as_Vector()->length() == 2);
  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
  ins_cost(INSN_COST);
  format %{ "usra  $dst, $src, $shift\t# vector (2S)" %}
  ins_encode %{
    FloatRegister acc_reg = as_FloatRegister($dst$$reg);
    FloatRegister src_reg = as_FloatRegister($src$$reg);
    const int count = (int)$shift$$constant;
    __ usra(acc_reg, __ T2S, src_reg, count);
  %}
  ins_pipe(vshift64_imm);
%}
5552 
// Unsigned shift-right-and-accumulate by an immediate on 4 int lanes.
// Matches dst = AddVI(dst, URShiftVI(src, RShiftCntV(shift))) for 4-lane
// vectors and folds the shift and the add into one USRA (arrangement 4S).
5553 instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{
5554   predicate(n->as_Vector()->length() == 4);
5555   match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
5556   ins_cost(INSN_COST);
5557   format %{ "usra  $dst, $src, $shift\t# vector (4S)" %}
5558   ins_encode %{
         // The constant is passed through without a range check.
         // NOTE(review): assumes the matched count is always valid for a
         // 32-bit lane -- confirm where the shift constant is normalized.
5559     __ usra(as_FloatRegister($dst$$reg), __ T4S,
5560             as_FloatRegister($src$$reg),
5561             (int)$shift$$constant);
5562   %}
5563   ins_pipe(vshift128_imm);
5564 %}
5565 
// Unsigned shift-right-and-accumulate by an immediate on 2 long lanes.
// Matches dst = AddVL(dst, URShiftVL(src, RShiftCntV(shift))) for 2-lane
// vectors and folds the shift and the add into one USRA (arrangement 2D).
5566 instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{
5567   predicate(n->as_Vector()->length() == 2);
5568   match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift))));
5569   ins_cost(INSN_COST);
5570   format %{ "usra  $dst, $src, $shift\t# vector (2D)" %}
5571   ins_encode %{
         // The constant is passed through without a range check.
         // NOTE(review): assumes the matched count is always valid for a
         // 64-bit lane -- confirm where the shift constant is normalized.
5572     __ usra(as_FloatRegister($dst$$reg), __ T2D,
5573             as_FloatRegister($src$$reg),
5574             (int)$shift$$constant);
5575   %}
5576   ins_pipe(vshift128_imm);
5577 %}
5578 
// Lane-wise maximum of two float vectors with 2 lanes.
// Selected only when the MaxV node has 2 lanes of T_FLOAT; emits a single
// FMAX with arrangement 2S.
5579 instruct vmax2F(vecD dst, vecD src1, vecD src2)
5580 %{
5581   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
5582   match(Set dst (MaxV src1 src2));
5583   ins_cost(INSN_COST);
5584   format %{ "fmax  $dst,$src1,$src2\t# vector (2F)" %}
5585   ins_encode %{
5586     __ fmax(as_FloatRegister($dst$$reg), __ T2S,
5587             as_FloatRegister($src1$$reg),
5588             as_FloatRegister($src2$$reg));
5589   %}
5590   ins_pipe(vdop_fp64);
5591 %}
5592 
// Lane-wise maximum of two float vectors with 4 lanes.
// Selected only when the MaxV node has 4 lanes of T_FLOAT; emits a single
// FMAX with arrangement 4S.
5593 instruct vmax4F(vecX dst, vecX src1, vecX src2)
5594 %{
5595   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
5596   match(Set dst (MaxV src1 src2));
5597   ins_cost(INSN_COST);
5598   format %{ "fmax  $dst,$src1,$src2\t# vector (4S)" %}
5599   ins_encode %{
5600     __ fmax(as_FloatRegister($dst$$reg), __ T4S,
5601             as_FloatRegister($src1$$reg),
5602             as_FloatRegister($src2$$reg));
5603   %}
5604   ins_pipe(vdop_fp128);
5605 %}
5606 
// Lane-wise maximum of two double vectors with 2 lanes.
// Selected only when the MaxV node has 2 lanes of T_DOUBLE; emits a single
// FMAX with arrangement 2D.
5607 instruct vmax2D(vecX dst, vecX src1, vecX src2)
5608 %{
5609   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
5610   match(Set dst (MaxV src1 src2));
5611   ins_cost(INSN_COST);
5612   format %{ "fmax  $dst,$src1,$src2\t# vector (2D)" %}
5613   ins_encode %{
5614     __ fmax(as_FloatRegister($dst$$reg), __ T2D,
5615             as_FloatRegister($src1$$reg),
5616             as_FloatRegister($src2$$reg));
5617   %}
5618   ins_pipe(vdop_fp128);
5619 %}
5620 
// Lane-wise minimum of two float vectors with 2 lanes.
// Selected only when the MinV node has 2 lanes of T_FLOAT; emits a single
// FMIN with arrangement 2S.
5621 instruct vmin2F(vecD dst, vecD src1, vecD src2)
5622 %{
5623   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
5624   match(Set dst (MinV src1 src2));
5625   ins_cost(INSN_COST);
5626   format %{ "fmin  $dst,$src1,$src2\t# vector (2F)" %}
5627   ins_encode %{
5628     __ fmin(as_FloatRegister($dst$$reg), __ T2S,
5629             as_FloatRegister($src1$$reg),
5630             as_FloatRegister($src2$$reg));
5631   %}
5632   ins_pipe(vdop_fp64);
5633 %}
5634 
// Lane-wise minimum of two float vectors with 4 lanes.
// Selected only when the MinV node has 4 lanes of T_FLOAT; emits a single
// FMIN with arrangement 4S.
5635 instruct vmin4F(vecX dst, vecX src1, vecX src2)
5636 %{
5637   predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
5638   match(Set dst (MinV src1 src2));
5639   ins_cost(INSN_COST);
5640   format %{ "fmin  $dst,$src1,$src2\t# vector (4S)" %}
5641   ins_encode %{
5642     __ fmin(as_FloatRegister($dst$$reg), __ T4S,
5643             as_FloatRegister($src1$$reg),
5644             as_FloatRegister($src2$$reg));
5645   %}
5646   ins_pipe(vdop_fp128);
5647 %}
5648 
// Lane-wise minimum of two double vectors with 2 lanes.
// Selected only when the MinV node has 2 lanes of T_DOUBLE; emits a single
// FMIN with arrangement 2D.
5649 instruct vmin2D(vecX dst, vecX src1, vecX src2)
5650 %{
5651   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
5652   match(Set dst (MinV src1 src2));
5653   ins_cost(INSN_COST);
5654   format %{ "fmin  $dst,$src1,$src2\t# vector (2D)" %}
5655   ins_encode %{
5656     __ fmin(as_FloatRegister($dst$$reg), __ T2D,
5657             as_FloatRegister($src1$$reg),
5658             as_FloatRegister($src2$$reg));
5659   %}
5660   ins_pipe(vdop_fp128);
5661 %}
5662 
// Rounds both double lanes of src to an integral value, as selected by the
// constant rounding mode of the RoundDoubleModeV node:
//   rmode_rint  -> FRINTN
//   rmode_floor -> FRINTM
//   rmode_ceil  -> FRINTP
// Any other mode is treated as a VM error instead of silently emitting no
// instruction, which would leave dst holding whatever it contained before.
5663 instruct vround2D_reg(vecX dst, vecX src, immI rmode) %{
5664   predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
5665   match(Set dst (RoundDoubleModeV src rmode));
5666   format %{ "frint  $dst, $src, $rmode" %}
5667   ins_encode %{
5668     switch ($rmode$$constant) {
5669       case RoundDoubleModeNode::rmode_rint:
5670         __ frintn(as_FloatRegister($dst$$reg), __ T2D,
5671                   as_FloatRegister($src$$reg));
5672         break;
5673       case RoundDoubleModeNode::rmode_floor:
5674         __ frintm(as_FloatRegister($dst$$reg), __ T2D,
5675                   as_FloatRegister($src$$reg));
5676         break;
5677       case RoundDoubleModeNode::rmode_ceil:
5678         __ frintp(as_FloatRegister($dst$$reg), __ T2D,
5679                   as_FloatRegister($src$$reg));
5680         break;
           default:
             // No encoding is provided for any other rounding mode; fail loudly
             // rather than generate code that never writes dst.
             ShouldNotReachHere();
5681     }
5682   %}
5683   ins_pipe(vdop_fp128);
5684 %}
5685 
// Per-lane population count for PopCountVI on vectors shorter than 16 bytes.
// The element type of the node decides how far the per-byte counts are
// widened: bytes need only CNT, shorts one pairwise add, ints two.
5686 instruct vpopcountID(vecD dst, vecD src) %{
5687   predicate(n->as_Vector()->length_in_bytes() < 16);
5688   match(Set dst (PopCountVI src));
5689   ins_cost(3 * INSN_COST);
5690   format %{ "vpopcountI  $dst, $src\t# vector (8B/4H/2S)" %}
5691   ins_encode %{
5692     assert(UsePopCountInstruction, "unsupported");
5693     BasicType bt = Matcher::vector_element_basic_type(this);
         // Count the set bits of every byte.
5694     __ cnt(as_FloatRegister($dst$$reg), __ T8B,
5695            as_FloatRegister($src$$reg));
         // Short or int lanes: add adjacent byte counts into halfword lanes.
5696     if (bt == T_SHORT || bt == T_INT) {
5697       __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
5698                 as_FloatRegister($dst$$reg));
5699     }
         // Int lanes: add adjacent halfword counts into word lanes.
5700     if (bt == T_INT) {
5701       __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
5702                 as_FloatRegister($dst$$reg));
5703     }
5704   %}
5705   ins_pipe(pipe_class_default);
5706 %}
5707 
// Per-lane population count for PopCountVI on 16-byte vectors.
// The element type of the node decides how far the per-byte counts are
// widened: bytes need only CNT, shorts one pairwise add, ints two.
5708 instruct vpopcountIX(vecX dst, vecX src) %{
5709   predicate(n->as_Vector()->length_in_bytes() == 16);
5710   match(Set dst (PopCountVI src));
5711   ins_cost(3 * INSN_COST);
5712   format %{ "vpopcountI  $dst, $src\t# vector (16B/8H/4S)" %}
5713   ins_encode %{
5714     assert(UsePopCountInstruction, "unsupported");
5715     BasicType bt = Matcher::vector_element_basic_type(this);
         // Count the set bits of every byte.
5716     __ cnt(as_FloatRegister($dst$$reg), __ T16B,
5717            as_FloatRegister($src$$reg));
         // Short or int lanes: add adjacent byte counts into halfword lanes.
5718     if (bt == T_SHORT || bt == T_INT) {
5719       __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
5720                 as_FloatRegister($dst$$reg));
5721     }
         // Int lanes: add adjacent halfword counts into word lanes.
5722     if (bt == T_INT) {
5723       __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
5724                 as_FloatRegister($dst$$reg));
5725     }
5726   %}
5727   ins_pipe(pipe_class_default);
5728 %}
5729 
5730 // If the PopCountVL is generated by auto-vectorization, the dst basic
5731 // type is T_INT. And once we have unified the type definition for
5732 // Vector API and auto-vectorization, this rule can be merged with
5733 // "vpopcountLX" rule.
//
// Population count of two long lanes producing two int lanes: the source is
// a 128-bit vector, the result a 64-bit one. dst is used with 128-bit
// arrangements as working storage until the final narrowing step.
5734 instruct vpopcountLD(vecD dst, vecX src) %{
5735   predicate(n->as_Vector()->length_in_bytes() < 16 &&
5736             n->bottom_type()->is_vect()->element_basic_type() == T_INT);
5737   match(Set dst (PopCountVL src));
5738   ins_cost(5 * INSN_COST);
5739   format %{ "vpopcountL  $dst, $src\t# vector (2S)" %}
5740   ins_encode %{
5741     assert(UsePopCountInstruction, "unsupported");
         // Per-byte bit counts, then three pairwise widening adds:
         // bytes -> halfwords -> words -> doublewords.
5742     __ cnt(as_FloatRegister($dst$$reg), __ T16B,
5743            as_FloatRegister($src$$reg));
5744     __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
5745               as_FloatRegister($dst$$reg));
5746     __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
5747               as_FloatRegister($dst$$reg));
5748     __ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
5749               as_FloatRegister($dst$$reg));
         // Narrow the two 64-bit counts to the two 32-bit result lanes.
5750     __ xtn(as_FloatRegister($dst$$reg), __ T2S,
5751            as_FloatRegister($dst$$reg), __ T2D);
5752   %}
5753   ins_pipe(pipe_class_default);
5754 %}
5755 
// Population count of two long lanes producing two long lanes (16-byte
// vector whose element type is T_LONG).
5756 instruct vpopcountLX(vecX dst, vecX src) %{
5757   predicate(n->as_Vector()->length_in_bytes() == 16 &&
5758             n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
5759   match(Set dst (PopCountVL src));
5760   ins_cost(4 * INSN_COST);
5761   format %{ "vpopcountL  $dst, $src\t# vector (2D)" %}
5762   ins_encode %{
5763     assert(UsePopCountInstruction, "unsupported");
         // Per-byte bit counts, then three pairwise widening adds:
         // bytes -> halfwords -> words -> doublewords.
5764     __ cnt(as_FloatRegister($dst$$reg), __ T16B,
5765            as_FloatRegister($src$$reg));
5766     __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
5767               as_FloatRegister($dst$$reg));
5768     __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
5769               as_FloatRegister($dst$$reg));
5770     __ uaddlp(as_FloatRegister($dst$$reg), __ T4S,
5771               as_FloatRegister($dst$$reg));
5772   %}
5773   ins_pipe(pipe_class_default);
5774 %}
5775 
5776 // vector mask reductions
5777 
// Counts the true lanes of a boolean mask held in a 64-bit vector and
// returns the count in a general-purpose register. tmp is a scratch vector
// register (declared TEMP) that receives the across-lane sum.
5778 instruct vmask_truecount8B(iRegINoSp dst, vecD src, vecD tmp) %{
5779   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
5780   match(Set dst (VectorMaskTrueCount src));
5781   effect(TEMP tmp);
5782   ins_cost(2 * INSN_COST);
5783   format %{ "addv $tmp, $src\n\t"
5784             "umov $dst, $tmp, B, 0\t# vector (8B)" %}
5785   ins_encode %{
5786     // Input "src" is a vector of boolean represented as bytes with
5787     // 0x00/0x01 as element values.
         // Summing the bytes therefore yields the number of true lanes; the sum
         // is then moved from byte lane 0 of tmp into dst.
5788     __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src$$reg));
5789     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
5790   %}
5791   ins_pipe(pipe_slow);
5792 %}
5793 
// Counts the true lanes of a boolean mask held in a 128-bit vector and
// returns the count in a general-purpose register. tmp is a scratch vector
// register (declared TEMP) that receives the across-lane sum.
5794 instruct vmask_truecount16B(iRegINoSp dst, vecX src, vecX tmp) %{
5795   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
5796   match(Set dst (VectorMaskTrueCount src));
5797   effect(TEMP tmp);
5798   ins_cost(2 * INSN_COST);
5799   format %{ "addv $tmp, $src\n\t"
5800             "umov $dst, $tmp, B, 0\t# vector (16B)" %}
5801   ins_encode %{
5802     // Input "src" is a vector of boolean represented as bytes with
5803     // 0x00/0x01 as element values.
         // Summing the bytes therefore yields the number of true lanes; the sum
         // is then moved from byte lane 0 of tmp into dst.
5804     __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src$$reg));
5805     __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
5806   %}
5807   ins_pipe(pipe_slow);
5808 %}
5809 
// Index of the first true lane of a boolean mask with fewer than 8 lanes.
// The computed index is clamped to the vector length, so the rule needs a
// compare and therefore declares the flags register as killed. rscratch1 is
// used as a scratch register for the vector length.
5810 instruct vmask_firsttrue_LT8B(iRegINoSp dst, vecD src, rFlagsReg cr) %{
5811   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN &&
5812             n->in(1)->bottom_type()->is_vect()->length() < 8);
5813   match(Set dst (VectorMaskFirstTrue src));
5814   effect(KILL cr);
5815   ins_cost(7 * INSN_COST);
5816   format %{ "vmask_firsttrue $dst, $src\t# vector (4I/4S/2I)" %}
5817   ins_encode %{
5818     // Returns the index of the first active lane of the
5819     // vector mask, or VLENGTH if no lane is active.
5820     //
5821     // Input "src" is a vector of boolean represented as
5822     // bytes with 0x00/0x01 as element values.
5823     //
5824     // Computed by reversing the bits and counting the leading
5825     // zero bytes.
         // Move the 64 mask bits to dst, reverse them, count leading zero bits
         // and divide by 8 to turn the bit count into a lane index.
5826     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
5827     __ rbit($dst$$Register, $dst$$Register);
5828     __ clz($dst$$Register, $dst$$Register);
5829     __ lsrw($dst$$Register, $dst$$Register, 3);
         // Clamp: if the index is >= the vector length, return the length.
5830     __ movw(rscratch1, Matcher::vector_length(this, $src));
5831     __ cmpw($dst$$Register, rscratch1);
5832     __ cselw($dst$$Register, rscratch1, $dst$$Register, Assembler::GE);
5833   %}
5834   ins_pipe(pipe_slow);
5835 %}
5836 
// Index of the first true lane of a boolean mask with exactly 8 lanes.
// No clamping is emitted here, unlike the rule for shorter masks.
5837 instruct vmask_firsttrue8B(iRegINoSp dst, vecD src) %{
5838   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN &&
5839             n->in(1)->bottom_type()->is_vect()->length() == 8);
5840   match(Set dst (VectorMaskFirstTrue src));
5841   ins_cost(4 * INSN_COST);
5842   format %{ "vmask_firsttrue $dst, $src\t# vector (8B)" %}
5843   ins_encode %{
5844     // Returns the index of the first active lane of the
5845     // vector mask, or VLENGTH if no lane is active.
5846     //
5847     // Input "src" is a vector of boolean represented as
5848     // bytes with 0x00/0x01 as element values.
5849     //
5850     // Computed by reversing the bits and counting the leading
5851     // zero bytes.
         // Move the 64 mask bits to dst, reverse them, count leading zero bits
         // and divide by 8 to turn the bit count into a lane index.
5852     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
5853     __ rbit($dst$$Register, $dst$$Register);
5854     __ clz($dst$$Register, $dst$$Register);
5855     __ lsrw($dst$$Register, $dst$$Register, 3);
5856   %}
5857   ins_pipe(pipe_slow);
5858 %}
5859 
// Index of the first true lane of a boolean mask held in a 128-bit vector.
// The two 64-bit halves are examined in turn, low half first; rscratch1
// carries the lane offset of the half being used (0 or 8).
5860 instruct vmask_firsttrue16B(iRegINoSp dst, vecX src) %{
5861   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
5862   match(Set dst (VectorMaskFirstTrue src));
5863   ins_cost(6 * INSN_COST);
5864   format %{ "vmask_firsttrue $dst, $src\t# vector (16B)" %}
5865   ins_encode %{
5866     // Returns the index of the first active lane of the
5867     // vector mask, or 16 (VLENGTH) if no lane is active.
5868     //
5869     // Input "src" is a vector of boolean represented as
5870     // bytes with 0x00/0x01 as element values.
5871 
5872     Label FIRST_TRUE_INDEX;
5873 
5874     // Try to compute the result from lower 64 bits.
5875     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
5876     __ movw(rscratch1, zr);
5877     __ cbnz($dst$$Register, FIRST_TRUE_INDEX);
5878 
5879     // Compute the result from the higher 64 bits.
5880     __ fmovhid($dst$$Register, as_FloatRegister($src$$reg));
5881     __ movw(rscratch1, 8);
5882 
5883     // Reverse the bits and count the leading zero bytes.
5884     __ bind(FIRST_TRUE_INDEX);
5885     __ rbit($dst$$Register, $dst$$Register);
5886     __ clz($dst$$Register, $dst$$Register);
         // dst = lane offset of the half + (leading zero bits >> 3).
5887     __ addw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
5888   %}
5889   ins_pipe(pipe_slow);
5890 %}
5891 
// Index of the last true lane of a boolean mask held in a 64-bit vector.
// rscratch1 is used as a scratch register for the constant 7.
// NOTE(review): the predicate does not restrict the lane count; the formula
// assumes any bytes above the last lane are zero -- confirm for masks with
// fewer than 8 lanes.
5892 instruct vmask_lasttrue8B(iRegINoSp dst, vecD src) %{
5893   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
5894   match(Set dst (VectorMaskLastTrue src));
5895   ins_cost(4 * INSN_COST);
5896   format %{ "vmask_lasttrue $dst, $src\t# vector (8B)" %}
5897   ins_encode %{
5898     // Returns the index of the last active lane of the
5899     // vector mask, or -1 if no lane is active.
5900     //
5901     // Input "src" is a vector of boolean represented as
5902     // bytes with 0x00/0x01 as element values.
5903     //
5904     // Computed by counting the leading zero bytes and
5905     // subtracting that count from 7 (VLENGTH - 1).
5906     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
5907     __ clz($dst$$Register, $dst$$Register);
5908     __ movw(rscratch1, 7);
         // dst = 7 - (leading zero bits >> 3).
5909     __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
5910   %}
5911   ins_pipe(pipe_slow);
5912 %}
5913 
// Index of the last true lane of a boolean mask held in a 128-bit vector.
// The two 64-bit halves are examined in turn, high half first; rscratch1
// carries the index of the top lane of the half being used (15 or 7).
5914 instruct vmask_lasttrue16B(iRegINoSp dst, vecX src) %{
5915   predicate(n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
5916   match(Set dst (VectorMaskLastTrue src));
5917   ins_cost(5 * INSN_COST);
5918   format %{ "vmask_lasttrue $dst, $src\t# vector (16B)" %}
5919   ins_encode %{
5920     // Returns the index of the last active lane of the
5921     // vector mask, or -1 if no lane is active.
5922     //
5923     // Input "src" is a vector of boolean represented as
5924     // bytes with 0x00/0x01 as element values.
5925 
5926     Label LAST_TRUE_INDEX;
5927 
5928     // Try to compute the result from higher 64 bits.
5929     __ fmovhid($dst$$Register, as_FloatRegister($src$$reg));
5930     __ movw(rscratch1, 16 - 1);
5931     __ cbnz($dst$$Register, LAST_TRUE_INDEX);
5932 
5933     // Compute the result from the lower 64 bits.
5934     __ fmovd($dst$$Register, as_FloatRegister($src$$reg));
5935     __ movw(rscratch1, 8 - 1);
5936 
5937     // Count the leading zero bytes and subtract that count from the top
         // lane index in rscratch1 (15 for the higher half, 7 for the lower).
5938     __ bind(LAST_TRUE_INDEX);
5939     __ clz($dst$$Register, $dst$$Register);
5940     __ subw($dst$$Register, rscratch1, $dst$$Register, Assembler::LSR, 3);
5941   %}
5942   ins_pipe(pipe_slow);
5943 %}
5944 
// Converts a boolean mask held in a 64-bit vector into a long value.
// The 8 mask bytes are moved to dst and then reduced in place by the
// bytemask_compress helper, which is defined outside this section.
// NOTE(review): presumably bytemask_compress packs one bit per byte into the
// low bits of the register -- confirm against its definition.
5945 instruct vmask_tolong8B(iRegLNoSp dst, vecD src) %{
5946   match(Set dst (VectorMaskToLong src));
5947   ins_cost(5 * INSN_COST);
5948   format %{ "vmask_tolong $dst, $src\t# convert mask to long (8B)" %}
5949   ins_encode %{
5950     // Input "src" is a vector of boolean represented as
5951     // bytes with 0x00/0x01 as element values.
5952 
5953     __ fmovd(as_Register($dst$$reg), as_FloatRegister($src$$reg));
5954     __ bytemask_compress(as_Register($dst$$reg));
5955   %}
5956   ins_pipe(pipe_slow);
5957 %}
5958 
// Converts a boolean mask held in a 128-bit vector into a long value.
// Each 64-bit half is reduced separately by the bytemask_compress helper
// (defined outside this section); the result for the high half is then
// shifted left by 8 and ORed into the result for the low half.
// rscratch1 holds the high half while it is processed.
// NOTE(review): presumably bytemask_compress packs one bit per byte into the
// low 8 bits of the register -- confirm against its definition.
5959 instruct vmask_tolong16B(iRegLNoSp dst, vecX src) %{
5960   match(Set dst (VectorMaskToLong src));
5961   ins_cost(11 * INSN_COST);
5962   format %{ "vmask_tolong $dst, $src\t# convert mask to long (16B)" %}
5963   ins_encode %{
5964     // Input "src" is a vector of boolean represented as
5965     // bytes with 0x00/0x01 as element values.
5966 
5967     __ umov(as_Register($dst$$reg), as_FloatRegister($src$$reg), __ D, 0);
5968     __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
5969     __ bytemask_compress(as_Register($dst$$reg));
5970     __ bytemask_compress(rscratch1);
5971     __ orr(as_Register($dst$$reg), as_Register($dst$$reg),
5972            rscratch1, Assembler::LSL, 8);
5973   %}
5974   ins_pipe(pipe_slow);
5975 %}
5976 
5977 //------------------------- CountLeadingZerosV -----------------------------
5978 
// Per-lane count of leading zero bits for 8-byte vectors.
// The arrangement is derived from the element size of the node, so one rule
// covers byte, short and int lanes with a single vector CLZ.
5979 instruct countLeadingZerosVD(vecD dst, vecD src) %{
5980   predicate(n->as_Vector()->length_in_bytes() == 8);
5981   match(Set dst (CountLeadingZerosV src));
5982   ins_cost(INSN_COST);
5983   format %{ "countLeadingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
5984   ins_encode %{
5985     BasicType bt = Matcher::vector_element_basic_type(this);
         // NOTE(review): the trailing 'false' presumably selects the 64-bit
         // form of the arrangement -- confirm against esize2arrangement.
5986     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
5987     __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
5988   %}
5989   ins_pipe(pipe_slow);
5990 %}
5991 
// Per-lane count of leading zero bits for 16-byte vectors.
// Byte, short and int lanes use a single vector CLZ. Long lanes are handled
// one lane at a time through rscratch1 with the scalar CLZ.
// NOTE(review): presumably because the vector CLZ has no 2D arrangement --
// confirm against the Arm architecture reference.
5992 instruct countLeadingZerosVX(vecX dst, vecX src) %{
5993   predicate(n->as_Vector()->length_in_bytes() == 16);
5994   match(Set dst (CountLeadingZerosV src));
5995   ins_cost(INSN_COST);
5996   format %{ "countLeadingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
5997   ins_encode %{
5998     BasicType bt = Matcher::vector_element_basic_type(this);
5999     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
6000     if (bt != T_LONG) {
6001       __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($src$$reg));
6002     } else {
           // Lane 0: extract, count, insert. Lane 1 of src is read only after
           // lane 0 of dst has been written, and lane 0 of src is not read again.
6003       __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 0);
6004       __ clz(rscratch1, rscratch1);
6005       __ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
6006       __ umov(rscratch1, as_FloatRegister($src$$reg), __ D, 1);
6007       __ clz(rscratch1, rscratch1);
6008       __ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
6009     }
6010   %}
6011   ins_pipe(pipe_slow);
6012 %}
6013 
6014 //------------------------- CountTrailingZerosV ----------------------------
6015 
// Per-lane count of trailing zero bits for 8-byte vectors.
// Implemented as a call to the neon_reverse_bits helper (defined outside
// this section) followed by a vector CLZ on the result, i.e. trailing zeros
// are counted as the leading zeros of the bit-reversed lanes.
6016 instruct countTrailingZerosVD(vecD dst, vecD src) %{
6017   predicate(n->as_Vector()->length_in_bytes() == 8);
6018   match(Set dst (CountTrailingZerosV src));
6019   ins_cost(3 * INSN_COST);
6020   format %{ "countTrailingZerosV $dst, $src\t# vector (8B/4H/2S)" %}
6021   ins_encode %{
6022     BasicType bt = Matcher::vector_element_basic_type(this);
6023     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), false);
         // The reversed lanes are written to dst and then counted in place.
6024     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
6025     __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
6026   %}
6027   ins_pipe(pipe_slow);
6028 %}
6029 
// Per-lane count of trailing zero bits for 16-byte vectors.
// The lanes are first bit-reversed into dst by the neon_reverse_bits helper
// (defined outside this section); leading zeros of the reversed lanes are
// then counted. Byte, short and int lanes use a vector CLZ; long lanes are
// processed one at a time through rscratch1 with the scalar CLZ.
6030 instruct countTrailingZerosVX(vecX dst, vecX src) %{
6031   predicate(n->as_Vector()->length_in_bytes() == 16);
6032   match(Set dst (CountTrailingZerosV src));
6033   ins_cost(3 * INSN_COST);
6034   format %{ "countTrailingZerosV $dst, $src\t# vector (16B/8H/4S/2D)" %}
6035   ins_encode %{
6036     BasicType bt = Matcher::vector_element_basic_type(this);
6037     Assembler::SIMD_Arrangement size = __ esize2arrangement((unsigned)type2aelembytes(bt), true);
6038     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
6039     if (bt != T_LONG) {
6040       __ clz(as_FloatRegister($dst$$reg), size, as_FloatRegister($dst$$reg));
6041     } else {
           // Each 64-bit lane of dst is extracted, counted and written back
           // before the next lane is read.
6042       __ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 0);
6043       __ clz(rscratch1, rscratch1);
6044       __ mov(as_FloatRegister($dst$$reg), __ D, 0, rscratch1);
6045       __ umov(rscratch1, as_FloatRegister($dst$$reg), __ D, 1);
6046       __ clz(rscratch1, rscratch1);
6047       __ mov(as_FloatRegister($dst$$reg), __ D, 1, rscratch1);
6048     }
6049   %}
6050   ins_pipe(pipe_slow);
6051 %}
6052 
6053 //------------------------------ ReverseV -----------------------------------
6054 
// ReverseV for 8-byte vectors. All work is delegated to the
// neon_reverse_bits helper (defined outside this section), which receives
// the element type of the node.
// NOTE(review): the trailing 'false' presumably selects the 64-bit form --
// confirm against the helper's signature.
6055 instruct vreverseD(vecD dst, vecD src) %{
6056   predicate(n->as_Vector()->length_in_bytes() == 8);
6057   match(Set dst (ReverseV src));
6058   ins_cost(2 * INSN_COST);
6059   format %{ "ReverseV $dst, $src\t# vector (D)" %}
6060   ins_encode %{
6061     BasicType bt = Matcher::vector_element_basic_type(this);
6062     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
6063   %}
6064   ins_pipe(pipe_slow);
6065 %}
6066 
// ReverseV for 16-byte vectors. All work is delegated to the
// neon_reverse_bits helper (defined outside this section), which receives
// the element type of the node.
// NOTE(review): the trailing 'true' presumably selects the 128-bit form --
// confirm against the helper's signature.
6067 instruct vreverseX(vecX dst, vecX src) %{
6068   predicate(n->as_Vector()->length_in_bytes() == 16);
6069   match(Set dst (ReverseV src));
6070   ins_cost(2 * INSN_COST);
6071   format %{ "ReverseV $dst, $src\t# vector (X)" %}
6072   ins_encode %{
6073     BasicType bt = Matcher::vector_element_basic_type(this);
6074     __ neon_reverse_bits(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
6075   %}
6076   ins_pipe(pipe_slow);
6077 %}
6078 
6079 //---------------------------- ReverseBytesV --------------------------------
6080 
// ReverseBytesV for 8-byte vectors. All work is delegated to the
// neon_reverse_bytes helper (defined outside this section), which receives
// the element type of the node.
// NOTE(review): the trailing 'false' presumably selects the 64-bit form --
// confirm against the helper's signature.
6081 instruct vreverseBytesD(vecD dst, vecD src) %{
6082   predicate(n->as_Vector()->length_in_bytes() == 8);
6083   match(Set dst (ReverseBytesV src));
6084   ins_cost(INSN_COST);
6085   format %{ "ReverseBytesV $dst, $src\t# vector (D)" %}
6086   ins_encode %{
6087     BasicType bt = Matcher::vector_element_basic_type(this);
6088     __ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, false);
6089   %}
6090   ins_pipe(pipe_slow);
6091 %}
6092 
// ReverseBytesV for 16-byte vectors. All work is delegated to the
// neon_reverse_bytes helper (defined outside this section), which receives
// the element type of the node.
// NOTE(review): the trailing 'true' presumably selects the 128-bit form --
// confirm against the helper's signature.
6093 instruct vreverseBytesX(vecX dst, vecX src) %{
6094   predicate(n->as_Vector()->length_in_bytes() == 16);
6095   match(Set dst (ReverseBytesV src));
6096   ins_cost(INSN_COST);
6097   format %{ "ReverseBytesV $dst, $src\t# vector (X)" %}
6098   ins_encode %{
6099     BasicType bt = Matcher::vector_element_basic_type(this);
6100     __ neon_reverse_bytes(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), bt, true);
6101   %}
6102   ins_pipe(pipe_slow);
6103 %}