< prev index next >

src/hotspot/cpu/s390/s390.ad

Print this page


   1 //
   2 // Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, 2019 SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //


 457 // Longs in 1 register. Aligned adjacent hi/lo pairs.
     // Each entry names the high half (Z_Rn_H) followed by the low half (Z_Rn).
     // R0, R1, R8 (Z_thread), R14 (return_pc) and R15 (SP) are commented out
     // below and are therefore not members of this class.
 458 reg_class z_long_reg(
 459 /*Z_R0_H,Z_R0*/     // R0
 460 /*Z_R1_H,Z_R1*/
 461   Z_R2_H,Z_R2,
 462   Z_R3_H,Z_R3,
 463   Z_R4_H,Z_R4,
 464   Z_R5_H,Z_R5,
 465   Z_R6_H,Z_R6,
 466   Z_R7_H,Z_R7,
 467 /*Z_R8_H,Z_R8,*/    // Z_thread
 468   Z_R9_H,Z_R9,
 469   Z_R10_H,Z_R10,
 470   Z_R11_H,Z_R11,
 471   Z_R12_H,Z_R12,
 472   Z_R13_H,Z_R13
 473 /*Z_R14_H,Z_R14,*/  // return_pc
 474 /*Z_R15_H,Z_R15*/   // SP
 475 );
 476 
 477 // z_long_reg without even registers: only the odd-numbered pairs
     // R3, R5, R7, R9, R11 and R13 are listed. Referenced by operand
     // allRoddRegL through ALLOC_IN_RC(z_long_odd_reg).
 478 reg_class z_long_odd_reg(
 479 /*Z_R0_H,Z_R0*/     // R0
 480 /*Z_R1_H,Z_R1*/
 481   Z_R3_H,Z_R3,
 482   Z_R5_H,Z_R5,
 483   Z_R7_H,Z_R7,
 484   Z_R9_H,Z_R9,
 485   Z_R11_H,Z_R11,
 486   Z_R13_H,Z_R13
 487 /*Z_R14_H,Z_R14,*/  // return_pc
 488 /*Z_R15_H,Z_R15*/   // SP
 489 );
 490 
 491 // Special Class for Condition Code Flags Register
 492 
 493 reg_class z_condition_reg(
       // Single-member class: Z_CR only. Operand flagsReg allocates from it.
 494   Z_CR
 495 );
 496 
 497 // Scratch register for late profiling. Callee saved.
     // The class contains exactly one 64-bit register: Z_R2 (high and low half).
     // NOTE(review): the "callee saved" property cannot be confirmed from this
     // section of the file - verify against the register definitions.
 498 reg_class z_rscratch2_bits64_reg(Z_R2_H, Z_R2);
 499 
 500 
 501 // Float Register Classes
 502 
 503 reg_class z_flt_reg(
 504   Z_F0,
 505 /*Z_F1,*/ // scratch
 506   Z_F2,
 507   Z_F3,
 508   Z_F4,
 509   Z_F5,


1371     // The ic_miss_stub will handle the null pointer exception.
1372     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1373     __ z_br(R1_ic_miss_stub_addr);
1374     __ bind(valid);
1375   }
1376 
1377   // Check whether this method is the proper implementation for the class of
1378   // the receiver (ic miss check).
1379   {
1380     Label valid;
1381     // Compare cached class against klass from receiver.
1382     // This also does an implicit null check!
1383     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1384     __ z_bre(valid);
1385     // The inline cache points to the wrong method. Call the
1386     // ic_miss_stub to find the proper method.
1387     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1388     __ z_br(R1_ic_miss_stub_addr);
1389     __ bind(valid);
1390   }

1391 }
1392 
1393 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1394   const uint dynamic_size = MachNode::size(ra_);  // Not a fixed size: determined by the generic MachNode code.
1395   return dynamic_size;
1396 }
1397 
1398 //=============================================================================
1399 
1400 %} // interrupt source section
1401 
1402 source_hpp %{ // Header information of the source block.
1403 
1404 class HandlerImpl {
1405  public:
1406 
1407   static int emit_exception_handler(CodeBuffer &cbuf);
1408   static int emit_deopt_handler(CodeBuffer& cbuf);
1409 
1410   static uint size_exception_handler() {


1626 // Should correspond to setting above
1627 const bool Matcher::init_array_count_is_in_bytes = false;
1628 
1629 // Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
     // The cost reported to the matcher is ConditionalMoveLimit.
1630 const int Matcher::long_cmove_cost() { return ConditionalMoveLimit; }
1631 
1632 // Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
     // The cost reported to the matcher is ConditionalMoveLimit, same as for long_cmove_cost().
1633 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1634 
1635 // Does the CPU require postalloc expand (see block.cpp for description of postalloc expand)?
1636 const bool Matcher::require_postalloc_expand = false;
1637 
1638 // Do we need to mask the count passed to shift instructions or does
1639 // the cpu only look at the lower 5/6 bits anyway?
1640 // 32bit shifts mask in emitter, 64bit shifts need no mask.
1641 // Constant shift counts are handled in Ideal phase.
     // Example of "mask in emitter": sllI_reg_reg ANDs the count with
     // BitsPerJavaInteger-1 (NILL) before issuing SLLG.
1642 const bool Matcher::need_masked_shift_count = false;
1643 
1644 // Set this as clone_shift_expressions.
1645 bool Matcher::narrow_oop_use_complex_address() {
1646   // Answer true only when narrow oops have neither a heap base nor a shift.
1647   return (CompressedOops::base() == NULL) && (CompressedOops::shift() == 0);
1648 }
1649 
1650 bool Matcher::narrow_klass_use_complex_address() {
       // Compressed-klass counterpart of narrow_oop_use_complex_address().
       // Asserts UseCompressedClassPointers and currently always answers false
       // (the MatchDecodeNodes variant is still a TODO, see below).
1651   NOT_LP64(ShouldNotCallThis());
1652   assert(UseCompressedClassPointers, "only for compressed klass code");
1653   // TODO HS25: z port if (MatchDecodeNodes) return true;
1654   return false;
1655 }
1656 
1657 bool Matcher::const_oop_prefer_decode() {
1658   // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
       // "Simple" is tested as: CompressedOops::base() is NULL (the shift is not checked here).
1659   return CompressedOops::base() == NULL;
1660 }
1661 
1662 bool Matcher::const_klass_prefer_decode() {
1663   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
       // "Simple" is tested as: CompressedKlassPointers::base() is NULL (the shift is not checked here).
1664   return CompressedKlassPointers::base() == NULL;
1665 }
1666 
1667 // Is it better to copy float constants, or load them directly from memory?
1668 // Most RISCs will have to materialize an address into a
1669 // register first, so they would do better to copy the constant from stack.
1670 const bool Matcher::rematerialize_float_constants = false;
1671 
1672 // If CPU can load and store mis-aligned doubles directly then no fixup is
1673 // needed. Else we split the double into 2 integer pieces and move it
1674 // piece-by-piece. Only happens when passing doubles into C code as the
1675 // Java calling convention forces doubles to be aligned.
1676 const bool Matcher::misaligned_doubles_ok = true;
1677 
1678 // Advertise here if the CPU requires explicit rounding operations
1679 // to implement the UseStrictFP mode.
1680 const bool Matcher::strict_fp_requires_explicit_rounding = false;
1681 
1682 // Do floats take an entire double register or just half?
1683 //
1684 // A float resides in a zarch double register. When storing it by


3373 operand rarg4RegN() %{
       // Narrow-oop register operand (matches iRegN) restricted to class z_rarg4_int_reg.
3374   constraint(ALLOC_IN_RC(z_rarg4_int_reg));
3375   match(iRegN);
3376   format %{ %}
3377   interface(REG_INTER);
3378 %}
3379 
3380 operand rarg5RegN() %{
       // Narrow-oop register operand (matches iRegN) restricted to class z_rarg5_ptrN_reg.
       // NOTE(review): rarg4RegN allocates from an *_int_reg class while this
       // operand uses a *_ptrN_reg class - presumably intentional; confirm.
3381   constraint(ALLOC_IN_RC(z_rarg5_ptrN_reg));
3382   match(iRegN);
3383   format %{ %}
3384   interface(REG_INTER);
3385 %}
3386 
3387 // Long Register
     // General long operand allocated from z_long_reg. Besides RegL it also
     // matches the more constrained long operands revenRegL, roddRegL,
     // allRoddRegL, rarg1RegL and rarg5RegL.
3388 operand iRegL() %{
3389   constraint(ALLOC_IN_RC(z_long_reg));
3390   match(RegL);
3391   match(revenRegL);
3392   match(roddRegL);
3393   match(allRoddRegL);
3394   match(rarg1RegL);
3395   match(rarg5RegL);
3396   format %{ %}
3397   interface(REG_INTER);
3398 %}
3399 
3400 // revenRegL and roddRegL constitute an even-odd pair.
     // revenRegL is the even half; it is allocated from z_rarg3_long_reg.
3401 operand revenRegL() %{
3402   constraint(ALLOC_IN_RC(z_rarg3_long_reg));
3403   match(iRegL);
3404   format %{ %}
3405   interface(REG_INTER);
3406 %}
3407 
3408 // revenRegL and roddRegL constitute an even-odd pair.
     // roddRegL is the odd half; it is allocated from z_rarg4_long_reg.
3409 operand roddRegL() %{
3410   constraint(ALLOC_IN_RC(z_rarg4_long_reg));
3411   match(iRegL);
3412   format %{ %}
3413   interface(REG_INTER);
3414 %}
3415 
3416 // Available odd registers for iRegL: any member of z_long_odd_reg
     // (R3, R5, R7, R9, R11, R13), not just the fixed odd half used by roddRegL.
3417 operand allRoddRegL() %{
3418   constraint(ALLOC_IN_RC(z_long_odd_reg));
3419   match(iRegL);
3420   format %{ %}
3421   interface(REG_INTER);
3422 %}
3423 
3424 operand rarg1RegL() %{
       // Long register operand (matches iRegL) restricted to class z_rarg1_long_reg.
3425   constraint(ALLOC_IN_RC(z_rarg1_long_reg));
3426   match(iRegL);
3427   format %{ %}
3428   interface(REG_INTER);
3429 %}
3430 
3431 operand rarg5RegL() %{
       // Long register operand (matches iRegL) restricted to class z_rarg5_long_reg.
3432   constraint(ALLOC_IN_RC(z_rarg5_long_reg));
3433   match(iRegL);
3434   format %{ %}
3435   interface(REG_INTER);
3436 %}
3437 
3438 // Condition Code Flag Registers
3439 operand flagsReg() %{
3440   constraint(ALLOC_IN_RC(z_condition_reg));
3441   match(RegFlags);
3442   format %{ "CR" %}
3443   interface(REG_INTER);


3489   interface(REG_INTER);
3490 %}
3491 
3492 operand compiler_method_oop_regP(iRegP reg) %{
       // Pointer operand pinned to register class z_r1_RegP; matches its iRegP argument.
3493   constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_oop_reg
3494   match(reg);
3495   format %{ %}
3496   interface(REG_INTER);
3497 %}
3498 
3499 operand interpreter_method_oop_regP(iRegP reg) %{
       // Pointer operand pinned to register class z_r9_regP; matches its iRegP argument.
3500   constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_oop_reg
3501   match(reg);
3502   format %{ %}
3503   interface(REG_INTER);
3504 %}
3505 
3506 // Operands to remove register moves in unscaled mode.
3507 // Match read/write registers with an EncodeP node if neither shift nor add are required.
     // iRegP2N matches (EncodeP reg) directly as a register operand when
     // CompressedOops::shift() == 0 and the EncodeP node has no control input
     // (in(0) == NULL).
     // NOTE(review): unlike iRegN2P, the predicate does not test
     // CompressedOops::base() == NULL - confirm this is intended.
     // NOTE(review): interface(REG_INTER) below lacks the trailing ';' that the
     // other register operands in this file use - confirm ADLC treats both
     // spellings alike.
3508 operand iRegP2N(iRegP reg) %{
3509   predicate(CompressedOops::shift() == 0 && _leaf->as_EncodeP()->in(0) == NULL);
3510   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3511   match(EncodeP reg);
3512   format %{ "$reg" %}
3513   interface(REG_INTER)
3514 %}
3515 
3516 operand iRegN2P(iRegN reg) %{
       // Matches (DecodeN reg) directly as a register operand when narrow oops
       // have no base and no shift and the DecodeN node has no control input.
       // NOTE(review): interface(REG_INTER) below lacks the trailing ';' that the
       // other register operands in this file use - confirm ADLC treats both
       // spellings alike.
3517   predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0 &&
3518             _leaf->as_DecodeN()->in(0) == NULL);
3519   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3520   match(DecodeN reg);
3521   format %{ "$reg" %}
3522   interface(REG_INTER)
3523 %}
3524 
3525 
3526 //----------Complex Operands---------------------------------------------------
3527 
3528 // Indirect Memory Reference
3529 operand indirect(memoryRegP base) %{
3530   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3531   match(base);
3532   op_cost(1);
3533   format %{ "#0[,$base]" %}
3534   interface(MEMORY_INTER) %{
3535     base($base);
3536     index(0xffffFFFF); // noreg
3537     scale(0x0);


4300 // Load Double - UNaligned
     // Matches LoadD_unaligned and is encoded through z_form_rt_mem_opt with
     // the opcode pair LDY_ZOPC/LD_ZOPC (printed as "LD(Y)").
     // Which of the two opcodes is emitted is decided inside z_form_rt_mem_opt
     // (presumably by displacement size - confirm there).
4301 instruct loadD_unaligned(regD dst, memory mem) %{
4302   match(Set dst (LoadD_unaligned mem));
4303   ins_cost(MEMORY_REF_COST);
4304   size(Z_DISP_SIZE);
4305   format %{ "LD(Y)    $dst,$mem" %}
4306   opcode(LDY_ZOPC, LD_ZOPC);
4307   ins_encode(z_form_rt_mem_opt(dst, mem));
4308   ins_pipe(pipe_class_dummy);
4309 %}
4310 
4311 
4312 //----------------------
4313 //  IMMEDIATES
4314 //----------------------
4315 
4316 instruct loadConI(iRegI dst, immI src) %{
       // Load an arbitrary int constant with LGFI (fixed size: 6 bytes).
4317   match(Set dst src);
4318   ins_cost(DEFAULT_COST);
4319   size(6);
4320   format %{ "LGFI    $dst,$src\t # (int)" %}
4321   ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
4322   ins_pipe(pipe_class_dummy);
4323 %}
4324 
4325 instruct loadConI16(iRegI dst, immI16 src) %{
       // Load an int constant that fits immI16 with LGHI (fixed size: 4 bytes).
       // Costed DEFAULT_COST_LOW, i.e. below the DEFAULT_COST of loadConI.
4326   match(Set dst src);
4327   ins_cost(DEFAULT_COST_LOW);
4328   size(4);
4329   format %{ "LGHI    $dst,$src\t # (int)" %}
4330   ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
4331   ins_pipe(pipe_class_dummy);
4332 %}
4333 
4334 instruct loadConI_0(iRegI dst, immI_0 src, flagsReg cr) %{
       // Load the int constant 0 by XOR-ing dst with itself (XGR dst,dst).
       // The condition code register is declared as killed.
4335   match(Set dst src);
4336   effect(KILL cr);
4337   ins_cost(DEFAULT_COST_LOW);
4338   size(4);
4339   format %{ "loadConI $dst,$src\t # (int) XGR because ZERO is loaded" %}
4340   opcode(XGR_ZOPC);
4341   ins_encode(z_rreform(dst, dst));
4342   ins_pipe(pipe_class_dummy);
4343 %}
4344 
4345 instruct loadConUI16(iRegI dst, uimmI16 src) %{
4346   match(Set dst src);
4347   // TODO: s390 port size(FIXED_SIZE);
4348   format %{ "LLILL    $dst,$src" %}
4349   opcode(LLILL_ZOPC);


4705 // See cOop encoding classes for elaborate comment.
4706 
4707 // Moved here because it is needed in expand rules for encode.
4708 // Long negation.
     // Matches (SubL 0 src) and emits LCGR dst,src; kills the condition code.
     // Used by the expand rules encodeP_Ex and encodeP_NN_Ex to negate the base.
4709 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4710   match(Set dst (SubL zero src));
4711   effect(KILL cr);
4712   size(4);
4713   format %{ "NEG     $dst, $src\t # long" %}
4714   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4715   ins_pipe(pipe_class_dummy);
4716 %}
4717 
4718 // Load Compressed Pointer
4719 
4720 // Load narrow oop
     // Encoded through z_form_rt_mem_opt; both opcode slots are LLGF_ZOPC.
4721 instruct loadN(iRegN dst, memory mem) %{
4722   match(Set dst (LoadN mem));
4723   ins_cost(MEMORY_REF_COST);
4724   size(Z_DISP3_SIZE);
4725   format %{ "LoadN   $dst,$mem\t # (cOop)" %}
4726   opcode(LLGF_ZOPC, LLGF_ZOPC);
4727   ins_encode(z_form_rt_mem_opt(dst, mem));
4728   ins_pipe(pipe_class_dummy);
4729 %}
4730 
4731 // Load narrow Klass Pointer
     // Same encoding as loadN (LLGF in both opcode slots), but matches LoadNKlass.
4732 instruct loadNKlass(iRegN dst, memory mem) %{
4733   match(Set dst (LoadNKlass mem));
4734   ins_cost(MEMORY_REF_COST);
4735   size(Z_DISP3_SIZE);
4736   format %{ "LoadNKlass $dst,$mem\t # (klass cOop)" %}
4737   opcode(LLGF_ZOPC, LLGF_ZOPC);
4738   ins_encode(z_form_rt_mem_opt(dst, mem));
4739   ins_pipe(pipe_class_dummy);
4740 %}
4741 
4742 // Load constant Compressed Pointer
4743 
4744 instruct loadConN(iRegN dst, immN src) %{
       // Materialize a narrow oop constant (fixed size: 6 bytes). The constant's
       // relocation is recorded before load_narrow_oop emits the load.
4745   match(Set dst src);
4746   ins_cost(DEFAULT_COST);
4747   size(6);
4748   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4749   ins_encode %{
4750     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);
4751     __ relocate(cOop.rspec(), 1);
4752     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4753   %}
4754   ins_pipe(pipe_class_dummy);
4755 %}
4756 


4766 %}
4767 
4768 instruct loadConNKlass(iRegN dst, immNKlass src) %{
       // Materialize a narrow klass constant (fixed size: 6 bytes). The metadata
       // relocation is recorded before load_narrow_klass emits the load.
4769   match(Set dst src);
4770   ins_cost(DEFAULT_COST);
4771   size(6);
4772   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4773   ins_encode %{
4774     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4775     __ relocate(NKlass.rspec(), 1);
4776     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4777   %}
4778   ins_pipe(pipe_class_dummy);
4779 %}
4780 
4781 // Load and Decode Compressed Pointer
4782 // optimized variants for Unscaled cOops
4783 
4784 instruct decodeLoadN(iRegP dst, memory mem) %{
       // Fused load + decode of a narrow oop using a single LLGF.
       // The leading "false &&" in the predicate disables this rule unconditionally;
       // the remaining condition (no base, no shift) is never evaluated.
4785   match(Set dst (DecodeN (LoadN mem)));
4786   predicate(false && (CompressedOops::base()==NULL)&&(CompressedOops::shift()==0));
4787   ins_cost(MEMORY_REF_COST);
4788   size(Z_DISP3_SIZE);
4789   format %{ "DecodeLoadN  $dst,$mem\t # (cOop Load+Decode)" %}
4790   opcode(LLGF_ZOPC, LLGF_ZOPC);
4791   ins_encode(z_form_rt_mem_opt(dst, mem));
4792   ins_pipe(pipe_class_dummy);
4793 %}
4794 
4795 instruct decodeLoadNKlass(iRegP dst, memory mem) %{
       // Fused load + decode of a narrow klass pointer using a single LLGF.
       // The leading "false &&" in the predicate disables this rule unconditionally.
4796   match(Set dst (DecodeNKlass (LoadNKlass mem)));
4797   predicate(false && (CompressedKlassPointers::base()==NULL)&&(CompressedKlassPointers::shift()==0));
4798   ins_cost(MEMORY_REF_COST);
4799   size(Z_DISP3_SIZE);
4800   format %{ "DecodeLoadNKlass  $dst,$mem\t # (load/decode NKlass)" %}
4801   opcode(LLGF_ZOPC, LLGF_ZOPC);
4802   ins_encode(z_form_rt_mem_opt(dst, mem));
4803   ins_pipe(pipe_class_dummy);
4804 %}
4805 
4806 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{
       // Decode of a constant narrow klass: instead of decoding at run time, the
       // Klass* value is loaded directly with load_const (fixed size: 12 bytes),
       // preceded by the metadata relocation.
4807   match(Set dst (DecodeNKlass src));
4808   ins_cost(3 * DEFAULT_COST);
4809   size(12);
4810   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4811   ins_encode %{
4812     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);
4813     __ relocate(NKlass.rspec(), 1);
4814     __ load_const($dst$$Register, (Klass*)NKlass.value());
4815   %}
4816   ins_pipe(pipe_class_dummy);
4817 %}
4818 
4819 // Decode Compressed Pointer
4820 
4821 // General decoder
     // Selected when there is no heap base or when ExpandLoadingBaseDecode is off;
     // otherwise decodeN_Ex (expand variant) applies. Calls oop_decoder with its
     // third argument set to true (presumably "value may be null" - confirm in
     // MacroAssembler::oop_decoder). Kills the condition code.
4822 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4823   match(Set dst (DecodeN src));
4824   effect(KILL cr);
4825   predicate(CompressedOops::base() == NULL || !ExpandLoadingBaseDecode);
4826   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4827   // TODO: s390 port size(VARIABLE_SIZE);
4828   format %{ "decodeN  $dst,$src\t # (decode cOop)" %}
4829   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}
4830   ins_pipe(pipe_class_dummy);
4831 %}
4832 
4833 // General Klass decoder
     // Matches DecodeNKlass on a register and emits decode_klass_not_null.
     // Kills the condition code. No predicate and no size are declared.
4834 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4835   match(Set dst (DecodeNKlass src));
4836   effect(KILL cr);
4837   ins_cost(3 * DEFAULT_COST);
4838   format %{ "decode_klass $dst,$src" %}
4839   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4840   ins_pipe(pipe_class_dummy);
4841 %}
4842 
4843 // General decoder for results typed NotNull or Constant ("NN" variant).
     // Same match as decodeN, but cheaper (no BRANCH_COST) and calls oop_decoder
     // with its third argument set to false. Applies when there is no heap base
     // or ExpandLoadingBaseDecode_NN is off; otherwise decodeN_NN_Ex applies.
4844 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4845   match(Set dst (DecodeN src));
4846   effect(KILL cr);
4847   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4848              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4849             (CompressedOops::base()== NULL || !ExpandLoadingBaseDecode_NN));
4850   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4851   // TODO: s390 port size(VARIABLE_SIZE);
4852   format %{ "decodeN  $dst,$src\t # (decode cOop NN)" %}
4853   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}
4854   ins_pipe(pipe_class_dummy);
4855 %}
4856 
4857   instruct loadBase(iRegL dst, immL baseImm) %{
         // Helper rule: loads the heap-base immediate into dst via get_oop_base.
         // It has no match rule and predicate(false), so it is only instantiated
         // from expand blocks (decodeN_Ex, decodeN_NN_Ex, encodeP_Ex, encodeP_NN_Ex).
4858     effect(DEF dst, USE baseImm);
4859     predicate(false);
4860     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4861     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4862     ins_pipe(pipe_class_dummy);
4863   %}
4864 
4865   // Decoder for heapbased mode peeling off loading the base.
       // Takes the already loaded base in a register. predicate(false) keeps the
       // matcher from selecting it; it is instantiated by the expand in decodeN_Ex.
       // Passes true as third oop_decoder argument, plus the base register and the
       // pow2 offset derived from CompressedOops::base().
4866   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4867     match(Set dst (DecodeN src base));
4868     // Note: Effect TEMP dst was used with the intention to get
4869     // different regs for dst and base, but this has caused ADLC to
4870     // generate wrong code. Oop_decoder generates additional lgr when
4871     // dst==base.
4872     effect(KILL cr);
4873     predicate(false);
4874     // TODO: s390 port size(VARIABLE_SIZE);
4875     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4876     ins_encode %{
4877       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4878                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4879     %}
4880     ins_pipe(pipe_class_dummy);
4881   %}
4882 
4883   // Decoder for heapbased mode peeling off loading the base.
       // "NN" counterpart of decodeN_base: identical except that oop_decoder gets
       // false as third argument and the format omits the "== 0" case.
       // predicate(false): only instantiated by the expand in decodeN_NN_Ex.
4884   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4885     match(Set dst (DecodeN src base));
4886     effect(KILL cr);
4887     predicate(false);
4888     // TODO: s390 port size(VARIABLE_SIZE);
4889     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t # (decode cOop)" %}
4890     ins_encode %{
4891       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4892                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)CompressedOops::base()));
4893     %}
4894     ins_pipe(pipe_class_dummy);
4895   %}
4896 
4897 // Decoder for heapbased mode peeling off loading the base.
     // Expand rule, selected when a heap base exists and ExpandLoadingBaseDecode
     // is on. Expands into loadBase (base immediate -> temp register) followed by
     // decodeN_base.
     // NOTE(review): unlike decodeN, this rule declares cr without an
     // effect(KILL cr); cr is only handed on to decodeN_base - confirm intended.
4898 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4899   match(Set dst (DecodeN src));
4900   predicate(CompressedOops::base() != NULL && ExpandLoadingBaseDecode);
4901   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4902   // TODO: s390 port size(VARIABLE_SIZE);
4903   expand %{
4904     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4905     iRegL base;
4906     loadBase(base, baseImm);
4907     decodeN_base(dst, src, base, cr);
4908   %}
4909 %}
4910 
4911 // Decoder for heapbased mode peeling off loading the base.
     // Expand rule for results typed NotNull or Constant, selected when a heap
     // base exists and ExpandLoadingBaseDecode_NN is on. Expands into loadBase
     // followed by decodeN_NN_base.
4912 instruct decodeN_NN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4913   match(Set dst (DecodeN src));
4914   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4915              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4916             CompressedOops::base() != NULL && ExpandLoadingBaseDecode_NN);
4917   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4918   // TODO: s390 port size(VARIABLE_SIZE);
4919   expand %{
4920     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
4921     iRegL base;
4922     loadBase(base, baseImm);
4923     decodeN_NN_base(dst, src, base, cr);
4924   %}
4925 %}
4926 
4927 //  Encode Compressed Pointer
4928 
4929 // General encoder
     // Applies when the result is not typed NotNull and either there is no heap
     // base, the base is disjoint, or ExpandLoadingBaseEncode is off (otherwise
     // see encodeP_Ex). Calls oop_encoder with third argument true, Z_R1_scratch
     // as base register argument, -1 as offset argument and the result of
     // all_outs_are_Stores(this). Kills the condition code.
4930 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4931   match(Set dst (EncodeP src));
4932   effect(KILL cr);
4933   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4934             (CompressedOops::base() == 0 ||
4935              CompressedOops::base_disjoint() ||
4936              !ExpandLoadingBaseEncode));
4937   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4938   // TODO: s390 port size(VARIABLE_SIZE);
4939   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4940   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4941   ins_pipe(pipe_class_dummy);
4942 %}
4943 
4944 // General class encoder
     // Matches EncodePKlass on a register and emits encode_klass_not_null.
     // Kills the condition code. No predicate, cost or size are declared.
4945 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4946   match(Set dst (EncodePKlass src));
4947   effect(KILL cr);
4948   format %{ "encode_klass $dst,$src" %}
4949   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4950   ins_pipe(pipe_class_dummy);
4951 %}
4952 
4953 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{
       // Encoder for results typed NotNull. Mirrors encodeP, but tests
       // ExpandLoadingBaseEncode_NN and passes false as third oop_encoder argument.
       // The format string is the same as encodeP's, so the two are
       // indistinguishable in disassembly output.
4954   match(Set dst (EncodeP src));
4955   effect(KILL cr);
4956   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4957             (CompressedOops::base() == 0 ||
4958              CompressedOops::base_disjoint() ||
4959              !ExpandLoadingBaseEncode_NN));
4960   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4961   // TODO: s390 port size(VARIABLE_SIZE);
4962   format %{ "encodeP  $dst,$src\t # (encode cOop)" %}
4963   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}
4964   ins_pipe(pipe_class_dummy);
4965 %}
4966 
4967   // Encoder for heapbased mode peeling off loading the base.
       // Takes the (negated) base in a register; dst is declared TEMP_DEF.
       // predicate(false): only instantiated by the expand in encodeP_Ex.
       // The offset handed to oop_encoder is the negated pow2 offset of
       // (CompressedOops::base() >> CompressedOops::shift()).
4968   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4969     match(Set dst (EncodeP src (Binary base dst)));
4970     effect(TEMP_DEF dst);
4971     predicate(false);
4972     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4973     // TODO: s390 port size(VARIABLE_SIZE);
4974     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t # (encode cOop)" %}
4975     ins_encode %{
4976       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset
4977         (((uint64_t)(intptr_t)CompressedOops::base()) >> CompressedOops::shift());
4978       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4979     %}
4980     ins_pipe(pipe_class_dummy);
4981   %}
4982 
4983   // Encoder for heapbased mode peeling off loading the base.
       // "NN" counterpart of encodeP_base: the pow2 offset arrives as an immediate
       // operand (USE pow2_offset) instead of being computed in the encoding, and
       // oop_encoder gets false as third argument.
       // predicate(false): only instantiated by the expand in encodeP_NN_Ex.
4984   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4985     match(Set dst (EncodeP src base));
4986     effect(USE pow2_offset);
4987     predicate(false);
4988     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4989     // TODO: s390 port size(VARIABLE_SIZE);
4990     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t # (encode cOop)" %}
4991     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4992     ins_pipe(pipe_class_dummy);
4993   %}
4994 
4995 // Encoder for heapbased mode peeling off loading the base.
     // Expand rule for results not typed NotNull, selected when the base overlaps
     // and ExpandLoadingBaseEncode is on. Expands into three nodes:
     //   loadBase      - load (base >> shift) into a temp,
     //   negL_reg_reg  - negate it (using a fresh flags temp ccr),
     //   encodeP_base  - encode src using the negated base.
     // NOTE(review): baseImm is shifted right by CompressedOops::shift() here but
     // not in encodeP_NN_Ex - confirm both match what oop_encoder expects.
4996 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4997   match(Set dst (EncodeP src));
4998   effect(KILL cr);
4999   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
5000             (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode));
5001   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5002   // TODO: s390 port size(VARIABLE_SIZE);
5003   expand %{
5004     immL baseImm %{ ((jlong)(intptr_t)CompressedOops::base()) >> CompressedOops::shift() %}
5005     immL_0 zero %{ (0) %}
5006     flagsReg ccr;
5007     iRegL base;
5008     iRegL negBase;
5009     loadBase(base, baseImm);
5010     negL_reg_reg(negBase, zero, base, ccr);
5011     encodeP_base(dst, src, negBase);
5012   %}
5013 %}
5014 
5015 // Encoder for heapbased mode peeling off loading the base.
     // Expand rule for results typed NotNull, selected when the base overlaps and
     // ExpandLoadingBaseEncode_NN is on. Expands into loadBase, negL_reg_reg and
     // encodeP_NN_base; the negated pow2 offset is passed along as an immediate.
     // NOTE(review): here baseImm and pow2_offset are derived from the unshifted
     // CompressedOops::base(), whereas encodeP_Ex / encodeP_base use
     // base >> shift - confirm which form oop_encoder expects.
5016 instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
5017   match(Set dst (EncodeP src));
5018   effect(KILL cr);
5019   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
5020             (CompressedOops::base_overlaps() && ExpandLoadingBaseEncode_NN));
5021   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5022   // TODO: s390 port size(VARIABLE_SIZE);
5023   expand %{
5024     immL baseImm %{ (jlong)(intptr_t)CompressedOops::base() %}
5025     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)CompressedOops::base())) %}
5026     immL_0 zero %{ 0 %}
5027     flagsReg ccr;
5028     iRegL base;
5029     iRegL negBase;
5030     loadBase(base, baseImm);
5031     negL_reg_reg(negBase, zero, base, ccr);
5032     encodeP_NN_base(dst, src, negBase, pow2_offset);
5033   %}
5034 %}
5035 
5036 //  Store Compressed Pointer
5037 
5038 // Store Compressed Pointer
     // The source is an iRegN_P2N operand. Encoded through z_form_rt_mem_opt
     // with the opcode pair STY_ZOPC/ST_ZOPC.
5039 instruct storeN(memory mem, iRegN_P2N src) %{
5040   match(Set mem (StoreN mem src));
5041   ins_cost(MEMORY_REF_COST);
5042   size(Z_DISP_SIZE);
5043   format %{ "ST      $src,$mem\t # (cOop)" %}
5044   opcode(STY_ZOPC, ST_ZOPC);
5045   ins_encode(z_form_rt_mem_opt(src, mem));
5046   ins_pipe(pipe_class_dummy);
5047 %}
5048 
5049 // Store Compressed Klass pointer
     // Same encoding as storeN (STY_ZOPC/ST_ZOPC via z_form_rt_mem_opt), but the
     // source is a plain iRegN and the rule matches StoreNKlass.
5050 instruct storeNKlass(memory mem, iRegN src) %{
5051   match(Set mem (StoreNKlass mem src));
5052   ins_cost(MEMORY_REF_COST);
5053   size(Z_DISP_SIZE);
5054   format %{ "ST      $src,$mem\t # (cKlass)" %}
5055   opcode(STY_ZOPC, ST_ZOPC);
5056   ins_encode(z_form_rt_mem_opt(src, mem));
5057   ins_pipe(pipe_class_dummy);
5058 %}
5059 
5060 // Compare Compressed Pointers
5061 
5062 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{
       // Compare two narrow oops held in registers with CLR (fixed size: 2 bytes);
       // the result is the condition code.
5063   match(Set cr (CmpN src1 src2));
5064   ins_cost(DEFAULT_COST);
5065   size(2);
5066   format %{ "CLR     $src1,$src2\t # (cOop)" %}
5067   opcode(CLR_ZOPC);
5068   ins_encode(z_rrform(src1, src2));
5069   ins_pipe(pipe_class_dummy);
5070 %}
5071 
5072 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{
       // Compare a narrow oop register against a narrow oop constant (fixed size:
       // 6 bytes). The oop relocation is recorded before
       // compare_immediate_narrow_oop emits the compare.
5073   match(Set cr (CmpN src1 src2));
5074   ins_cost(DEFAULT_COST);
5075   size(6);
5076   format %{ "CLFI    $src1,$src2\t # (cOop) compare immediate narrow" %}
5077   ins_encode %{
5078     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);
5079     __ relocate(cOop.rspec(), 1);
5080     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5081   %}
5082   ins_pipe(pipe_class_dummy);
5083 %}
5084 
5085 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{
       // Compare a narrow klass register against a narrow klass constant (fixed
       // size: 6 bytes). The metadata relocation is recorded before
       // compare_immediate_narrow_klass emits the compare.
5086   match(Set cr (CmpN src1 src2));
5087   ins_cost(DEFAULT_COST);
5088   size(6);
5089   format %{ "CLFI    $src1,$src2\t # (NKlass) compare immediate narrow" %}
5090   ins_encode %{
5091     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);
5092     __ relocate(NKlass.rspec(), 1);
5093     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5094   %}
5095   ins_pipe(pipe_class_dummy);
5096 %}
5097 
5098 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{
       // Compare a narrow oop against the narrow null constant with a 2-byte LTR.
       // The encoding is LTR src1,src1; the format string prints $src2 (the zero
       // constant) in the second position for readability only.
5099   match(Set cr (CmpN src1 src2));
5100   ins_cost(DEFAULT_COST);
5101   size(2);
5102   format %{ "LTR     $src1,$src2\t # (cOop) LTR because comparing against zero" %}
5103   opcode(LTR_ZOPC);
5104   ins_encode(z_rrform(src1, src1));
5105   ins_pipe(pipe_class_dummy);
5106 %}
5107 
5108 
5109 //----------MemBar Instructions-----------------------------------------------
5110 
5111 // Memory barrier flavors
5112 
5113 instruct membar_acquire() %{
       // Handles both MemBarAcquire and LoadFence. The declared size is 0, so
       // z_acquire() is expected to emit no bytes; the high cost is bookkeeping only.
5114   match(MemBarAcquire);
5115   match(LoadFence);
5116   ins_cost(4*MEMORY_REF_COST);
5117   size(0);
5118   format %{ "MEMBAR-acquire" %}
5119   ins_encode %{ __ z_acquire(); %}
5120   ins_pipe(pipe_class_dummy);
5121 %}
5122 


6191   opcode(AGFI_ZOPC);
6192   ins_encode(z_rilform_signed(dst, src));
6193   ins_pipe(pipe_class_dummy);
6194 %}
6195 
6196 // REG = REG1 + REG2 + IMM
6197 
6198 instruct addP_reg_reg_imm12(iRegP dst, memoryRegP src1, iRegL src2, uimmL12 con) %{
       // dst = src1 + src2 + con, folded into one 4-byte LA with an unsigned
       // 12-bit displacement. Only used when PreferLAoverADD is set.
6199   match(Set dst (AddP (AddP src1 src2) con));
6200   predicate( PreferLAoverADD);
6201   ins_cost(DEFAULT_COST_LOW);
6202   size(4);
6203   format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
6204   opcode(LA_ZOPC);
6205   ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
6206   ins_pipe(pipe_class_dummy);
6207 %}
6208 
6209 instruct addP_regN_reg_imm12(iRegP dst, iRegP_N2P src1, iRegL src2, uimmL12 con) %{
       // Variant of addP_reg_reg_imm12 whose first input is an iRegP_N2P operand.
       // Additionally requires narrow oops without base and without shift.
6210   match(Set dst (AddP (AddP src1 src2) con));
6211   predicate( PreferLAoverADD && CompressedOops::base() == NULL && CompressedOops::shift() == 0);
6212   ins_cost(DEFAULT_COST_LOW);
6213   size(4);
6214   format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
6215   opcode(LA_ZOPC);
6216   ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
6217   ins_pipe(pipe_class_dummy);
6218 %}
6219 
6220 instruct addP_reg_reg_imm20(iRegP dst, memoryRegP src1, iRegL src2, immL20 con) %{
       // dst = src1 + src2 + con, folded into one LAY with a 20-bit displacement.
       // Costed DEFAULT_COST, i.e. above the imm12 (LA) variant. No size declared yet.
6221   match(Set dst (AddP (AddP src1 src2) con));
6222   predicate(PreferLAoverADD);
6223   ins_cost(DEFAULT_COST);
6224   // TODO: s390 port size(FIXED_SIZE);
6225   format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
6226   opcode(LAY_ZOPC);
6227   ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
6228   ins_pipe(pipe_class_dummy);
6229 %}
6230 
6231 instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) %{
       // Variant of addP_reg_reg_imm20 whose first input is an iRegP_N2P operand.
       // Additionally requires narrow oops without base and without shift.
6232   match(Set dst (AddP (AddP src1 src2) con));
6233   predicate( PreferLAoverADD && CompressedOops::base() == NULL && CompressedOops::shift() == 0);
6234   ins_cost(DEFAULT_COST);
6235   // TODO: s390 port size(FIXED_SIZE);
6236   format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
6237   opcode(LAY_ZOPC);
6238   ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
6239   ins_pipe(pipe_class_dummy);
6240 %}
6241 
6242 // MEM = MEM + IMM
6243 
6244 // Add Immediate to 8-byte memory operand and result
6245 instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
6246   match(Set mem (StoreP mem (AddP (LoadP mem) src)));
6247   effect(KILL cr);
6248   predicate(VM_Version::has_MemWithImmALUOps());
6249   ins_cost(MEMORY_REF_COST);
6250   size(6);
6251   format %{ "AGSI    $mem,$src\t # direct mem add 8 (ptr)" %}
6252   opcode(AGSI_ZOPC);
6253   ins_encode(z_siyform(mem, src));


6777       __ z_lghi(Z_R0_scratch, divisor);
6778       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6779       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6780     } else {
6781       __ clear_reg($dst$$Register, true, false);
6782     }
6783   %}
6784   ins_pipe(pipe_class_dummy);
6785 %}
6786 
6787 // SHIFT
6788 
6789 // Shift left logical
6790 
6791 // Register Shift Left variable
     // Emits three instructions (14 bytes): copy the count to Z_R1_scratch, mask
     // it with BitsPerJavaInteger-1 (NILL), then SLLG dst,src by that count.
     // The count register itself is left unmodified; Z_R1_scratch is clobbered.
6792 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6793   match(Set dst (LShiftI src nbits));
6794   effect(KILL cr); // R1 is killed, too.
6795   ins_cost(3 * DEFAULT_COST);
6796   size(14);
6797   format %{ "SLL     $dst,$src,[$nbits] & 31\t # use RISC-like SLLG also for int" %}
6798   ins_encode %{
6799     __ z_lgr(Z_R1_scratch, $nbits$$Register);
6800     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);
6801     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);
6802   %}
6803   ins_pipe(pipe_class_dummy);
6804 %}
6805 
6806 // Register Shift Left Immediate
6807 // Constant shift count is masked in ideal graph already.
     // The encoding asserts that assumption and still masks the count with
     // BitsPerJavaInteger-1 before emitting a single 6-byte SLLG; Z_R0 is passed
     // as the register argument of z_sllg.
6808 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6809   match(Set dst (LShiftI src nbits));
6810   size(6);
6811   format %{ "SLL     $dst,$src,$nbits\t # use RISC-like SLLG also for int" %}
6812   ins_encode %{
6813     int Nbit = $nbits$$constant;
6814     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");
6815     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);
6816   %}
6817   ins_pipe(pipe_class_dummy);
6818 %}
6819 
6820 // Register Shift Left Immediate by 1bit
6821 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
       // Implements src << 1 as src + src using LA address arithmetic (displacement 0,
       // $src passed for both register operands). Only available when PreferLAoverADD
       // is set; declared cheaper (DEFAULT_COST_LOW) than the rules without an ins_cost.
6822   match(Set dst (LShiftI src nbits));
6823   predicate(PreferLAoverADD);
6824   ins_cost(DEFAULT_COST_LOW);
6825   size(4);
6826   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6827   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6828   ins_pipe(pipe_class_dummy);
6829 %}
6830 
6831 // Register Shift Left Long


7107   %}
7108   ins_pipe(pipe_class_dummy);
7109 %}
7110 
7111 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{
       // Overflow check for the long subtraction op1 - op2 with a constant op2.
       // Only the condition code is produced (DEF cr); the difference is formed in
       // Z_R0_scratch and never copied back, so op1 keeps its value.
7112   match(Set cr (OverflowSubL op1 op2));
7113   effect(DEF cr, USE op1, USE op2);
7114   // TODO: s390 port size(VARIABLE_SIZE);
7115   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7116   ins_encode %{
7117     __ load_const_optimized(Z_R1_scratch, $op2$$constant);  // Materialize the constant subtrahend.
7118     __ z_lgr(Z_R0_scratch, $op1$$Register);                 // Work on a copy of op1.
7119     __ z_sgr(Z_R0_scratch, Z_R1_scratch);                   // The subtraction whose condition code is consumed.
7120   %}
7121   ins_pipe(pipe_class_dummy);
7122 %}
7123 
7124 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{
       // Overflow check for int negation, matched as the subtraction 0 - op2.
       // The result is formed in Z_R0_scratch only; op2 is read but not written.
7125   match(Set cr (OverflowSubI zero op2));
7126   effect(DEF cr, USE op2);
7127   format %{ "NEG    $op2\t # overflow check int" %}
7128   ins_encode %{
7129     __ clear_reg(Z_R0_scratch, false, false);  // Scratch = 0 (flag arguments differ from the long variant; presumably 32-bit clear - confirm).
7130     __ z_sr(Z_R0_scratch, $op2$$Register);     // 0 - op2; the condition code is the result of interest.
7131   %}
7132   ins_pipe(pipe_class_dummy);
7133 %}
7134 
7135 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{
       // Overflow check for long negation, matched as the subtraction 0 - op2.
       // The result is formed in Z_R0_scratch only; op2 is read but not written.
7136   match(Set cr (OverflowSubL zero op2));
7137   effect(DEF cr, USE op2);
7138   format %{ "NEGG    $op2\t # overflow check long" %}
7139   ins_encode %{
7140     __ clear_reg(Z_R0_scratch, true, false);  // Scratch = 0 (first flag is true here, false in the int variant; presumably 64-bit clear - confirm).
7141     __ z_sgr(Z_R0_scratch, $op2$$Register);   // 0 - op2; the condition code is the result of interest.
7142   %}
7143   ins_pipe(pipe_class_dummy);
7144 %}
7145 
7146 // No intrinsics for multiplication, since there is no easy way
7147 // to check for overflow.
7148 
7149 
7150 //----------Floating Point Arithmetic Instructions-----------------------------
7151 
7152 //  ADD
7153 
7154 //  Add float single precision
7155 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7156   match(Set dst (AddF dst src));
7157   effect(KILL cr);
7158   ins_cost(ALU_REG_COST);


8537   size(4);
8538   format %{ "CLGR    $op1,$op2\t # ptr" %}
8539   opcode(CLGR_ZOPC);
8540   ins_encode(z_rreform(op1, op2));
8541   ins_pipe(pipe_class_dummy);
8542 %}
8543 
8544 instruct compP_reg_imm0(flagsReg cr, iRegP_N2P op1, immP0 op2) %{
       // Pointer compare against the constant matched by immP0: instead of a
       // compare, op1 is loaded-and-tested onto itself (LTGR op1,op1) and only
       // the resulting condition code is used. op2 does not appear in the encoding.
8545   match(Set cr (CmpP op1 op2));
8546   ins_cost(DEFAULT_COST_LOW);
8547   size(4);
8548   format %{ "LTGR    $op1, $op1\t # ptr" %}
8549   opcode(LTGR_ZOPC);
8550   ins_encode(z_rreform(op1, op1));
8551   ins_pipe(pipe_class_dummy);
8552 %}
8553 
8554 // Don't use LTGFR which performs sign extend.
8555 instruct compP_decode_reg_imm0(flagsReg cr, iRegN op1, immP0 op2) %{
       // Tests a decoded narrow oop against the immP0 constant without emitting the
       // DecodeN: the narrow register itself is tested with LTR. Restricted by the
       // predicate to the mode where neither a base nor a shift is applied.
8556   match(Set cr (CmpP (DecodeN op1) op2));
8557   predicate(CompressedOops::base() == NULL && CompressedOops::shift() == 0);
8558   ins_cost(DEFAULT_COST_LOW);
8559   size(2);
8560   format %{ "LTR    $op1, $op1\t # ptr" %}
8561   opcode(LTR_ZOPC);
8562   ins_encode(z_rrform(op1, op1));
8563   ins_pipe(pipe_class_dummy);
8564 %}
8565 
8566 instruct compP_reg_mem(iRegP dst, memory src, flagsReg cr)%{
       // Compares the pointer in dst with a pointer loaded from memory (CmpP dst (LoadP src)),
       // folding the load into the compare instruction. Only cr is set; dst is not written.
8567   match(Set cr (CmpP dst (LoadP src)));
8568   ins_cost(MEMORY_REF_COST);
8569   size(Z_DISP3_SIZE);
8570   format %{ "CLG     $dst, $src\t # ptr" %}
8571   opcode(CLG_ZOPC, CLG_ZOPC); // Same opcode in both slots (presumably long/short displacement forms - confirm against z_form_rt_mem_opt).
8572   ins_encode(z_form_rt_mem_opt(dst, src));
8573   ins_pipe(pipe_class_dummy);
8574 %}
8575 
8576 //----------Max and Min--------------------------------------------------------
8577 


9173 // Direct Branch: unconditional jump (Goto) to a label, always emitted in the 6-byte far form.
9174 instruct branchFar(label labl) %{
9175   match(Goto);
9176   effect(USE labl);
9177   ins_cost(BRANCH_COST);
9178   size(6);
9179   format %{ "BRUL   $labl" %}
9180   ins_encode(z_enc_brul(labl));
9181   ins_pipe(pipe_class_dummy);
9182   // This is not a short variant of a branch, but the long variant.
9183   ins_short_branch(0);
9184 %}
9185 
9186 // Conditional Near Branch: 4-byte short form, the counterpart of the 6-byte `branchConFar'.
9187 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9188   // Same match rule as `branchConFar'.
9189   match(If cmp cr);
9190   effect(USE lbl);
9191   ins_cost(BRANCH_COST);
9192   size(4);
9193   format %{ "branch_con_short,$cmp   $lbl" %}
9194   ins_encode(z_enc_branch_con_short(cmp, lbl));
9195   ins_pipe(pipe_class_dummy);
9196   // If set to 1 this indicates that the current instruction is a
9197   // short variant of a long branch. This avoids using this
9198   // instruction in first-pass matching. It will then only be used in
9199   // the `Shorten_branches' pass.
9200   ins_short_branch(1);
9201 %}
9202 
9203 // This is for cases when the z/Architecture conditional branch instruction
9204 // does not reach far enough. So we emit a far branch here, which is
9205 // more expensive.
9206 //
9207 // Conditional Far Branch
9208 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9209   // Same match rule as `branchCon'.
9210   match(If cmp cr);
9211   effect(USE cr, USE lbl); // Unlike `branchCon', cr is listed explicitly as USE here.
9212   // Make more expensive to prefer compare_and_branch over separate instructions.
9213   ins_cost(2 * BRANCH_COST);
9214   size(6);
9215   format %{ "branch_con_far,$cmp   $lbl" %}
9216   ins_encode(z_enc_branch_con_far(cmp, lbl));
9217   ins_pipe(pipe_class_dummy);
9218   // This is not a short variant of a branch, but the long variant.
9219   ins_short_branch(0);
9220 %}
9221 
9222 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{
       // Conditional branch closing a counted loop (CountedLoopEnd). Uses the same
       // short-branch encoding class as `branchCon' and is likewise a short variant.
9223   match(CountedLoopEnd cmp cr);
9224   effect(USE labl);
9225   ins_cost(BRANCH_COST);
9226   size(4);
9227   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9228   ins_encode(z_enc_branch_con_short(cmp, labl));
9229   ins_pipe(pipe_class_dummy);
9230   // If set to 1 this indicates that the current instruction is a
9231   // short variant of a long branch. This avoids using this
9232   // instruction in first-pass matching. It will then only be used in
9233   // the `Shorten_branches' pass.
9234   ins_short_branch(1);
9235 %}


9764 instruct CallLeafNoFPDirect(method meth) %{
       // Call of a runtime leaf routine for the CallLeafNoFP ideal node. Emitted via
       // the generic java-to-runtime call encoding; the size is not fixed (see TODO).
9765   match(CallLeafNoFP);
9766   effect(USE meth);
9767   ins_cost(CALL_COST);
9768   // TODO: s390 port size(VARIABLE_SIZE);
9769   ins_num_consts(1); // Reserves one constant for this instruction (presumably the call target - confirm).
9770   format %{ "CALL,runtime leaf nofp $meth" %}
9771   ins_encode( z_enc_java_to_runtime_call(meth) );
9772   ins_pipe(pipe_class_dummy);
9773   ins_alignment(2);
9774 %}
9775 
9776 // Tail Call; Jump from runtime stub to Java code.
9777 // Also known as an 'interprocedural jump'.
9778 // Target of jump will eventually return to caller.
9779 // TailJump below removes the return address.
9780 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
       // method_oop is not referenced by the encoding; it is an operand of the match
       // rule only (presumably so it is kept live in its fixed register at the jump).
9781   match(TailCall jump_target method_oop);
9782   ins_cost(CALL_COST);
9783   size(2);
9784   format %{ "Jmp     $jump_target\t # $method_oop holds method oop" %}
9785   ins_encode %{ __ z_br($jump_target$$Register); %}
9786   ins_pipe(pipe_class_dummy);
9787 %}
9788 
9789 // Return Instruction: a single 2-byte branch to the address held in Z_R14.
9790 instruct Ret() %{
9791   match(Return);
9792   size(2);
9793   format %{ "BR(Z_R14) // branch to link register" %}
9794   ins_encode %{ __ z_br(Z_R14); %}
9795   ins_pipe(pipe_class_dummy);
9796 %}
9797 
9798 // Tail Jump; remove the return address; jump to target.
9799 // TailCall above leaves the return address around.
9800 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9801 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9802 // "restore" before this instruction (in Epilogue), we need to materialize it
9803 // in %i0.
9804 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{


9903   match(Set pcc (FastUnlock oop box));
9904   effect(TEMP tmp1, TEMP tmp2);
9905   ins_cost(100);
9906   // TODO: s390 port size(FIXED_SIZE);  // emitted code depends on UseBiasedLocking being on/off.
9907   format %{ "FASTUNLOCK  $oop, $box; KILL Z_ARG4, Z_ARG5" %}
9908   ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
9909                                                UseBiasedLocking && !UseOptoBiasInlining); %}
9910   ins_pipe(pipe_class_dummy);
9911 %}
9912 
9913 instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy, flagsReg cr) %{
       // ClearArray with a constant length of operand class SSlenDW. Cheapest of the
       // three ClearArray rules (ins_cost 100) and the only one without a TEMP register.
9914   match(Set dummy (ClearArray cnt base));
9915   effect(KILL cr);
9916   ins_cost(100);
9917   // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to varying #instructions.
9918   format %{ "ClearArrayConst $cnt,$base" %}
9919   ins_encode %{ __ Clear_Array_Const($cnt$$constant, $base$$Register); %}
9920   ins_pipe(pipe_class_dummy);
9921 %}
9922 
9923 instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
       // ClearArray with an arbitrary constant length (immL). More expensive than the
       // SSlenDW variant (ins_cost 200 vs. 100) and needs an odd long register as TEMP.
9924   match(Set dummy (ClearArray cnt base));
9925   effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
9926   ins_cost(200);
9927   // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to optimized constant loader.
9928   format %{ "ClearArrayConstBig $cnt,$base" %}
9929   ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $tmpL$$Register); %}
9930   ins_pipe(pipe_class_dummy);
9931 %}
9932 
9933 instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, allRoddRegL tmpL, flagsReg cr) %{
       // ClearArray with a length only known at run time (cnt in a register). Most
       // expensive of the three ClearArray rules (ins_cost 300); needs an odd long TEMP.
9934   match(Set dummy (ClearArray cnt base));
9935   effect(TEMP tmpL, KILL cr); // R0, R1 are killed, too.
9936   ins_cost(300);
9937   // TODO: s390 port size(FIXED_SIZE);  // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
9938   format %{ "ClearArrayVar $cnt,$base" %}
9939   ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $tmpL$$Register); %}
9940   ins_pipe(pipe_class_dummy);
9941 %}
9942 
9943 // ============================================================================
9944 // CompactStrings
9945 
9946 // String equals for the LL encoding (both strings stored as byte[]).
9947 instruct string_equalsL(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
9948   match(Set result (StrEquals (Binary str1 str2) cnt));
9949   effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
9950   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
9951   ins_cost(300);
9952   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
9953   ins_encode %{
         // Delegates to the shared array_equals helper. The leading 'false' argument is
         // not explained here (presumably "operands are not whole arrays" - confirm in the macro assembler).
9954     __ array_equals(false, $str1$$Register, $str2$$Register,
9955                     $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
9956                     $result$$Register, true /* byte */);
9957   %}
9958   ins_pipe(pipe_class_dummy);
9959 %}


10772 instruct loadV8(iRegL dst, memory mem) %{
        // Loads an 8-byte vector (predicate: memory_size() == 8) into a general
        // purpose long register with a plain 64-bit load (LG).
10773   match(Set dst (LoadVector mem));
10774   predicate(n->as_LoadVector()->memory_size() == 8);
10775   ins_cost(MEMORY_REF_COST);
10776   // TODO: s390 port size(VARIABLE_SIZE);
10777   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10778   opcode(LG_ZOPC, LG_ZOPC); // Same opcode in both slots.
10779   ins_encode(z_form_rt_mem_opt(dst, mem));
10780   ins_pipe(pipe_class_dummy);
10781 %}
10782 
10783 //----------POPULATION COUNT RULES--------------------------------------------
10784 
10785 // Byte reverse
10786 
10787 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
        // Byte-reverses a 32-bit int with a single LRVR. No flagsReg operand is declared.
10788   match(Set dst (ReverseBytesI src));
10789   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10790   ins_cost(DEFAULT_COST);
10791   size(4);
10792   format %{ "LRVR    $dst,$src\t # byte reverse int" %}
10793   opcode(LRVR_ZOPC);
10794   ins_encode(z_rreform(dst, src));
10795   ins_pipe(pipe_class_dummy);
10796 %}
10797 
10798 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
        // Byte-reverses a 64-bit long with a single LRVGR. Same shape as
        // bytes_reverse_int, except that the size is still an open TODO here.
10799   match(Set dst (ReverseBytesL src));
10800   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10801   ins_cost(DEFAULT_COST);
10802   // TODO: s390 port size(FIXED_SIZE);
10803   format %{ "LRVGR   $dst,$src\t # byte reverse long" %}
10804   opcode(LRVGR_ZOPC);
10805   ins_encode(z_rreform(dst, src));
10806   ins_pipe(pipe_class_dummy);
10807 %}
10808 
10809 // Leading zeroes
10810 
10811 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10812 // returns the bit position of the leftmost 1 in the 64bit source register.
10813 // As the bits are numbered from left to right (0..63), the returned
10814 // position index is equivalent to the number of leading zeroes.
10815 // If no 1-bit is found (i.e. the register contains zero), the instruction
10816 // returns position 64. That's exactly what we need.
10817 
10818 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
        // Number of leading zero bits of a 32-bit int, computed with FLOGR on the
        // value shifted into the upper register half. dst is an even register and
        // tmp its odd partner; tmp is only declared because it gets killed.
10819   match(Set dst (CountLeadingZerosI src));
10820   effect(KILL tmp, KILL cr);
10821   ins_cost(3 * DEFAULT_COST);
10822   size(14);
10823   format %{ "SLLG    $dst,$src,32\t # no need to always count 32 zeroes first\n\t"
10824             "IILH    $dst,0x8000 \t # insert \"stop bit\" to force result 32 for zero src.\n\t"
10825             "FLOGR   $dst,$dst"
10826          %}
10827   ins_encode %{
10828     // Performance experiments indicate that "FLOGR" is using some kind of
10829     // iteration to find the leftmost "1" bit.
10830     //
10831     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10832     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10833     // We could gain measurable speedup in micro benchmark:
10834     //
10835     //               leading   trailing
10836     //   z10:   int     2.04       1.68
10837     //         long     1.00       1.02
10838     //   z196:  int     0.99       1.23
10839     //         long     1.00       1.11
10840     //
10841     // By shifting the argument into the high-word instead of zero-extending it.
10842     // The add'l branch on condition (taken for a zero argument, very infrequent,
10843     // good prediction) is well compensated for by the savings.
10844     //
10845     // We leave the previous implementation in for some time in the future when
10846     // the "FLOGR" instruction may become less iterative.
          // NOTE(review): the sequence emitted below contains no branch and no second
          // implementation; the two remarks above presumably describe an earlier variant.
10847 
10848     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10849     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10850     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10851     __ z_flogr($dst$$Register, $dst$$Register);
10852   %}
10853   ins_pipe(pipe_class_dummy);
10854 %}
10855 
10856 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
        // Number of leading zero bits of a 64-bit long: a single FLOGR on src.
        // As in the int variant, dst is an even register and its odd partner tmp is killed.
10857   match(Set dst (CountLeadingZerosL src));
10858   effect(KILL tmp, KILL cr);
10859   ins_cost(DEFAULT_COST);
10860   size(4);
10861   format %{ "FLOGR   $dst,$src \t # count leading zeros (long)\n\t" %}
10862   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10863   ins_pipe(pipe_class_dummy);
10864 %}
10865 
10866 // trailing zeroes
10867 
10868 // We transform the trailing zeroes problem to a leading zeroes problem
10869 // such that we can use the FLOGR instruction to our advantage.
10870 
10871 // With
10872 //   tmp1 = src - 1
10873 // we flip all trailing zeroes to ones and the rightmost one to zero.
10874 // All other bits remain unchanged.
10875 // With the complement
10876 //   tmp2 = ~src
10877 // we get all ones in the trailing zeroes positions. Thus,
10878 //   tmp3 = tmp1 & tmp2
10879 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10880 // Now we can apply FLOGR and get 64-(trailing zeroes).
10881 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10882   match(Set dst (CountTrailingZerosI src));
10883   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10884   ins_cost(8 * DEFAULT_COST);
10885   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10886   format %{ "LLGFR   $dst,$src  \t # clear upper 32 bits (we are dealing with int)\n\t"
10887             "LCGFR   $tmp,$src  \t # load 2's complement (32->64 bit)\n\t"
10888             "AGHI    $dst,-1    \t # tmp1 = src-1\n\t"
10889             "AGHI    $tmp,-1    \t # tmp2 = -src-1 = ~src\n\t"
10890             "NGR     $dst,$tmp  \t # tmp3 = tmp1&tmp2\n\t"
10891             "FLOGR   $dst,$dst  \t # count trailing zeros (int)\n\t"
10892             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10893             "LCR     $dst,$dst  \t # res = -tmp4"
10894          %}
10895   ins_encode %{
10896     Register Rdst = $dst$$Register;
10897     Register Rsrc = $src$$Register;
10898     // Rtmp is only needed for the zero-argument shortcut. With kill effect in
10899     // match rule Rsrc = roddReg would be possible, saving one register.
10900     Register Rtmp = $tmp$$Register;
10901 
10902     assert_different_registers(Rdst, Rsrc, Rtmp);
10903 
10904     // Algorithm:
10905     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10906     //   All other bits in the result are zero.
10907     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10908     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10909 
10910     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10911     Label done;
10912     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10913     __ z_lcgfr(Rtmp, Rsrc);


10919                                        // into upper half of reg. Not relevant with sllg below.
10920     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10921     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10922                                        // Depends on CC set by ahi above.
10923                                        // Taken very infrequently, good prediction, no BHT entry.
10924                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10925                                        // after SLLG Rdst == 0(64bit)).
10926     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10927     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10928     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10929     __ bind(done);
10930   %}
10931   ins_pipe(pipe_class_dummy);
10932 %}
10933 
10934 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
        // Number of trailing zero bits of a 64-bit long, reduced to a leading-zero
        // count so that FLOGR can be used. dst is an even register; its odd partner
        // tmp is killed by FLOGR (see comment in the encoding).
10935   match(Set dst (CountTrailingZerosL src));
10936   effect(TEMP_DEF dst, KILL tmp, KILL cr);
10937   ins_cost(8 * DEFAULT_COST);
10938   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10939   format %{ "LCGR    $dst,$src  \t # preserve src\n\t"
10940             "NGR     $dst,$src  \t #\n\t"
10941             "AGHI    $dst,-1    \t # tmp1 = src-1\n\t"
10942             "FLOGR   $dst,$dst  \t # count trailing zeros (long), kill $tmp\n\t"
10943             "AHI     $dst,-64   \t # tmp4 = 64-(trailing zeroes)-64\n\t"
10944             "LCR     $dst,$dst  \t #"
10945          %}
10946   ins_encode %{
10947     Register Rdst = $dst$$Register;
10948     Register Rsrc = $src$$Register;
10949     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10950 
10951     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
10952     __ z_lcgr(Rdst, Rsrc);  // Rdst = -src
10953     __ z_ngr(Rdst, Rsrc);   // Rdst = src & -src
10954     __ add2reg(Rdst,   -1); // Subtract one from the previous result.
10955     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
10956     __ add2reg(Rdst,  -64); // The format string lists this step as AHI; the instruction add2reg emits is not fixed here.
10957     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10958   %}
10959   ins_pipe(pipe_class_dummy);
10960 %}
10961 
10962 
10963 // bit count
10964 
10965 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
        // Population count of a 32-bit int. POPCNT's result is folded by two
        // shift-and-add steps into the lowest byte (see the per-line comments),
        // which is then zero-extended. Needs one TEMP register distinct from dst.
10966   match(Set dst (PopCountI src));
10967   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10968   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10969   ins_cost(DEFAULT_COST);
10970   size(24); // Fixed size; has to stay in sync with the six instructions emitted below.
10971   format %{ "POPCNT  $dst,$src\t # pop count int" %}
10972   ins_encode %{
10973     Register Rdst = $dst$$Register;
10974     Register Rsrc = $src$$Register;
10975     Register Rtmp = $tmp$$Register;
10976 
10977     // Prefer compile-time assertion over run-time SIGILL.
10978     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10979     assert_different_registers(Rdst, Rtmp);
10980 
10981     // Version 2: shows 10%(z196) improvement over original.
10982     __ z_popcnt(Rdst, Rsrc);
10983     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10984     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10985     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10986     __ z_alr(Rdst, Rtmp);      //   into byte7
10987     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10988   %}
10989   ins_pipe(pipe_class_dummy);
10990 %}
10991 
10992 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
        // Population count of a 64-bit long. After POPCNT, the result is folded by
        // add/shift-left steps and the final count is taken from the top byte
        // (last step shifts right by 56). Needs one TEMP register distinct from dst.
10993   match(Set dst (PopCountL src));
10994   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10995   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10996   ins_cost(DEFAULT_COST);
10997   // TODO: s390 port size(FIXED_SIZE);
10998   format %{ "POPCNT  $dst,$src\t # pop count long" %}
10999   ins_encode %{
11000     Register Rdst = $dst$$Register;
11001     Register Rsrc = $src$$Register;
11002     Register Rtmp = $tmp$$Register;
11003 
11004     // Prefer compile-time assertion over run-time SIGILL.
11005     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
11006     assert_different_registers(Rdst, Rtmp);
11007 
11008     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
11009     __ z_popcnt(Rdst, Rsrc);
11010     __ z_ahhlr(Rdst, Rdst, Rdst);  // First folding step (presumably adds the low word's counts into the high word - confirm).
11011     __ z_sllg(Rtmp, Rdst, 16);
11012     __ z_algr(Rdst, Rtmp);         // Fold counts that are 16 bits apart.
11013     __ z_sllg(Rtmp, Rdst,  8);
11014     __ z_algr(Rdst, Rtmp);         // Fold counts that are 8 bits apart.
11015     __ z_srlg(Rdst, Rdst, 56);     // Move the top byte down to bits 0..7; this is the result.
11016   %}
11017   ins_pipe(pipe_class_dummy);
11018 %}
11019 
11020 //----------SMARTSPILL RULES---------------------------------------------------
11021 // These must follow all instruction definitions as they use the names
11022 // defined in the instructions definitions.
11023 
11024 // ============================================================================
11025 // TYPE PROFILING RULES

   1 //
   2 // Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3 // Copyright (c) 2017, SAP SE. All rights reserved.
   4 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5 //
   6 // This code is free software; you can redistribute it and/or modify it
   7 // under the terms of the GNU General Public License version 2 only, as
   8 // published by the Free Software Foundation.
   9 //
  10 // This code is distributed in the hope that it will be useful, but WITHOUT
  11 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13 // version 2 for more details (a copy is included in the LICENSE file that
  14 // accompanied this code).
  15 //
  16 // You should have received a copy of the GNU General Public License version
  17 // 2 along with this work; if not, write to the Free Software Foundation,
  18 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19 //
  20 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21 // or visit www.oracle.com if you need additional information or have any
  22 // questions.
  23 //


 457 // Longs in 1 register. Aligned adjacent hi/lo pairs.
     // Commented-out entries (R0, R1, R8, R14, R15) are deliberately excluded from
     // allocation; the trailing remarks name the role given for each where one is stated.
 458 reg_class z_long_reg(
 459 /*Z_R0_H,Z_R0*/     // R0
 460 /*Z_R1_H,Z_R1*/
 461   Z_R2_H,Z_R2,
 462   Z_R3_H,Z_R3,
 463   Z_R4_H,Z_R4,
 464   Z_R5_H,Z_R5,
 465   Z_R6_H,Z_R6,
 466   Z_R7_H,Z_R7,
 467 /*Z_R8_H,Z_R8,*/    // Z_thread
 468   Z_R9_H,Z_R9,
 469   Z_R10_H,Z_R10,
 470   Z_R11_H,Z_R11,
 471   Z_R12_H,Z_R12,
 472   Z_R13_H,Z_R13
 473 /*Z_R14_H,Z_R14,*/  // return_pc
 474 /*Z_R15_H,Z_R15*/   // SP
 475 );
 476 













 477 
 478 // Special Class for Condition Code Flags Register: contains only Z_CR.
 479 
 480 reg_class z_condition_reg(
 481   Z_CR
 482 );
 483 
 484 // Scratch register for late profiling. Callee saved.
     // NOTE(review): the class holds Z_R2 (hi/lo halves), which also appears as an ordinary
     // allocatable register in z_long_reg; confirm that "Callee saved" is accurate for it.
 485 reg_class z_rscratch2_bits64_reg(Z_R2_H, Z_R2);
 486 
 487 
 488 // Float Register Classes
 489 
 490 reg_class z_flt_reg(
 491   Z_F0,
 492 /*Z_F1,*/ // scratch
 493   Z_F2,
 494   Z_F3,
 495   Z_F4,
 496   Z_F5,


1358     // The ic_miss_stub will handle the null pointer exception.
1359     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1360     __ z_br(R1_ic_miss_stub_addr);
1361     __ bind(valid);
1362   }
1363 
1364   // Check whether this method is the proper implementation for the class of
1365   // the receiver (ic miss check).
1366   {
1367     Label valid;
1368     // Compare cached class against klass from receiver.
1369     // This also does an implicit null check!
1370     __ compare_klass_ptr(ic_klass, klass_offset, R2_receiver, false);
1371     __ z_bre(valid);
1372     // The inline cache points to the wrong method. Call the
1373     // ic_miss_stub to find the proper method.
1374     __ load_const_optimized(R1_ic_miss_stub_addr, icmiss);
1375     __ z_br(R1_ic_miss_stub_addr);
1376     __ bind(valid);
1377   }
1378 
1379 }
1380 
1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
       // No fixed size is hard-coded for this node; the generic MachNode::size()
       // computation is used instead.
1382   // Determine size dynamically.
1383   return MachNode::size(ra_);
1384 }
1385 
1386 //=============================================================================
1387 
1388 %} // interrupt source section
1389 
1390 source_hpp %{ // Header information of the source block.
1391 
1392 class HandlerImpl {
1393  public:
1394 
1395   static int emit_exception_handler(CodeBuffer &cbuf);
1396   static int emit_deopt_handler(CodeBuffer& cbuf);
1397 
1398   static uint size_exception_handler() {


1614 // Should correspond to setting above
     // (false: the array-initialization count is not given in bytes).
1615 const bool Matcher::init_array_count_is_in_bytes = false;
1616 
1617 // Suppress CMOVL. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
     // The suppression is expressed by reporting ConditionalMoveLimit as the cost
     // (presumably the shared code only converts when the cost is below that limit - confirm).
1618 const int Matcher::long_cmove_cost() { return ConditionalMoveLimit; }
1619 
1620 // Suppress CMOVF. Conditional move available on z/Architecture only from z196 onwards. Not exploited yet.
     // The suppression is expressed by reporting ConditionalMoveLimit as the cost
     // (presumably the shared code only converts when the cost is below that limit - confirm).
1621 const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; }
1622 
1623 // Does the CPU require postalloc expand (see block.cpp for description of postalloc expand)?
     // Not on this platform.
1624 const bool Matcher::require_postalloc_expand = false;
1625 
1626 // Do we need to mask the count passed to shift instructions or does
1627 // the cpu only look at the lower 5/6 bits anyway?
1628 // 32bit shifts mask in emitter, 64bit shifts need no mask.
1629 // Constant shift counts are handled in Ideal phase.
     // Hence no masking is requested from the matcher.
1630 const bool Matcher::need_masked_shift_count = false;
1631 
1632 // Set this as clone_shift_expressions.
     // Returns true only when compressed oops need neither a heap base nor a shift
     // (narrow_oop_base() == NULL and narrow_oop_shift() == 0); false otherwise.
1633 bool Matcher::narrow_oop_use_complex_address() {
1634   if (Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0) return true;
1635   return false;
1636 }
1637 
     // Complex addressing for compressed class pointers is never requested on this
     // platform; the function always returns false (see the TODO below).
1638 bool Matcher::narrow_klass_use_complex_address() {
1639   NOT_LP64(ShouldNotCallThis());
1640   assert(UseCompressedClassPointers, "only for compressed klass code");
1641   // TODO HS25: z port if (MatchDecodeNodes) return true;
1642   return false;
1643 }
1644 
     // True when there is no narrow-oop heap base; the shift is not considered here.
1645 bool Matcher::const_oop_prefer_decode() {
1646   // Prefer ConN+DecodeN over ConP in simple compressed oops mode.
1647   return Universe::narrow_oop_base() == NULL;
1648 }
1649 
     // True when there is no narrow-klass base; the shift is not considered here.
1650 bool Matcher::const_klass_prefer_decode() {
1651   // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
1652   return Universe::narrow_klass_base() == NULL;
1653 }
1654 
1655 // Is it better to copy float constants, or load them directly from memory?
1656 // Most RISCs will have to materialize an address into a
1657 // register first, so they would do better to copy the constant from stack.
     // This port answers "no" (false).
1658 const bool Matcher::rematerialize_float_constants = false;
1659 
1660 // If CPU can load and store mis-aligned doubles directly then no fixup is
1661 // needed. Else we split the double into 2 integer pieces and move it
1662 // piece-by-piece. Only happens when passing doubles into C code as the
1663 // Java calling convention forces doubles to be aligned.
     // This port declares misaligned doubles as ok (true), i.e. no fixup.
1664 const bool Matcher::misaligned_doubles_ok = true;
1665 
1666 // Advertise here if the CPU requires explicit rounding operations
1667 // to implement the UseStrictFP mode.
     // This port does not (false).
1668 const bool Matcher::strict_fp_requires_explicit_rounding = false;
1669 
1670 // Do floats take an entire double register or just half?
1671 //
1672 // A float resides in a zarch double register. When storing it by


3361 operand rarg4RegN() %{
       // Narrow-oop operand pinned to register class z_rarg4_int_reg.
       // NOTE(review): rarg5RegN uses a *_ptrN_reg class while this one uses *_int_reg - confirm this is intended.
3362   constraint(ALLOC_IN_RC(z_rarg4_int_reg));
3363   match(iRegN);
3364   format %{ %}
3365   interface(REG_INTER);
3366 %}
3367 
3368 operand rarg5RegN() %{
       // Narrow-oop operand pinned to register class z_rarg5_ptrN_reg.
3369   constraint(ALLOC_IN_RC(z_rarg5_ptrN_reg));
3370   match(iRegN);
3371   format %{ %}
3372   interface(REG_INTER);
3373 %}
3374 
3375 // Long Register: general long operand allocated from z_long_reg. It also
     // accepts each of the more specific long operands listed in the match rules below.
3376 operand iRegL() %{
3377   constraint(ALLOC_IN_RC(z_long_reg));
3378   match(RegL);
3379   match(revenRegL);
3380   match(roddRegL);

3381   match(rarg1RegL);
3382   match(rarg5RegL);
3383   format %{ %}
3384   interface(REG_INTER);
3385 %}
3386 
3387 // revenRegL and roddRegL constitute an even-odd-pair.
     // This is the even half; it is pinned to register class z_rarg3_long_reg.
3388 operand revenRegL() %{
3389   constraint(ALLOC_IN_RC(z_rarg3_long_reg));
3390   match(iRegL);
3391   format %{ %}
3392   interface(REG_INTER);
3393 %}
3394 
3395 // revenRegL and roddRegL constitute an even-odd-pair.
     // This is the odd half; it is pinned to register class z_rarg4_long_reg.
3396 operand roddRegL() %{
3397   constraint(ALLOC_IN_RC(z_rarg4_long_reg));
3398   match(iRegL);
3399   format %{ %}
3400   interface(REG_INTER);
3401 %}
3402 








3403 operand rarg1RegL() %{
       // Long operand pinned to register class z_rarg1_long_reg.
3404   constraint(ALLOC_IN_RC(z_rarg1_long_reg));
3405   match(iRegL);
3406   format %{ %}
3407   interface(REG_INTER);
3408 %}
3409 
3410 operand rarg5RegL() %{
       // Long operand pinned to register class z_rarg5_long_reg.
3411   constraint(ALLOC_IN_RC(z_rarg5_long_reg));
3412   match(iRegL);
3413   format %{ %}
3414   interface(REG_INTER);
3415 %}
3416 
3417 // Condition Code Flag Registers
3418 operand flagsReg() %{
3419   constraint(ALLOC_IN_RC(z_condition_reg));
3420   match(RegFlags);
3421   format %{ "CR" %}
3422   interface(REG_INTER);


3468   interface(REG_INTER);
3469 %}
3470 
3471 operand compiler_method_oop_regP(iRegP reg) %{
       // Pointer operand restricted to register class z_r1_RegP; matches any iRegP.
3472   constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_oop_reg
3473   match(reg);
3474   format %{ %}
3475   interface(REG_INTER);
3476 %}
3477 
3478 operand interpreter_method_oop_regP(iRegP reg) %{
       // Pointer operand restricted to register class z_r9_regP; matches any iRegP.
3479   constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_oop_reg
3480   match(reg);
3481   format %{ %}
3482   interface(REG_INTER);
3483 %}
3484 
3485 // Operands to remove register moves in unscaled mode.
3486 // Match read/write registers with an EncodeP node if neither shift nor add are required.
3487 operand iRegP2N(iRegP reg) %{
       // The predicate checks the shift only, and additionally requires that the
       // EncodeP node has no input 0 (in(0) == NULL).
3488   predicate(Universe::narrow_oop_shift() == 0 && _leaf->as_EncodeP()->in(0) == NULL);
3489   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3490   match(EncodeP reg);
3491   format %{ "$reg" %}
       // NOTE(review): no ';' after interface(REG_INTER), unlike the other operands
       // in this file - confirm ADLC treats both spellings the same.
3492   interface(REG_INTER)
3493 %}
3494 
3495 operand iRegN2P(iRegN reg) %{
       // Lets a DecodeN be matched as a plain register operand when decoding is a
       // no-op: no heap base, no shift, and the DecodeN node has no input 0.
3496   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0 &&
3497             _leaf->as_DecodeN()->in(0) == NULL);
3498   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3499   match(DecodeN reg);
3500   format %{ "$reg" %}
       // NOTE(review): no ';' after interface(REG_INTER), unlike the other operands
       // in this file - confirm ADLC treats both spellings the same.
3501   interface(REG_INTER)
3502 %}
3503 
3504 
3505 //----------Complex Operands---------------------------------------------------
3506 
3507 // Indirect Memory Reference
3508 operand indirect(memoryRegP base) %{
3509   constraint(ALLOC_IN_RC(z_memory_ptr_reg));
3510   match(base);
3511   op_cost(1);
3512   format %{ "#0[,$base]" %}
3513   interface(MEMORY_INTER) %{
3514     base($base);
3515     index(0xffffFFFF); // noreg
3516     scale(0x0);


4279 // Load Double - UNaligned. Matches LoadD_unaligned and loads $mem into the double register $dst.
4280 instruct loadD_unaligned(regD dst, memory mem) %{
4281   match(Set dst (LoadD_unaligned mem));
4282   ins_cost(MEMORY_REF_COST);
4283   size(Z_DISP_SIZE);
4284   format %{ "LD(Y)    $dst,$mem" %}
4285   opcode(LDY_ZOPC, LD_ZOPC);                 // two opcodes supplied: LDY and LD
4286   ins_encode(z_form_rt_mem_opt(dst, mem));   // presumably picks one of the two opcodes based on the displacement - TODO confirm in the encoding class
4287   ins_pipe(pipe_class_dummy);
4288 %}
4289 
4290 
4291 //----------------------
4292 //  IMMEDIATES
4293 //----------------------
4294 
4295 instruct loadConI(iRegI dst, immI src) %{  // Load an int constant (immI) into $dst using LGFI.
4296   match(Set dst src);
4297   ins_cost(DEFAULT_COST);
4298   size(6);
4299   format %{ "LGFI     $dst,$src\t # (int)" %}
4300   ins_encode %{ __ z_lgfi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
4301   ins_pipe(pipe_class_dummy);
4302 %}
4303 
4304 instruct loadConI16(iRegI dst, immI16 src) %{  // Load a 16-bit int constant (immI16) into $dst using LGHI.
4305   match(Set dst src);
4306   ins_cost(DEFAULT_COST_LOW);                  // lower cost than loadConI (DEFAULT_COST), same match rule
4307   size(4);
4308   format %{ "LGHI     $dst,$src\t # (int)" %}
4309   ins_encode %{ __ z_lghi($dst$$Register, $src$$constant); %}  // Sign-extend to 64 bit, it's at no cost.
4310   ins_pipe(pipe_class_dummy);
4311 %}
4312 
4313 instruct loadConI_0(iRegI dst, immI_0 src, flagsReg cr) %{  // Load int constant zero by XGR-ing $dst with itself.
4314   match(Set dst src);
4315   effect(KILL cr);                 // the condition code is declared killed by this rule
4316   ins_cost(DEFAULT_COST_LOW);
4317   size(4);
4318   format %{ "loadConI $dst,$src\t # (int) XGR because ZERO is loaded" %}
4319   opcode(XGR_ZOPC);
4320   ins_encode(z_rreform(dst, dst)); // both register fields are $dst
4321   ins_pipe(pipe_class_dummy);
4322 %}
4323 
4324 instruct loadConUI16(iRegI dst, uimmI16 src) %{
4325   match(Set dst src);
4326   // TODO: s390 port size(FIXED_SIZE);
4327   format %{ "LLILL    $dst,$src" %}
4328   opcode(LLILL_ZOPC);


4684 // See cOop encoding classes for elaborate comment.
4685 
4686 // Moved here because it is needed in expand rules for encode (see encodeP_Ex / encodeP_NN_Ex).
4687 // Long negation: matches (SubL 0 src) and emits LCGR.
4688 instruct negL_reg_reg(iRegL dst, immL_0 zero, iRegL src, flagsReg cr) %{
4689   match(Set dst (SubL zero src));
4690   effect(KILL cr);   // the condition code is declared killed by this rule
4691   size(4);
4692   format %{ "NEG     $dst, $src\t # long" %}
4693   ins_encode %{ __ z_lcgr($dst$$Register, $src$$Register); %}
4694   ins_pipe(pipe_class_dummy);
4695 %}
4696 
4697 // Load Compressed Pointer
4698 
4699 // Load narrow oop: matches LoadN and loads $mem into the narrow-oop register $dst.
4700 instruct loadN(iRegN dst, memory mem) %{
4701   match(Set dst (LoadN mem));
4702   ins_cost(MEMORY_REF_COST);
4703   size(Z_DISP3_SIZE);
4704   format %{ "LoadN  $dst,$mem\t# (cOop)" %}
4705   opcode(LLGF_ZOPC, LLGF_ZOPC);              // both opcode slots are LLGF
4706   ins_encode(z_form_rt_mem_opt(dst, mem));
4707   ins_pipe(pipe_class_dummy);
4708 %}
4709 
4710 // Load narrow Klass Pointer: matches LoadNKlass; same encoding as loadN.
4711 instruct loadNKlass(iRegN dst, memory mem) %{
4712   match(Set dst (LoadNKlass mem));
4713   ins_cost(MEMORY_REF_COST);
4714   size(Z_DISP3_SIZE);
4715   format %{ "LoadNKlass $dst,$mem\t# (klass cOop)" %}
4716   opcode(LLGF_ZOPC, LLGF_ZOPC);              // both opcode slots are LLGF
4717   ins_encode(z_form_rt_mem_opt(dst, mem));
4718   ins_pipe(pipe_class_dummy);
4719 %}
4720 
4721 // Load constant Compressed Pointer
4722 
4723 instruct loadConN(iRegN dst, immN src) %{  // Load a constant narrow oop (immN) into $dst.
4724   match(Set dst src);
4725   ins_cost(DEFAULT_COST);
4726   size(6);
4727   format %{ "loadConN    $dst,$src\t # (cOop)" %}
4728   ins_encode %{
4729     AddressLiteral cOop = __ constant_oop_address((jobject)$src$$constant);  // wrap the constant as an oop address literal
4730     __ relocate(cOop.rspec(), 1);                                            // record its relocation spec before emitting the load
4731     __ load_narrow_oop($dst$$Register, (narrowOop)cOop.value());
4732   %}
4733   ins_pipe(pipe_class_dummy);
4734 %}
4735 


4745 %}
4746 
4747 instruct loadConNKlass(iRegN dst, immNKlass src) %{  // Load a constant narrow klass (immNKlass) into $dst.
4748   match(Set dst src);
4749   ins_cost(DEFAULT_COST);
4750   size(6);
4751   format %{ "loadConNKlass $dst,$src\t # (cKlass)" %}
4752   ins_encode %{
4753     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);  // wrap the constant as a metadata address literal
4754     __ relocate(NKlass.rspec(), 1);                                                   // record its relocation spec before emitting the load
4755     __ load_narrow_klass($dst$$Register, (Klass*)NKlass.value());
4756   %}
4757   ins_pipe(pipe_class_dummy);
4758 %}
4759 
4760 // Load and Decode Compressed Pointer
4761 // optimized variants for Unscaled cOops
4762 
4763 instruct decodeLoadN(iRegP dst, memory mem) %{  // Combined LoadN + DecodeN into a pointer register (unscaled cOops variant).
4764   match(Set dst (DecodeN (LoadN mem)));
4765   predicate(false && (Universe::narrow_oop_base()==NULL)&&(Universe::narrow_oop_shift()==0));  // leading 'false' makes the predicate always false
4766   ins_cost(MEMORY_REF_COST);
4767   size(Z_DISP3_SIZE);
4768   format %{ "DecodeLoadN  $dst,$mem\t# (cOop Load+Decode)" %}
4769   opcode(LLGF_ZOPC, LLGF_ZOPC);              // both opcode slots are LLGF
4770   ins_encode(z_form_rt_mem_opt(dst, mem));
4771   ins_pipe(pipe_class_dummy);
4772 %}
4773 
4774 instruct decodeLoadNKlass(iRegP dst, memory mem) %{  // Combined LoadNKlass + DecodeNKlass into a pointer register.
4775   match(Set dst (DecodeNKlass (LoadNKlass mem)));
4776   predicate(false && (Universe::narrow_klass_base()==NULL)&&(Universe::narrow_klass_shift()==0));  // leading 'false' makes the predicate always false
4777   ins_cost(MEMORY_REF_COST);
4778   size(Z_DISP3_SIZE);
4779   format %{ "DecodeLoadNKlass  $dst,$mem\t# (load/decode NKlass)" %}
4780   opcode(LLGF_ZOPC, LLGF_ZOPC);              // both opcode slots are LLGF
4781   ins_encode(z_form_rt_mem_opt(dst, mem));
4782   ins_pipe(pipe_class_dummy);
4783 %}
4784 
4785 instruct decodeLoadConNKlass(iRegP dst, immNKlass src) %{  // DecodeNKlass of a constant: loads the Klass* value as a constant.
4786   match(Set dst (DecodeNKlass src));
4787   ins_cost(3 * DEFAULT_COST);
4788   size(12);
4789   format %{ "DecodeLoadConNKlass  $dst,$src\t # decode(cKlass)" %}
4790   ins_encode %{
4791     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src$$constant);  // wrap the constant as a metadata address literal
4792     __ relocate(NKlass.rspec(), 1);                                                   // record its relocation spec before emitting the load
4793     __ load_const($dst$$Register, (Klass*)NKlass.value());                            // load_const with the Klass* value, not load_narrow_klass
4794   %}
4795   ins_pipe(pipe_class_dummy);
4796 %}
4797 
4798 // Decode Compressed Pointer
4799 
4800 // General decoder: DecodeN via MacroAssembler::oop_decoder without a separately loaded base.
4801 instruct decodeN(iRegP dst, iRegN src, flagsReg cr) %{
4802   match(Set dst (DecodeN src));
4803   effect(KILL cr);
4804   predicate(Universe::narrow_oop_base() == NULL || !ExpandLoadingBaseDecode);  // complement of decodeN_Ex's predicate
4805   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4806   // TODO: s390 port size(VARIABLE_SIZE);
4807   format %{ "decodeN  $dst,$src\t# (decode cOop)" %}
4808   ins_encode %{  __ oop_decoder($dst$$Register, $src$$Register, true); %}  // third argument is true here, false in decodeN_NN
4809   ins_pipe(pipe_class_dummy);
4810 %}
4811 
4812 // General Klass decoder: DecodeNKlass via MacroAssembler::decode_klass_not_null.
4813 instruct decodeKlass(iRegP dst, iRegN src, flagsReg cr) %{
4814   match(Set dst (DecodeNKlass src));
4815   effect(KILL cr);
4816   ins_cost(3 * DEFAULT_COST);
4817   format %{ "decode_klass $dst,$src" %}
4818   ins_encode %{ __ decode_klass_not_null($dst$$Register, $src$$Register); %}
4819   ins_pipe(pipe_class_dummy);
4820 %}
4821 
4822 // General decoder, NN variant: applies when the node's type is NotNull or Constant.
4823 instruct decodeN_NN(iRegP dst, iRegN src, flagsReg cr) %{
4824   match(Set dst (DecodeN src));
4825   effect(KILL cr);
4826   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4827              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4828             (Universe::narrow_oop_base()== NULL || !ExpandLoadingBaseDecode_NN));  // base/flag condition is the complement of decodeN_NN_Ex's
4829   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);   // cheaper than decodeN (no BRANCH_COST term)
4830   // TODO: s390 port size(VARIABLE_SIZE);
4831   format %{ "decodeN  $dst,$src\t# (decode cOop NN)" %}
4832   ins_encode %{ __ oop_decoder($dst$$Register, $src$$Register, false); %}  // third argument is false here, true in decodeN
4833   ins_pipe(pipe_class_dummy);
4834 %}
4835 
4836   instruct loadBase(iRegL dst, immL baseImm) %{  // Helper without a match rule; instantiated from the expand blocks of the *_Ex rules.
4837     effect(DEF dst, USE baseImm);
4838     predicate(false);                            // always-false predicate
4839     format %{ "llihl    $dst=$baseImm \t// load heap base" %}
4840     ins_encode %{ __ get_oop_base($dst$$Register, $baseImm$$constant); %}
4841     ins_pipe(pipe_class_dummy);
4842   %}
4843 
4844   // Decoder for heapbased mode peeling off loading the base: the base arrives in register $base (used by decodeN_Ex).
4845   instruct decodeN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4846     match(Set dst (DecodeN src base));
4847     // Note: Effect TEMP dst was used with the intention to get
4848     // different regs for dst and base, but this has caused ADLC to
4849     // generate wrong code. Oop_decoder generates additional lgr when
4850     // dst==base.
4851     effect(KILL cr);
4852     predicate(false);   // always-false predicate; the rule is referenced from an expand block
4853     // TODO: s390 port size(VARIABLE_SIZE);
4854     format %{ "decodeN  $dst = ($src == 0) ? NULL : ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4855     ins_encode %{
4856       __ oop_decoder($dst$$Register, $src$$Register, true, $base$$Register,
4857                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));  // offset is derived from the narrow oop base
4858     %}
4859     ins_pipe(pipe_class_dummy);
4860   %}
4861 
4862   // Decoder for heapbased mode peeling off loading the base, NN variant (used by decodeN_NN_Ex).
4863   instruct decodeN_NN_base(iRegP dst, iRegN src, iRegL base, flagsReg cr) %{
4864     match(Set dst (DecodeN src base));
4865     effect(KILL cr);
4866     predicate(false);   // always-false predicate; the rule is referenced from an expand block
4867     // TODO: s390 port size(VARIABLE_SIZE);
4868     format %{ "decodeN  $dst = ($src << 3) + $base + pow2_offset\t# (decode cOop)" %}
4869     ins_encode %{
4870       __ oop_decoder($dst$$Register, $src$$Register, false, $base$$Register,
4871                      (jlong)MacroAssembler::get_oop_base_pow2_offset((uint64_t)(intptr_t)Universe::narrow_oop_base()));  // offset is derived from the narrow oop base
4872     %}
4873     ins_pipe(pipe_class_dummy);
4874   %}
4875 
4876 // Decoder for heapbased mode peeling off loading the base: expands into loadBase + decodeN_base.
4877 instruct decodeN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4878   match(Set dst (DecodeN src));
4879   predicate(Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode);  // complement of decodeN's predicate
4880   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST + BRANCH_COST);
4881   // TODO: s390 port size(VARIABLE_SIZE);
4882   expand %{
4883     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}  // the heap base as a long immediate
4884     iRegL base;                                                      // new register receiving the base
4885     loadBase(base, baseImm);
4886     decodeN_base(dst, src, base, cr);
4887   %}
4888 %}
4889 
4890 // Decoder for heapbased mode peeling off loading the base, NN variant: expands into loadBase + decodeN_NN_base.
4891 instruct decodeN_NN_Ex(iRegP dst, iRegN src, flagsReg cr) %{
4892   match(Set dst (DecodeN src));
4893   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull ||
4894              n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant) &&
4895             Universe::narrow_oop_base() != NULL && ExpandLoadingBaseDecode_NN);  // same type test as decodeN_NN, opposite base/flag condition
4896   ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4897   // TODO: s390 port size(VARIABLE_SIZE);
4898   expand %{
4899     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}  // the heap base as a long immediate
4900     iRegL base;                                                      // new register receiving the base
4901     loadBase(base, baseImm);
4902     decodeN_NN_base(dst, src, base, cr);
4903   %}
4904 %}
4905 
4906 //  Encode Compressed Pointer
4907 
4908 // General encoder: EncodeP for pointers whose type is not NotNull, via MacroAssembler::oop_encoder.
4909 instruct encodeP(iRegN dst, iRegP src, flagsReg cr) %{
4910   match(Set dst (EncodeP src));
4911   effect(KILL cr);
4912   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4913             (Universe::narrow_oop_base() == 0 ||
4914              Universe::narrow_oop_base_disjoint() ||
4915              !ExpandLoadingBaseEncode));   // no base, disjoint base, or expansion switched off
4916   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4917   // TODO: s390 port size(VARIABLE_SIZE);
4918   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
4919   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, true, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}  // passes Z_R1_scratch and -1 where the *_base rules pass $base and an offset
4920   ins_pipe(pipe_class_dummy);
4921 %}
4922 
4923 // General class encoder: EncodePKlass via MacroAssembler::encode_klass_not_null.
4924 instruct encodeKlass(iRegN dst, iRegP src, flagsReg cr) %{
4925   match(Set dst (EncodePKlass src));
4926   effect(KILL cr);
4927   format %{ "encode_klass $dst,$src" %}
4928   ins_encode %{ __ encode_klass_not_null($dst$$Register, $src$$Register); %}
4929   ins_pipe(pipe_class_dummy);
4930 %}
4931 
4932 instruct encodeP_NN(iRegN dst, iRegP src, flagsReg cr) %{  // EncodeP for pointers whose type is NotNull.
4933   match(Set dst (EncodeP src));
4934   effect(KILL cr);
4935   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4936             (Universe::narrow_oop_base() == 0 ||
4937              Universe::narrow_oop_base_disjoint() ||
4938              !ExpandLoadingBaseEncode_NN));   // no base, disjoint base, or expansion switched off
4939   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4940   // TODO: s390 port size(VARIABLE_SIZE);
4941   format %{ "encodeP  $dst,$src\t# (encode cOop)" %}
4942   ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, Z_R1_scratch, -1, all_outs_are_Stores(this)); %}  // third argument is false here, true in encodeP
4943   ins_pipe(pipe_class_dummy);
4944 %}
4945 
4946   // Encoder for heapbased mode peeling off loading the base: the base arrives in register $base (used by encodeP_Ex).
4947   instruct encodeP_base(iRegN dst, iRegP src, iRegL base) %{
4948     match(Set dst (EncodeP src (Binary base dst)));
4949     effect(TEMP_DEF dst);
4950     predicate(false);   // always-false predicate; the rule is referenced from an expand block
4951     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4952     // TODO: s390 port size(VARIABLE_SIZE);
4953     format %{ "encodeP  $dst = ($src>>3) +$base + pow2_offset\t# (encode cOop)" %}
4954     ins_encode %{
4955       jlong offset = -(jlong)MacroAssembler::get_oop_base_pow2_offset   // negated pow2 offset of the shifted heap base
4956         (((uint64_t)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift());
4957       __ oop_encoder($dst$$Register, $src$$Register, true, $base$$Register, offset);
4958     %}
4959     ins_pipe(pipe_class_dummy);
4960   %}
4961 
4962   // Encoder for heapbased mode peeling off loading the base, NN variant; the offset arrives as immediate $pow2_offset (used by encodeP_NN_Ex).
4963   instruct encodeP_NN_base(iRegN dst, iRegP src, iRegL base, immL pow2_offset) %{
4964     match(Set dst (EncodeP src base));
4965     effect(USE pow2_offset);
4966     predicate(false);   // always-false predicate; the rule is referenced from an expand block
4967     ins_cost(MEMORY_REF_COST+2 * DEFAULT_COST);
4968     // TODO: s390 port size(VARIABLE_SIZE);
4969     format %{ "encodeP  $dst = ($src>>3) +$base + $pow2_offset\t# (encode cOop)" %}
4970     ins_encode %{ __ oop_encoder($dst$$Register, $src$$Register, false, $base$$Register, $pow2_offset$$constant); %}
4971     ins_pipe(pipe_class_dummy);
4972   %}
4973 
4974 // Encoder for heapbased mode peeling off loading the base: expands into loadBase + negL_reg_reg + encodeP_base.
4975 instruct encodeP_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4976   match(Set dst (EncodeP src));
4977   effect(KILL cr);
4978   predicate((n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull) &&
4979             (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode));
4980   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
4981   // TODO: s390 port size(VARIABLE_SIZE);
4982   expand %{
4983     immL baseImm %{ ((jlong)(intptr_t)Universe::narrow_oop_base()) >> Universe::narrow_oop_shift() %}  // heap base, pre-shifted by the oop shift
4984     immL_0 zero %{ (0) %}
4985     flagsReg ccr;                              // separate flags operand for the negation below
4986     iRegL base;
4987     iRegL negBase;
4988     loadBase(base, baseImm);
4989     negL_reg_reg(negBase, zero, base, ccr);    // negBase = 0 - base
4990     encodeP_base(dst, src, negBase);           // the negated base is passed as encodeP_base's $base
4991   %}
4992 %}
4993 
4994 // Encoder for heapbased mode peeling off loading the base, NN variant: expands into loadBase + negL_reg_reg + encodeP_NN_base.
4995 instruct encodeP_NN_Ex(iRegN dst, iRegP src, flagsReg cr) %{
4996   match(Set dst (EncodeP src));
4997   effect(KILL cr);
4998   predicate((n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull) &&
4999             (Universe::narrow_oop_base_overlaps() && ExpandLoadingBaseEncode_NN));
5000   ins_cost(MEMORY_REF_COST+3 * DEFAULT_COST);
5001   // TODO: s390 port size(VARIABLE_SIZE);
5002   expand %{
5003     immL baseImm %{ (jlong)(intptr_t)Universe::narrow_oop_base() %}  // heap base, not shifted here (encodeP_Ex shifts it)
5004     immL pow2_offset %{ -(jlong)MacroAssembler::get_oop_base_pow2_offset(((uint64_t)(intptr_t)Universe::narrow_oop_base())) %}  // negated pow2 offset of the base
5005     immL_0 zero %{ 0 %}
5006     flagsReg ccr;                              // separate flags operand for the negation below
5007     iRegL base;
5008     iRegL negBase;
5009     loadBase(base, baseImm);
5010     negL_reg_reg(negBase, zero, base, ccr);    // negBase = 0 - base
5011     encodeP_NN_base(dst, src, negBase, pow2_offset);
5012   %}
5013 %}
5014 
5015 //  Store Compressed Pointer
5016 
5017 // Store Compressed Pointer: matches StoreN and stores $src to $mem.
5018 instruct storeN(memory mem, iRegN_P2N src) %{
5019   match(Set mem (StoreN mem src));
5020   ins_cost(MEMORY_REF_COST);
5021   size(Z_DISP_SIZE);
5022   format %{ "ST      $src,$mem\t# (cOop)" %}
5023   opcode(STY_ZOPC, ST_ZOPC);                 // two opcodes supplied: STY and ST
5024   ins_encode(z_form_rt_mem_opt(src, mem));
5025   ins_pipe(pipe_class_dummy);
5026 %}
5027 
5028 // Store Compressed Klass pointer: matches StoreNKlass; same encoding as storeN.
5029 instruct storeNKlass(memory mem, iRegN src) %{
5030   match(Set mem (StoreNKlass mem src));
5031   ins_cost(MEMORY_REF_COST);
5032   size(Z_DISP_SIZE);
5033   format %{ "ST      $src,$mem\t# (cKlass)" %}
5034   opcode(STY_ZOPC, ST_ZOPC);                 // two opcodes supplied: STY and ST
5035   ins_encode(z_form_rt_mem_opt(src, mem));
5036   ins_pipe(pipe_class_dummy);
5037 %}
5038 
5039 // Compare Compressed Pointers
5040 
5041 instruct compN_iRegN(iRegN_P2N src1, iRegN_P2N src2, flagsReg cr) %{  // Compare two narrow oops in registers (CmpN) using CLR.
5042   match(Set cr (CmpN src1 src2));
5043   ins_cost(DEFAULT_COST);
5044   size(2);
5045   format %{ "CLR     $src1,$src2\t# (cOop)" %}
5046   opcode(CLR_ZOPC);
5047   ins_encode(z_rrform(src1, src2));
5048   ins_pipe(pipe_class_dummy);
5049 %}
5050 
5051 instruct compN_iRegN_immN(iRegN_P2N src1, immN src2, flagsReg cr) %{  // Compare a narrow oop register with a constant narrow oop.
5052   match(Set cr (CmpN src1 src2));
5053   ins_cost(DEFAULT_COST);
5054   size(6);
5055   format %{ "CLFI    $src1,$src2\t# (cOop) compare immediate narrow" %}
5056   ins_encode %{
5057     AddressLiteral cOop = __ constant_oop_address((jobject)$src2$$constant);  // wrap the constant as an oop address literal
5058     __ relocate(cOop.rspec(), 1);                                             // record its relocation spec before emitting the compare
5059     __ compare_immediate_narrow_oop($src1$$Register, (narrowOop)cOop.value());
5060   %}
5061   ins_pipe(pipe_class_dummy);
5062 %}
5063 
5064 instruct compNKlass_iRegN_immN(iRegN src1, immNKlass src2, flagsReg cr) %{  // Compare a narrow register with a constant narrow klass.
5065   match(Set cr (CmpN src1 src2));
5066   ins_cost(DEFAULT_COST);
5067   size(6);
5068   format %{ "CLFI    $src1,$src2\t# (NKlass) compare immediate narrow" %}
5069   ins_encode %{
5070     AddressLiteral NKlass = __ constant_metadata_address((Metadata*)$src2$$constant);  // wrap the constant as a metadata address literal
5071     __ relocate(NKlass.rspec(), 1);                                                    // record its relocation spec before emitting the compare
5072     __ compare_immediate_narrow_klass($src1$$Register, (Klass*)NKlass.value());
5073   %}
5074   ins_pipe(pipe_class_dummy);
5075 %}
5076 
5077 instruct compN_iRegN_immN0(iRegN_P2N src1, immN0 src2, flagsReg cr) %{  // Compare a narrow oop register against the immN0 constant using LTR.
5078   match(Set cr (CmpN src1 src2));
5079   ins_cost(DEFAULT_COST);
5080   size(2);
5081   format %{ "LTR     $src1,$src2\t# (cOop) LTR because comparing against zero" %}  // prints $src2, while the encoding below uses $src1 twice
5082   opcode(LTR_ZOPC);
5083   ins_encode(z_rrform(src1, src1));   // both register fields are $src1
5084   ins_pipe(pipe_class_dummy);
5085 %}
5086 
5087 
5088 //----------MemBar Instructions-----------------------------------------------
5089 
5090 // Memory barrier flavors
5091 
5092 instruct membar_acquire() %{  // Acquire barrier: one rule serving both MemBarAcquire and LoadFence.
5093   match(MemBarAcquire);
5094   match(LoadFence);
5095   ins_cost(4*MEMORY_REF_COST);
5096   size(0);                     // declared size is zero
5097   format %{ "MEMBAR-acquire" %}
5098   ins_encode %{ __ z_acquire(); %}
5099   ins_pipe(pipe_class_dummy);
5100 %}
5101 


6170   opcode(AGFI_ZOPC);
6171   ins_encode(z_rilform_signed(dst, src));
6172   ins_pipe(pipe_class_dummy);
6173 %}
6174 
6175 // REG = REG1 + REG2 + IMM
6176 
6177 instruct addP_reg_reg_imm12(iRegP dst, memoryRegP src1, iRegL src2, uimmL12 con) %{  // dst = src1 + src2 + con (uimmL12) as a single LA.
6178   match(Set dst (AddP (AddP src1 src2) con));
6179   predicate( PreferLAoverADD);   // only when LA is preferred over ADD
6180   ins_cost(DEFAULT_COST_LOW);
6181   size(4);
6182   format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
6183   opcode(LA_ZOPC);
6184   ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
6185   ins_pipe(pipe_class_dummy);
6186 %}
6187 
6188 instruct addP_regN_reg_imm12(iRegP dst, iRegP_N2P src1, iRegL src2, uimmL12 con) %{  // As addP_reg_reg_imm12, with src1 of operand type iRegP_N2P.
6189   match(Set dst (AddP (AddP src1 src2) con));
6190   predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);  // additionally requires no heap base and no shift
6191   ins_cost(DEFAULT_COST_LOW);
6192   size(4);
6193   format %{ "LA      $dst,$con($src1,$src2)\t # ptr d12(x,b)" %}
6194   opcode(LA_ZOPC);
6195   ins_encode(z_rxform_imm_reg_reg(dst, con, src1, src2));
6196   ins_pipe(pipe_class_dummy);
6197 %}
6198 
6199 instruct addP_reg_reg_imm20(iRegP dst, memoryRegP src1, iRegL src2, immL20 con) %{  // dst = src1 + src2 + con (immL20) as a single LAY.
6200   match(Set dst (AddP (AddP src1 src2) con));
6201   predicate(PreferLAoverADD);    // only when LA is preferred over ADD
6202   ins_cost(DEFAULT_COST);        // costlier than the imm12/LA form (DEFAULT_COST_LOW)
6203   // TODO: s390 port size(FIXED_SIZE);
6204   format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
6205   opcode(LAY_ZOPC);
6206   ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
6207   ins_pipe(pipe_class_dummy);
6208 %}
6209 
6210 instruct addP_regN_reg_imm20(iRegP dst, iRegP_N2P src1, iRegL src2, immL20 con) %{  // As addP_reg_reg_imm20, with src1 of operand type iRegP_N2P.
6211   match(Set dst (AddP (AddP src1 src2) con));
6212   predicate( PreferLAoverADD && Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);  // additionally requires no heap base and no shift
6213   ins_cost(DEFAULT_COST);
6214   // TODO: s390 port size(FIXED_SIZE);
6215   format %{ "LAY     $dst,$con($src1,$src2)\t # ptr d20(x,b)" %}
6216   opcode(LAY_ZOPC);
6217   ins_encode(z_rxyform_imm_reg_reg(dst, con, src1, src2));
6218   ins_pipe(pipe_class_dummy);
6219 %}
6220 
6221 // MEM = MEM + IMM
6222 
6223 // Add Immediate to 8-byte memory operand and result
6224 instruct addP_mem_imm(memoryRSY mem, immL8 src, flagsReg cr) %{
6225   match(Set mem (StoreP mem (AddP (LoadP mem) src)));
6226   effect(KILL cr);
6227   predicate(VM_Version::has_MemWithImmALUOps());
6228   ins_cost(MEMORY_REF_COST);
6229   size(6);
6230   format %{ "AGSI    $mem,$src\t # direct mem add 8 (ptr)" %}
6231   opcode(AGSI_ZOPC);
6232   ins_encode(z_siyform(mem, src));


6756       __ z_lghi(Z_R0_scratch, divisor);
6757       __ z_lgr($dst$$Register->successor(), $src1$$Register);
6758       __ z_dsgr($dst$$Register /* Dst is even part of a register pair. */, Z_R0_scratch);  // Instruction kills tmp.
6759     } else {
6760       __ clear_reg($dst$$Register, true, false);
6761     }
6762   %}
6763   ins_pipe(pipe_class_dummy);
6764 %}
6765 
6766 // SHIFT
6767 
6768 // Shift left logical
6769 
6770 // Register Shift Left variable: int shift with the count taken from a register and masked to BitsPerJavaInteger-1.
6771 instruct sllI_reg_reg(iRegI dst, iRegI src, iRegI nbits, flagsReg cr) %{
6772   match(Set dst (LShiftI src nbits));
6773   effect(KILL cr); // R1 is killed, too.
6774   ins_cost(3 * DEFAULT_COST);
6775   size(14);
6776   format %{ "SLL     $dst,$src,[$nbits] & 31\t# use RISC-like SLLG also for int" %}
6777   ins_encode %{
6778     __ z_lgr(Z_R1_scratch, $nbits$$Register);                    // copy the count so $nbits itself is not modified
6779     __ z_nill(Z_R1_scratch, BitsPerJavaInteger-1);               // mask the copied count
6780     __ z_sllg($dst$$Register, $src$$Register, 0, Z_R1_scratch);  // shift by the masked count held in Z_R1_scratch
6781   %}
6782   ins_pipe(pipe_class_dummy);
6783 %}
6784 
6785 // Register Shift Left Immediate
6786 // Constant shift count is masked in ideal graph already.
6787 instruct sllI_reg_imm(iRegI dst, iRegI src, immI nbits) %{
6788   match(Set dst (LShiftI src nbits));
6789   size(6);
6790   format %{ "SLL     $dst,$src,$nbits\t# use RISC-like SLLG also for int" %}
6791   ins_encode %{
6792     int Nbit = $nbits$$constant;
6793     assert((Nbit & (BitsPerJavaInteger - 1)) == Nbit, "Check shift mask in ideal graph");  // verifies the count is already within the mask
6794     __ z_sllg($dst$$Register, $src$$Register, Nbit & (BitsPerJavaInteger - 1), Z_R0);      // count is masked again; Z_R0 is passed as the last argument
6795   %}
6796   ins_pipe(pipe_class_dummy);
6797 %}
6798 
6799 // Register Shift Left Immediate by 1bit: emitted as LA with $src as both address registers and displacement 0.
6800 instruct sllI_reg_imm_1(iRegI dst, iRegI src, immI_1 nbits) %{
6801   match(Set dst (LShiftI src nbits));
6802   predicate(PreferLAoverADD);    // only when LA is preferred over ADD
6803   ins_cost(DEFAULT_COST_LOW);
6804   size(4);
6805   format %{ "LA      $dst,#0($src,$src)\t # SLL by 1 (int)" %}
6806   ins_encode %{ __ z_la($dst$$Register, 0, $src$$Register, $src$$Register); %}
6807   ins_pipe(pipe_class_dummy);
6808 %}
6809 
6810 // Register Shift Left Long


7086   %}
7087   ins_pipe(pipe_class_dummy);
7088 %}
7089 
7090 instruct overflowSubL_reg_imm(flagsReg cr, iRegL op1, immL op2) %{  // Overflow check for long op1 - constant: only the condition code is the result.
7091   match(Set cr (OverflowSubL op1 op2));
7092   effect(DEF cr, USE op1, USE op2);
7093   // TODO: s390 port size(VARIABLE_SIZE);
7094   format %{ "SGR     $op1,$op2\t # overflow check long" %}
7095   ins_encode %{
7096     __ load_const_optimized(Z_R1_scratch, $op2$$constant);  // constant goes into Z_R1_scratch
7097     __ z_lgr(Z_R0_scratch, $op1$$Register);                 // subtract on a copy so $op1 stays unchanged
7098     __ z_sgr(Z_R0_scratch, Z_R1_scratch);
7099   %}
7100   ins_pipe(pipe_class_dummy);
7101 %}
7102 
7103 instruct overflowNegI_rReg(flagsReg cr, immI_0 zero, iRegI op2) %{  // Overflow check for int negation (0 - op2): only the condition code is the result.
7104   match(Set cr (OverflowSubI zero op2));
7105   effect(DEF cr, USE op2);
7106   format %{ "NEG    $op2\t# overflow check int" %}
7107   ins_encode %{
7108     __ clear_reg(Z_R0_scratch, false, false);   // Z_R0_scratch takes the role of the zero operand
7109     __ z_sr(Z_R0_scratch, $op2$$Register);      // subtract into the scratch register; $op2 is only read
7110   %}
7111   ins_pipe(pipe_class_dummy);
7112 %}
7113 
7114 instruct overflowNegL_rReg(flagsReg cr, immL_0 zero, iRegL op2) %{  // Overflow check for long negation (0 - op2): only the condition code is the result.
7115   match(Set cr (OverflowSubL zero op2));
7116   effect(DEF cr, USE op2);
7117   format %{ "NEGG    $op2\t# overflow check long" %}
7118   ins_encode %{
7119     __ clear_reg(Z_R0_scratch, true, false);    // Z_R0_scratch takes the role of the zero operand
7120     __ z_sgr(Z_R0_scratch, $op2$$Register);     // subtract into the scratch register; $op2 is only read
7121   %}
7122   ins_pipe(pipe_class_dummy);
7123 %}
7124 
7125 // No intrinsics for multiplication, since there is no easy way
7126 // to check for overflow.
7127 
7128 
7129 //----------Floating Point Arithmetic Instructions-----------------------------
7130 
7131 //  ADD
7132 
7133 //  Add float single precision
7134 instruct addF_reg_reg(regF dst, regF src, flagsReg cr) %{
7135   match(Set dst (AddF dst src));
7136   effect(KILL cr);
7137   ins_cost(ALU_REG_COST);


8516   size(4);
8517   format %{ "CLGR    $op1,$op2\t # ptr" %}
8518   opcode(CLGR_ZOPC);
8519   ins_encode(z_rreform(op1, op2));
8520   ins_pipe(pipe_class_dummy);
8521 %}
8522 
8523 instruct compP_reg_imm0(flagsReg cr, iRegP_N2P op1, immP0 op2) %{  // Compare a pointer register against the immP0 constant using LTGR.
8524   match(Set cr (CmpP op1 op2));
8525   ins_cost(DEFAULT_COST_LOW);
8526   size(4);
8527   format %{ "LTGR    $op1, $op1\t # ptr" %}
8528   opcode(LTGR_ZOPC);
8529   ins_encode(z_rreform(op1, op1));   // both register fields are $op1
8530   ins_pipe(pipe_class_dummy);
8531 %}
8532 
8533 // Compare (DecodeN op1) against immP0 directly on the narrow register. Don't use LTGFR which performs sign extend.
8534 instruct compP_decode_reg_imm0(flagsReg cr, iRegN op1, immP0 op2) %{
8535   match(Set cr (CmpP (DecodeN op1) op2));
8536   predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0);  // only with no heap base and no shift
8537   ins_cost(DEFAULT_COST_LOW);
8538   size(2);
8539   format %{ "LTR    $op1, $op1\t # ptr" %}
8540   opcode(LTR_ZOPC);
8541   ins_encode(z_rrform(op1, op1));    // both register fields are $op1
8542   ins_pipe(pipe_class_dummy);
8543 %}
8544 
8545 instruct compP_reg_mem(iRegP dst, memory src, flagsReg cr)%{  // Compare a pointer register with a pointer loaded from memory (CmpP with LoadP) using CLG.
8546   match(Set cr (CmpP dst (LoadP src)));
8547   ins_cost(MEMORY_REF_COST);
8548   size(Z_DISP3_SIZE);
8549   format %{ "CLG     $dst, $src\t # ptr" %}
8550   opcode(CLG_ZOPC, CLG_ZOPC);                // both opcode slots are CLG
8551   ins_encode(z_form_rt_mem_opt(dst, src));
8552   ins_pipe(pipe_class_dummy);
8553 %}
8554 
8555 //----------Max and Min--------------------------------------------------------
8556 


9152 // Direct Branch: unconditional Goto, encoded by z_enc_brul (format "BRUL").
9153 instruct branchFar(label labl) %{
9154   match(Goto);
9155   effect(USE labl);
9156   ins_cost(BRANCH_COST);
9157   size(6);
9158   format %{ "BRUL   $labl" %}
9159   ins_encode(z_enc_brul(labl));
9160   ins_pipe(pipe_class_dummy);
9161   // This is not a short variant of a branch, but the long variant.
9162   ins_short_branch(0);
9163 %}
9164 
9165 // Conditional Near Branch: short form of branchConFar (size 4 instead of 6).
9166 instruct branchCon(cmpOp cmp, flagsReg cr, label lbl) %{
9167   // Same match rule as `branchConFar'.
9168   match(If cmp cr);
9169   effect(USE lbl);
9170   ins_cost(BRANCH_COST);
9171   size(4);
9172   format %{ "branch_con_short,$cmp   $cr, $lbl" %}
9173   ins_encode(z_enc_branch_con_short(cmp, lbl));
9174   ins_pipe(pipe_class_dummy);
9175   // If set to 1 this indicates that the current instruction is a
9176   // short variant of a long branch. This avoids using this
9177   // instruction in first-pass matching. It will then only be used in
9178   // the `Shorten_branches' pass.
9179   ins_short_branch(1);
9180 %}
9181 
9182 // This is for cases when the z/Architecture conditional branch instruction
9183 // does not reach far enough. So we emit a far branch here, which is
9184 // more expensive.
9185 //
9186 // Conditional Far Branch
9187 instruct branchConFar(cmpOp cmp, flagsReg cr, label lbl) %{
9188   // Same match rule as `branchCon'.
9189   match(If cmp cr);
9190   effect(USE cr, USE lbl);
9191   // Make more expensive to prefer compare_and_branch over separate instructions.
9192   ins_cost(2 * BRANCH_COST);
9193   size(6);
9194   format %{ "branch_con_far,$cmp   $cr, $lbl" %}
9195   ins_encode(z_enc_branch_con_far(cmp, lbl));
9196   ins_pipe(pipe_class_dummy);
9197   // This is not a short variant of a branch, but the long variant.
9198   ins_short_branch(0);
9199 %}
9200 
9201 instruct branchLoopEnd(cmpOp cmp, flagsReg cr, label labl) %{  // Short conditional branch closing a counted loop (CountedLoopEnd).
9202   match(CountedLoopEnd cmp cr);
9203   effect(USE labl);
9204   ins_cost(BRANCH_COST);
9205   size(4);
9206   format %{ "branch_con_short,$cmp   $labl\t # counted loop end" %}
9207   ins_encode(z_enc_branch_con_short(cmp, labl));   // same encoding class as branchCon
9208   ins_pipe(pipe_class_dummy);
9209   // If set to 1 this indicates that the current instruction is a
9210   // short variant of a long branch. This avoids using this
9211   // instruction in first-pass matching. It will then only be used in
9212   // the `Shorten_branches' pass.
9213   ins_short_branch(1);
9214 %}


9743 instruct CallLeafNoFPDirect(method meth) %{  // Runtime leaf call (CallLeafNoFP), encoded by z_enc_java_to_runtime_call.
9744   match(CallLeafNoFP);
9745   effect(USE meth);
9746   ins_cost(CALL_COST);
9747   // TODO: s390 port size(VARIABLE_SIZE);
9748   ins_num_consts(1);     // declares one constant for this instruction
9749   format %{ "CALL,runtime leaf nofp $meth" %}
9750   ins_encode( z_enc_java_to_runtime_call(meth) );
9751   ins_pipe(pipe_class_dummy);
9752   ins_alignment(2);
9753 %}
9754 
9755 // Tail Call; Jump from runtime stub to Java code.
9756 // Also known as an 'interprocedural jump'.
9757 // Target of jump will eventually return to caller.
9758 // TailJump below removes the return address.
9759 instruct TailCalljmpInd(iRegP jump_target, inline_cache_regP method_oop) %{
9760   match(TailCall jump_target method_oop);
9761   ins_cost(CALL_COST);
9762   size(2);
9763   format %{ "Jmp     $jump_target\t# $method_oop holds method oop" %}
9764   ins_encode %{ __ z_br($jump_target$$Register); %}  // the encoding branches to $jump_target only; $method_oop is not referenced here
9765   ins_pipe(pipe_class_dummy);
9766 %}
9767 
9768 // Return Instruction: branch to the address held in Z_R14.
9769 instruct Ret() %{
9770   match(Return);
9771   size(2);
9772   format %{ "BR(Z_R14) // branch to link register" %}
9773   ins_encode %{ __ z_br(Z_R14); %}
9774   ins_pipe(pipe_class_dummy);
9775 %}
9776 
9777 // Tail Jump; remove the return address; jump to target.
9778 // TailCall above leaves the return address around.
9779 // TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
9780 // ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
9781 // "restore" before this instruction (in Epilogue), we need to materialize it
9782 // in %i0.
9783 instruct tailjmpInd(iRegP jump_target, rarg1RegP ex_oop) %{


9882   match(Set pcc (FastUnlock oop box));
9883   effect(TEMP tmp1, TEMP tmp2);
9884   ins_cost(100);
9885   // TODO: s390 port size(FIXED_SIZE);  // emitted code depends on UseBiasedLocking being on/off.
9886   format %{ "FASTUNLOCK  $oop, $box; KILL Z_ARG4, Z_ARG5" %}
9887   ins_encode %{ __ compiler_fast_unlock_object($oop$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register,
9888                                                UseBiasedLocking && !UseOptoBiasInlining); %}
9889   ins_pipe(pipe_class_dummy);
9890 %}
9891 
9892 instruct inlineCallClearArrayConst(SSlenDW cnt, iRegP_N2P base, Universe dummy, flagsReg cr) %{  // ClearArray with a constant count of operand type SSlenDW.
9893   match(Set dummy (ClearArray cnt base));
9894   effect(KILL cr);
9895   ins_cost(100);         // cheapest of the three ClearArray rules (100 / 200 / 300)
9896   // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to varying #instructions.
9897   format %{ "ClearArrayConst $cnt,$base" %}
9898   ins_encode %{ __ Clear_Array_Const($cnt$$constant, $base$$Register); %}
9899   ins_pipe(pipe_class_dummy);
9900 %}
9901 
9902 instruct inlineCallClearArrayConstBig(immL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{  // ClearArray with an arbitrary long constant count.
9903   match(Set dummy (ClearArray cnt base));
9904   effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
9905   ins_cost(200);
9906   // TODO: s390 port size(VARIABLE_SIZE);       // Variable in size due to optimized constant loader.
9907   format %{ "ClearArrayConstBig $cnt,$base" %}
9908   ins_encode %{ __ Clear_Array_Const_Big($cnt$$constant, $base$$Register, $srcA$$Register, $srcL$$Register); %}  // srcA/srcL are the revenRegL/roddRegL temps
9909   ins_pipe(pipe_class_dummy);
9910 %}
9911 
9912 instruct inlineCallClearArray(iRegL cnt, iRegP_N2P base, Universe dummy, revenRegL srcA, roddRegL srcL, flagsReg cr) %{
       // ClearArray rule for a count held in a long register (iRegL). Count and
       // base are passed to Clear_Array as registers, together with the two
       // temporaries srcA / srcL (operand classes revenRegL / roddRegL).
       // Highest ins_cost (300) of the three ClearArray rules in this group.
9913   match(Set dummy (ClearArray cnt base));
9914   effect(TEMP srcA, TEMP srcL, KILL cr); // R0, R1 are killed, too.
9915   ins_cost(300);
9916   // TODO: s390 port size(FIXED_SIZE);  // z/Architecture: emitted code depends on PreferLAoverADD being on/off.
9917   format %{ "ClearArrayVar $cnt,$base" %}
9918   ins_encode %{ __ Clear_Array($cnt$$Register, $base$$Register, $srcA$$Register, $srcL$$Register); %}
9919   ins_pipe(pipe_class_dummy);
9920 %}
9921 
9922 // ============================================================================
9923 // CompactStrings
9924 
9925 // String equals
     // Rule for StrEquals nodes whose encoding is StrIntrinsicNode::LL (see the
     // predicate). The comparison is delegated to array_equals, called with a
     // leading 'false' argument and the trailing flag set to true (byte).
     // oddReg / evenReg are reserved as temporaries for that routine.
9926 instruct string_equalsL(iRegP str1, iRegP str2, iRegI cnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
9927   match(Set result (StrEquals (Binary str1 str2) cnt));
9928   effect(TEMP oddReg, TEMP evenReg, KILL cr); // R0, R1 are killed, too.
9929   predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
9930   ins_cost(300);
9931   format %{ "String Equals byte[] $str1,$str2,$cnt -> $result" %}
9932   ins_encode %{
9933     __ array_equals(false, $str1$$Register, $str2$$Register,
9934                     $cnt$$Register, $oddReg$$Register, $evenReg$$Register,
9935                     $result$$Register, true /* byte */);
9936   %}
9937   ins_pipe(pipe_class_dummy);
9938 %}


10751 instruct loadV8(iRegL dst, memory mem) %{
        // LoadVector rule for vectors whose memory size is exactly 8 bytes (see the
        // predicate). The vector is loaded into a long register with LG; the
        // format string labels the value as packed8B.
10752   match(Set dst (LoadVector mem));
10753   predicate(n->as_LoadVector()->memory_size() == 8);
10754   ins_cost(MEMORY_REF_COST);
10755   // TODO: s390 port size(VARIABLE_SIZE);
10756   format %{ "LG      $dst,$mem\t # L(packed8B)" %}
10757   opcode(LG_ZOPC, LG_ZOPC);
10758   ins_encode(z_form_rt_mem_opt(dst, mem));
10759   ins_pipe(pipe_class_dummy);
10760 %}
10761 
10762 //----------POPULATION COUNT RULES--------------------------------------------
10763 
10764 // Byte reverse
10765 
10766 instruct bytes_reverse_int(iRegI dst, iRegI src) %{
        // ReverseBytesI rule: one register-to-register LRVR, encoded through
        // z_rreform. Only available when UseByteReverseInstruction is set.
10767   match(Set dst (ReverseBytesI src));
10768   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10769   ins_cost(DEFAULT_COST);
10770   size(4);
10771   format %{ "LRVR    $dst,$src\t# byte reverse int" %}
10772   opcode(LRVR_ZOPC);
10773   ins_encode(z_rreform(dst, src));
10774   ins_pipe(pipe_class_dummy);
10775 %}
10776 
10777 instruct bytes_reverse_long(iRegL dst, iRegL src) %{
        // ReverseBytesL rule: one register-to-register LRVGR, encoded through
        // z_rreform. Only available when UseByteReverseInstruction is set.
        // Unlike the int variant, no size() is declared yet (see the TODO).
10778   match(Set dst (ReverseBytesL src));
10779   predicate(UseByteReverseInstruction);  // See Matcher::match_rule_supported
10780   ins_cost(DEFAULT_COST);
10781   // TODO: s390 port size(FIXED_SIZE);
10782   format %{ "LRVGR   $dst,$src\t# byte reverse long" %}
10783   opcode(LRVGR_ZOPC);
10784   ins_encode(z_rreform(dst, src));
10785   ins_pipe(pipe_class_dummy);
10786 %}
10787 
10788 // Leading zeroes
10789 
10790 // The instruction FLOGR (Find Leftmost One in Grande (64bit) Register)
10791 // returns the bit position of the leftmost 1 in the 64bit source register.
10792 // As the bits are numbered from left to right (0..63), the returned
10793 // position index is equivalent to the number of leading zeroes.
10794 // If no 1-bit is found (i.e. the register contains zero), the instruction
10795 // returns position 64. That's exactly what we need.
10796 
10797 instruct countLeadingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
        // Counts the leading zeros of a 32-bit value with FLOGR.
        // dst is taken from the even-register class and tmp from the odd-register
        // class; tmp and the condition code are declared killed.
        // The source is shifted into the upper half of dst and a "stop bit" is
        // inserted below it, so a zero source yields 32.
10798   match(Set dst (CountLeadingZerosI src));
10799   effect(KILL tmp, KILL cr);
10800   ins_cost(3 * DEFAULT_COST);
10801   size(14);
10802   format %{ "SLLG    $dst,$src,32\t# no need to always count 32 zeroes first\n\t"
10803             "IILH    $dst,0x8000 \t# insert \"stop bit\" to force result 32 for zero src.\n\t"
10804             "FLOGR   $dst,$dst"
10805          %}
10806   ins_encode %{
          // NOTE(review): the remarks below about an additional branch on condition
          // and about a retained previous implementation do not correspond to
          // anything in the three instructions emitted here. They are presumably
          // left over from an earlier version; confirm before relying on them.
10807     // Performance experiments indicate that "FLOGR" is using some kind of
10808     // iteration to find the leftmost "1" bit.
10809     //
10810     // The prior implementation zero-extended the 32-bit argument to 64 bit,
10811     // thus forcing "FLOGR" to count 32 bits of which we know they are zero.
10812     // We could gain measurable speedup in micro benchmark:
10813     //
10814     //               leading   trailing
10815     //   z10:   int     2.04       1.68
10816     //         long     1.00       1.02
10817     //   z196:  int     0.99       1.23
10818     //         long     1.00       1.11
10819     //
10820     // By shifting the argument into the high-word instead of zero-extending it.
10821     // The add'l branch on condition (taken for a zero argument, very infrequent,
10822     // good prediction) is well compensated for by the savings.
10823     //
10824     // We leave the previous implementation in for some time in the future when
10825     // the "FLOGR" instruction may become less iterative.
10826 
10827     // Version 2: shows 62%(z9), 204%(z10), -1%(z196) improvement over original
10828     __ z_sllg($dst$$Register, $src$$Register, 32); // No need to always count 32 zeroes first.
10829     __ z_iilh($dst$$Register, 0x8000);   // Insert "stop bit" to force result 32 for zero src.
10830     __ z_flogr($dst$$Register, $dst$$Register);
10831   %}
10832   ins_pipe(pipe_class_dummy);
10833 %}
10834 
10835 instruct countLeadingZerosL(revenRegI dst, iRegL src, roddRegI tmp, flagsReg cr) %{
        // Counts the leading zeros of a 64-bit value with a single FLOGR, which
        // returns the bit position of the leftmost one (64 for a zero source).
        // dst is taken from the even-register class; tmp (odd-register class) and
        // the condition code are declared killed.
10836   match(Set dst (CountLeadingZerosL src));
10837   effect(KILL tmp, KILL cr);
10838   ins_cost(DEFAULT_COST);
10839   size(4);
10840   format %{ "FLOGR   $dst,$src \t# count leading zeros (long)" %}
10841   ins_encode %{ __ z_flogr($dst$$Register, $src$$Register); %}
10842   ins_pipe(pipe_class_dummy);
10843 %}
10844 
10845 // trailing zeroes
10846 
10847 // We transform the trailing zeroes problem to a leading zeroes problem
10848 // such that we can use the FLOGR instruction to our advantage.
10849 
10850 // With
10851 //   tmp1 = src - 1
10852 // we flip all trailing zeroes to ones and the rightmost one to zero.
10853 // All other bits remain unchanged.
10854 // With the complement
10855 //   tmp2 = ~src
10856 // we get all ones in the trailing zeroes positions. Thus,
10857 //   tmp3 = tmp1 & tmp2
10858 // yields ones in the trailing zeroes positions and zeroes elsewhere.
10859 // Now we can apply FLOGR and get 64-(trailing zeroes).
10860 instruct countTrailingZerosI(revenRegI dst, iRegI src, roddRegI tmp, flagsReg cr) %{
10861   match(Set dst (CountTrailingZerosI src));
10862   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10863   ins_cost(8 * DEFAULT_COST);
10864   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10865   format %{ "LLGFR   $dst,$src  \t# clear upper 32 bits (we are dealing with int)\n\t"
10866             "LCGFR   $tmp,$src  \t# load 2's complement (32->64 bit)\n\t"
10867             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10868             "AGHI    $tmp,-1    \t# tmp2 = -src-1 = ~src\n\t"
10869             "NGR     $dst,$tmp  \t# tmp3 = tmp1&tmp2\n\t"
10870             "FLOGR   $dst,$dst  \t# count trailing zeros (int)\n\t"
10871             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10872             "LCR     $dst,$dst  \t# res = -tmp4"
10873          %}
10874   ins_encode %{
10875     Register Rdst = $dst$$Register;
10876     Register Rsrc = $src$$Register;
10877     // Rtmp only needed for zero-argument shortcut. With kill effect in
10878     // match rule Rsrc = roddReg would be possible, saving one register.
10879     Register Rtmp = $tmp$$Register;
10880 
10881     assert_different_registers(Rdst, Rsrc, Rtmp);
10882 
10883     // Algorithm:
10884     // - Isolate the least significant (rightmost) set bit using (src & (-src)).
10885     //   All other bits in the result are zero.
10886     // - Find the "leftmost one" bit position in the single-bit result from previous step.
10887     // - 63-("leftmost one" bit position) gives the # of trailing zeros.
10888 
10889     // Version 2: shows 79%(z9), 68%(z10), 23%(z196) improvement over original.
10890     Label done;
10891     __ load_const_optimized(Rdst, 32); // Prepare for shortcut (zero argument), result will be 32.
10892     __ z_lcgfr(Rtmp, Rsrc);


10898                                        // into upper half of reg. Not relevant with sllg below.
10899     __ z_sllg(Rdst, Rtmp, 32);         // Shift interesting contents to upper half of register.
10900     __ z_bre(done);                    // Shortcut for argument = 1, result will be 0.
10901                                        // Depends on CC set by ahi above.
10902                                        // Taken very infrequently, good prediction, no BHT entry.
10903                                        // Branch delayed to have Rdst set correctly (Rtmp == 0(32bit)
10904                                        // after SLLG Rdst == 0(64bit)).
10905     __ z_flogr(Rdst, Rdst);            // Kills tmp which is the oddReg for dst.
10906     __ add2reg(Rdst,  -32);            // 32-pos(leftmost1) is #trailing zeros
10907     __ z_lcgfr(Rdst, Rdst);            // Provide 64bit result at no cost.
10908     __ bind(done);
10909   %}
10910   ins_pipe(pipe_class_dummy);
10911 %}
10912 
10913 instruct countTrailingZerosL(revenRegI dst, iRegL src, roddRegL tmp, flagsReg cr) %{
        // Counts the trailing zeros of a 64-bit value by turning the problem into
        // a leading-zero count that FLOGR can solve (see the encoding below).
        // dst is taken from the even-register class and must differ from src;
        // tmp (odd-register class) and the condition code are declared killed.
10914   match(Set dst (CountTrailingZerosL src));
10915   effect(TEMP_DEF dst, KILL tmp, KILL cr);
10916   ins_cost(8 * DEFAULT_COST);
10917   // TODO: s390 port size(FIXED_SIZE);  // Emitted code depends on PreferLAoverADD being on/off.
10918   format %{ "LCGR    $dst,$src  \t# preserve src\n\t"
10919             "NGR     $dst,$src  \t#\n\t"
10920             "AGHI    $dst,-1    \t# tmp1 = src-1\n\t"
10921             "FLOGR   $dst,$dst  \t# count trailing zeros (long), kill $tmp\n\t"
10922             "AHI     $dst,-64   \t# tmp4 = 64-(trailing zeroes)-64\n\t"
10923             "LCR     $dst,$dst  \t#"
10924          %}
10925   ins_encode %{
10926     Register Rdst = $dst$$Register;
10927     Register Rsrc = $src$$Register;
10928     assert_different_registers(Rdst, Rsrc); // Rtmp == Rsrc allowed.
10929 
10930     // New version: shows 5%(z9), 2%(z10), 11%(z196) improvement over original.
          // Rdst = src & -src isolates the rightmost one bit (zero if src is zero).
10931     __ z_lcgr(Rdst, Rsrc);
10932     __ z_ngr(Rdst, Rsrc);
          // Subtracting 1 leaves ones in exactly the trailing-zero positions of src.
10933     __ add2reg(Rdst,   -1);
10934     __ z_flogr(Rdst, Rdst); // Kills tmp which is the oddReg for dst.
          // FLOGR delivered 64-(trailing zeroes); subtract 64 and negate to get the count.
10935     __ add2reg(Rdst,  -64);
10936     __ z_lcgfr(Rdst, Rdst); // Provide 64bit result at no cost.
10937   %}
10938   ins_pipe(pipe_class_dummy);
10939 %}
10940 
10941 
10942 // bit count
10943 
10944 instruct popCountI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
        // Population count of a 32-bit value. Only matched when
        // UsePopCountInstruction is set and VM_Version reports POPCNT support.
        // After POPCNT, two shift-right/add steps fold the counts held in bytes
        // 4..7 of dst into byte 7, which is then zero-extended into the result.
        // dst must differ from tmp; tmp and the condition code are clobbered.
10945   match(Set dst (PopCountI src));
10946   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10947   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10948   ins_cost(DEFAULT_COST);
10949   size(24);
10950   format %{ "POPCNT  $dst,$src\t# pop count int" %}
10951   ins_encode %{
10952     Register Rdst = $dst$$Register;
10953     Register Rsrc = $src$$Register;
10954     Register Rtmp = $tmp$$Register;
10955 
10956     // Prefer compile-time assertion over run-time SIGILL.
10957     assert(VM_Version::has_PopCount(), "bad predicate for popCountI");
10958     assert_different_registers(Rdst, Rtmp);
10959 
10960     // Version 2: shows 10%(z196) improvement over original.
10961     __ z_popcnt(Rdst, Rsrc);
10962     __ z_srlg(Rtmp, Rdst, 16); // calc  byte4+byte6 and byte5+byte7
10963     __ z_alr(Rdst, Rtmp);      //   into byte6 and byte7
10964     __ z_srlg(Rtmp, Rdst,  8); // calc (byte4+byte6) + (byte5+byte7)
10965     __ z_alr(Rdst, Rtmp);      //   into byte7
10966     __ z_llgcr(Rdst, Rdst);    // zero-extend sum
10967   %}
10968   ins_pipe(pipe_class_dummy);
10969 %}
10970 
10971 instruct popCountL(iRegI dst, iRegL src, iRegL tmp, flagsReg cr) %{
        // Population count of a 64-bit value. Only matched when
        // UsePopCountInstruction is set and VM_Version reports POPCNT support.
        // After POPCNT, AHHLR combines the high and low word of dst, two
        // shift-left/add steps accumulate the byte counts towards the most
        // significant byte, and the final SRLG by 56 moves that byte down into
        // the result. dst must differ from tmp; tmp and the condition code are
        // clobbered.
10972   match(Set dst (PopCountL src));
10973   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
10974   predicate(UsePopCountInstruction && VM_Version::has_PopCount());
10975   ins_cost(DEFAULT_COST);
10976   // TODO: s390 port size(FIXED_SIZE);
10977   format %{ "POPCNT  $dst,$src\t# pop count long" %}
10978   ins_encode %{
10979     Register Rdst = $dst$$Register;
10980     Register Rsrc = $src$$Register;
10981     Register Rtmp = $tmp$$Register;
10982 
10983     // Prefer compile-time assertion over run-time SIGILL.
10984     assert(VM_Version::has_PopCount(), "bad predicate for popCountL");
10985     assert_different_registers(Rdst, Rtmp);
10986 
10987     // Original version. Using LA instead of algr seems to be a really bad idea (-35%).
10988     __ z_popcnt(Rdst, Rsrc);
10989     __ z_ahhlr(Rdst, Rdst, Rdst);
10990     __ z_sllg(Rtmp, Rdst, 16);
10991     __ z_algr(Rdst, Rtmp);
10992     __ z_sllg(Rtmp, Rdst,  8);
10993     __ z_algr(Rdst, Rtmp);
10994     __ z_srlg(Rdst, Rdst, 56);
10995   %}
10996   ins_pipe(pipe_class_dummy);
10997 %}
10998 
10999 //----------SMARTSPILL RULES---------------------------------------------------
11000 // These must follow all instruction definitions as they use the names
11001 // defined in the instructions definitions.
11002 
11003 // ============================================================================
11004 // TYPE PROFILING RULES
11005 
< prev index next >