// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "peephole_x86_64.hpp"

%}

// Register masks
source_hpp %{
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif

void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

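  // Class-initialization barrier (summary, not authoritative): take the fast
  // path if the holder class is fully initialized or is being initialized by
  // the current thread; otherwise jump to the wrong-method stub so the call
  // is re-resolved once initialization completes.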
  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table may be emitted before the MachConstantBaseNode itself.
    ConstantTable& constant_table = C->output()->constant_table();
    // idivq (note: must be emitted by the user of this rule)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear the upper bits of the YMM registers when the current compiled
      // code uses wide vectors, to avoid the AVX <-> SSE transition penalty
      // during the call.
      __ vzeroupper();
    }
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    if (SCCache::is_on_for_write()) {
      // Create a runtime_call_type relocation when caching code so the
      // target address can be patched when the cached code is loaded.
      __ lea(r10, RuntimeAddress((address)$meth$$method));
    } else {
      // No relocation needed: the absolute address is baked into the code.
      __ mov64(r10, (int64_t) $meth$$method);
    }
    __ call(r10);
    __ post_call_nop();
  %}
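
  // For reference, instruct rules elsewhere in this file compose the
  // encodings above; a simplified sketch (not the verbatim rule) of a
  // direct runtime-call rule would look like:
  //
  //   instruct CallRuntimeDirect(method meth) %{
  //     match(CallRuntime);
  //     effect(USE meth);
  //     format %{ "call,runtime " %}
  //     ins_encode(clear_avx, Java_To_Runtime(meth));
  //     ins_pipe(pipe_slow);
  //   %}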

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
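
// These masking constants let match rules select a zero-extending move in
// place of a full 64-bit AND. A sketch of the kind of rule they enable
// (simplified, not the verbatim rule from this file):
//
//   instruct andL_rReg_imm255(rRegL dst, rRegL src, immL_255 mask) %{
//     match(Set dst (AndL src mask));
//     format %{ "movzbl  $dst, $src\t# long & 0xFF" %}
//     ins_encode %{ __ movzbl($dst$$Register, $src$$Register); %}
//     ins_pipe(ialu_reg);
//   %}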

// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Match only pointer constants that fall inside the AOT runtime constants area
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

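// Opmask register operand (AVX-512 k registers). The vectmask_reg class is
// defined earlier in this file; k0 is generally excluded because the EVEX
// encoding uses k0 to mean "no masking".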
operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{ %}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);
  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
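
// Note: with UseXmmLoadAndClearUpper, movsd is preferred because a
// memory-source movsd zeroes the upper half of the XMM register, breaking
// any false dependence on its previous contents; movlpd (used by the
// partial-load rule above) merges into the register and keeps the upper
// half live.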

instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b \t! using $tmp, $atmp and $btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
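
// Java's Math.max must return +0.0f for max(-0.0f, +0.0f) and propagate
// NaN, so a bare (v)maxss does not suffice: the rule above uses the
// blend-based vminmax_fp sequence, while the reduction form below falls
// back to a compare-and-branch sequence (emit_fp_min_max) that needs an
// integer temp and kills the flags register.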

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xtmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xtmp, TEMP rtmp, KILL cr);

  format %{ "maxF_reduction $dst, $a, $b \t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp$$XMMRegister, $rtmp$$Register,