// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "peephole_x86_64.hpp"

%}

// Register masks
source_hpp %{
extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
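// A minimal usage sketch (hypothetical code, not from this file): C++ in
// later source blocks hands one of these masks to the register allocator,
// e.g. from a Mach node whose operand may live either on the stack or in a
// pointer register:
//
//   const RegMask& in_RegMask(uint idx) const { return STACK_OR_PTR_REG_mask(); }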
// ...

    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif
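// In pseudo-C++, the nmethod entry barrier printed above (assuming the guard
// value lives at a fixed offset from r15_thread) amounts to:
//
//   if (*(int32_t*)(r15_thread + disarmed_guard_value_offset) != disarmed_guard_value) {
//     nmethod_entry_barrier_stub();  // slow path: re-check and repair the nmethod
//   }
//   // fast_entry: fall through into the method body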

void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }
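  // A rough sketch of the barrier just emitted (hedged -- the exact fast-path
  // test is inside MacroAssembler::clinit_barrier): the holder klass's init
  // state is checked, and an initialized class falls through; otherwise
  // control reaches the handle_wrong_method stub, which re-resolves the call
  // so class initialization can run first.
  //
  //   if (klass->is_initialized()) goto L_skip_barrier;  // fast path
  //   handle_wrong_method();                             // slow path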

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);
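  // need_stack_bang() decides whether the prologue must probe ("bang") the
  // stack guard pages before committing a frame of this size, so a potential
  // stack overflow is detected eagerly; passing 0 skips the probe.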

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before the MachConstantBaseNode itself.
    ConstantTable& constant_table = C->output()->constant_table();
// ...

    // idivq (note: must be emitted by the user of this rule)
    // <done>
    __ idivq($div$$Register);
    __ bind(done);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    debug_only(int off1 = __ offset());
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    if (SCCache::is_on_for_write()) {
      // Create a runtime_call_type relocation when caching code, so the
      // target address can be patched when the cached code is loaded.
      __ lea(r10, RuntimeAddress((address)$meth$$method));
    } else {
      // No relocation needed: the absolute address is baked into the code.
      __ mov64(r10, (int64_t) $meth$$method);
    }
    __ call(r10);
    __ post_call_nop();
  %}
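  // Illustrative contrast (not emitted here): mov64 encodes the target as a
  // 64-bit immediate with no relocation record, so the code is only valid at
  // the address layout it was compiled in, while lea with a RuntimeAddress
  // records relocation info that lets the code cache loader rebind the target.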

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
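      // (A near call rel32 is 5 bytes on x86-64, the same size addr_nop_5
      // emits, so downstream code offsets are identical whether or not the
      // call is elided.)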
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
// ...

// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
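// A masking constant such as immL_255 typically exists so a match rule can
// strength-reduce an AND into a zero-extending move. A minimal sketch of such
// a rule (hypothetical here -- the real rules live elsewhere in this file):
//
//   instruct andL_rReg_imm255(rRegL dst, immL_255 src)
//   %{
//     match(Set dst (AndL dst src));
//     format %{ "movzbl  $dst, $dst\t# long & 0xFF" %}
//     ins_encode %{ __ movzbl($dst$$Register, $dst$$Register); %}
//     ins_pipe(ialu_reg);
//   %}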

// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
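// (Matched by the loadAOTRCAddress rule further below, which materializes
// such a constant with load_aotrc_address.)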

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{ %}
  interface(REG_INTER);
%}

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);
// ...

  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
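// Note on the two loadD variants: with UseXmmLoadAndClearUpper the load is
// emitted as movsd, which zeroes the upper half of the destination XMM
// register and so breaks any false dependence on its previous contents; the
// movlpd form above merges into the destination and can stall on it.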

instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b \t! using tmp, atmp and btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
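// Why the extra TEMPs: Math.max has stricter semantics than a bare (v)maxss,
// which resolves both signed-zero ties and NaN inputs by returning its second
// source operand. A hedged sketch of the rules vminmax_fp must preserve:
//
//   Math.max(-0.0f, +0.0f) == +0.0f          // maxss(+0.0f, -0.0f) would give -0.0f
//   Float.isNaN(Math.max(Float.NaN, 1.0f))   // maxss(NaN, 1.0f) would give 1.0f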

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,