// Singleton class for RDI int register
reg_class int_rdi_reg(RDI);

// Singleton class for instruction pointer
// reg_class ip_reg(RIP);

%}

//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description

source_hpp %{

#include "peephole_x86_64.hpp"

%}

// Register masks
source_hpp %{

extern RegMask _ANY_REG_mask;
extern RegMask _PTR_REG_mask;
extern RegMask _PTR_REG_NO_RBP_mask;
extern RegMask _PTR_NO_RAX_REG_mask;
extern RegMask _PTR_NO_RAX_RBX_REG_mask;
extern RegMask _LONG_REG_mask;
extern RegMask _LONG_NO_RAX_RDX_REG_mask;
extern RegMask _LONG_NO_RCX_REG_mask;
extern RegMask _LONG_NO_RBP_R13_REG_mask;
extern RegMask _INT_REG_mask;
extern RegMask _INT_NO_RAX_RDX_REG_mask;
extern RegMask _INT_NO_RCX_REG_mask;
extern RegMask _INT_NO_RBP_R13_REG_mask;
extern RegMask _FLOAT_REG_mask;

extern RegMask _STACK_OR_PTR_REG_mask;
extern RegMask _STACK_OR_LONG_REG_mask;
extern RegMask _STACK_OR_INT_REG_mask;

inline const RegMask& STACK_OR_PTR_REG_mask() { return _STACK_OR_PTR_REG_mask; }
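
// The remaining stack-or-register masks presumably get the same one-line
// accessors (a sketch following the pattern above; the actual declarations
// may differ):
inline const RegMask& STACK_OR_LONG_REG_mask() { return _STACK_OR_LONG_REG_mask; }
inline const RegMask& STACK_OR_INT_REG_mask() { return _STACK_OR_INT_REG_mask; }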
    st->print("\n\t");
    st->print("cmpl [r15_thread + #disarmed_guard_value_offset], #disarmed_guard_value\t");
    st->print("\n\t");
    st->print("je fast_entry\t");
    st->print("\n\t");
    st->print("call #nmethod_entry_barrier_stub\t");
    st->print("\n\tfast_entry:");
  }
  st->cr();
}
#endif
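
// Note: the pseudo-assembly printed above mirrors the emitted nmethod entry
// barrier: the disarmed guard value embedded in the code is compared against
// the thread-local field, and the barrier stub is called only when the two
// differ, i.e. when the barrier has been armed (typically by the GC).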

void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  int framesize = C->output()->frame_size_in_bytes();
  int bangsize = C->output()->bang_size_in_bytes();

  if (C->clinit_barrier_on_entry()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    assert(!C->method()->holder()->is_not_initialized() || C->do_clinit_barriers(), "initialization should have been started");

    Label L_skip_barrier;
    Register klass = rscratch1;

    __ mov_metadata(klass, C->method()->holder()->constant_encoding());
    __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/);

    __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); // slow path

    __ bind(L_skip_barrier);
  }
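
  // Control flow above: clinit_barrier() branches to L_skip_barrier when the
  // holder class is already initialized (or is being initialized by the
  // current thread); otherwise execution falls through to the wrong-method
  // stub, which re-resolves the call until initialization completes.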

  __ verified_entry(framesize, C->output()->need_stack_bang(bangsize) ? bangsize : 0, false, C->stub_function() != nullptr);

  C->output()->set_frame_complete(__ offset());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
    ConstantTable& constant_table = C->output()->constant_table();
    if ($primary) {
      __ xorptr(Rrdi, Rrdi);
    }
    __ bind(miss);
  %}

  enc_class clear_avx %{
    debug_only(int off0 = __ offset());
    if (generate_vzeroupper(Compile::current())) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      __ vzeroupper();
    }
    debug_only(int off1 = __ offset());
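    // The assert below matters because ADLC-predicted sizes feed C2's branch
    // shortening; an encoding whose emitted size disagrees with
    // clear_avx_size() would invalidate previously computed jump offsets.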
    assert(off1 - off0 == clear_avx_size(), "correct size prediction");
  %}

  enc_class Java_To_Runtime(method meth) %{
    if (SCCache::is_on_for_write()) {
      // Create a runtime_call_type relocation when caching code so the
      // call target can be patched at load time
      __ lea(r10, RuntimeAddress((address)$meth$$method));
    } else {
      // No relocation needed
      __ mov64(r10, (int64_t) $meth$$method);
    }
    __ call(r10);
    __ post_call_nop();
  %}
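
  // Illustrative contrast for the two encodings above (not emitted code):
  // assuming MacroAssembler's usual behavior, lea() with a RuntimeAddress
  // records the literal's runtime_call_type relocation so cached code can be
  // re-linked when loaded, while mov64() embeds a raw 8-byte immediate that
  // is fixed at emit time:
  //
  //   __ lea(r10, RuntimeAddress(target));   // relocated, load-time patchable
  //   __ mov64(r10, (int64_t)target);        // raw immediate, no relocation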

  enc_class Java_Static_Call(method meth)
  %{
    // JAVA STATIC CALL
    // CALL to fixup routine. Fixup routine uses ScopeDesc info to
    // determine who we intended to call.
    if (!_method) {
      __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, $meth$$method)));
    } else if (_method->intrinsic_id() == vmIntrinsicID::_ensureMaterializedForStackWalk) {
      // The NOP here is purely to ensure that eliding a call to
      // JVM_EnsureMaterializedForStackWalk doesn't change the code size.
      __ addr_nop_5();
      __ block_comment("call JVM_EnsureMaterializedForStackWalk (elided)");
    } else {
      int method_index = resolved_method_index(masm);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
// Constant for byte-wide masking
operand immL_255()
%{
  predicate(n->get_long() == 255);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immL_65535()
%{
  predicate(n->get_long() == 65535);
  match(ConL);

  format %{ %}
  interface(CONST_INTER);
%}
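
// For context, a sketch of the kind of rule these masking constants enable
// (shape and name hypothetical; the real rules appear later in this file):
// an AndL with 0xFF is just a byte zero-extension, so matching the constant
// as a dedicated operand lets the matcher emit movzbl instead of an andq:
//
//   instruct andL_rReg_imm255(rRegL dst, immL_255 src) %{
//     match(Set dst (AndL dst src));
//     format %{ "movzbl  $dst, $dst\t# long & 0xFF" %}
//     ins_encode %{ __ movzbl($dst$$Register, $dst$$Register); %}
//     ins_pipe(ialu_reg);
//   %}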

// AOT Runtime Constants Address
operand immAOTRuntimeConstantsAddress()
%{
  // Check if the address is in the range of AOT Runtime Constants
  predicate(AOTRuntimeConstants::contains((address)(n->get_ptr())));
  match(ConP);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
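
// This operand is consumed by loadAOTRCAddress further down. A minimal
// sketch of the containment test the predicate relies on (hypothetical
// shape; the real AOTRuntimeConstants lives in shared runtime code):
//
//   static bool contains(address adr) {
//     address base = (address)&_aot_runtime_constants;
//     return base <= adr && adr < base + sizeof(_aot_runtime_constants);
//   }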

operand kReg()
%{
  constraint(ALLOC_IN_RC(vectmask_reg));
  match(RegVectMask);
  format %{ %}
  interface(REG_INTER);
%}
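
// kReg models the AVX-512 opmask registers used as write masks for masked
// vector operations; since the k0 encoding means "no mask" in EVEX, the
// vectmask_reg class presumably allocates from k1 upward.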

// Register Operands
// Integer Register
operand rRegI()
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(rax_RegI);
  match(rbx_RegI);
  match(rcx_RegI);
  match(rdx_RegI);
  match(rdi_RegI);
  format %{ "movlpd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct loadD(regD dst, memory mem)
%{
  predicate(UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));

  ins_cost(145); // XXX
  format %{ "movsd $dst, $mem\t# double" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe(pipe_slow); // XXX
%}
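
// Design note: with UseXmmLoadAndClearUpper, movsd is preferred because it
// zeroes the upper 64 bits of the destination XMM register, breaking any
// false dependency on the register's previous contents; the movlpd variant
// above merges into the lower half and leaves the upper half untouched.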

instruct loadAOTRCAddress(rRegP dst, immAOTRuntimeConstantsAddress con)
%{
  match(Set dst con);

  format %{ "leaq $dst, $con\t# AOT Runtime Constants Address" %}

  ins_encode %{
    __ load_aotrc_address($dst$$Register, (address)$con$$constant);
  %}

  ins_pipe(ialu_reg_fat);
%}
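
// Assumption: load_aotrc_address() is expected to emit a relocatable address
// load so that code cached ahead of time can rebind the constants area when
// it is loaded; the exact emission lives in the macro assembler.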

// max = java.lang.Math.max(float a, float b)
instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp, legRegF btmp) %{
  predicate(UseAVX > 0 && !VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "maxF $dst, $a, $b\t! using tmp, atmp and btmp as TEMP" %}
  ins_encode %{
    __ vminmax_fp(Op_MaxV, T_FLOAT, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}
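
// Why not a plain vmaxss? Math.max must return NaN when either input is NaN
// and must treat -0.0 as strictly less than +0.0, while the x86 max
// instructions return the second operand on NaN and compare -0.0 equal to
// +0.0; the blend/fixup sequence in vminmax_fp handles both corner cases,
// which is why the extra TEMP registers are needed.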

instruct maxF_reduction_reg(legRegF dst, legRegF a, legRegF b, legRegF xmmt, rRegI tmp, rFlagsReg cr) %{
  predicate(UseAVX > 0 && VLoopReductions::is_reduction(n));
  match(Set dst (MaxF a b));
  effect(USE a, USE b, TEMP xmmt, TEMP tmp, KILL cr);

  format %{ "$dst = max($a, $b)\t# intrinsic (float)" %}
  ins_encode %{
    emit_fp_min_max(masm, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xmmt$$XMMRegister, $tmp$$Register,