26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "compiler/disassembler.hpp"
29 #include "gc_interface/collectedHeap.inline.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "memory/cardTableModRefBS.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "prims/methodHandles.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/interfaceSupport.hpp"
37 #include "runtime/objectMonitor.hpp"
38 #include "runtime/os.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/macros.hpp"
42 #if INCLUDE_ALL_GCS
43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
45 #include "gc_implementation/g1/heapRegion.hpp"
46 #endif // INCLUDE_ALL_GCS
47
48 #ifdef PRODUCT
49 #define BLOCK_COMMENT(str) /* nothing */
50 #define STOP(error) stop(error)
51 #else
52 #define BLOCK_COMMENT(str) block_comment(str)
53 #define STOP(error) block_comment(error); stop(error)
54 #endif
55
56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
57
58 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
59
60 #ifdef ASSERT
61 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
62 #endif
63
64 static Assembler::Condition reverse[] = {
65 Assembler::noOverflow /* overflow = 0x0 */ ,
4115 vxorps(dst, nds, as_Address(src), vector256);
4116 } else {
4117 lea(rscratch1, src);
4118 vxorps(dst, nds, Address(rscratch1, 0), vector256);
4119 }
4120 }
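// Pattern used throughout this file: when an AddressLiteral operand may not
// be reachable with a 32-bit displacement, its address is first materialized
// in rscratch1 with lea() and the operand is then addressed indirectly.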
4121
4122 void MacroAssembler::resolve_jobject(Register value,
4123 Register thread,
4124 Register tmp) {
4125 assert_different_registers(value, thread, tmp);
4126 Label done, not_weak;
4127 testptr(value, value);
4128 jcc(Assembler::zero, done); // Use NULL as-is.
4129 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
4130 jcc(Assembler::zero, not_weak);
4131 // Resolve jweak.
4132 movptr(value, Address(value, -JNIHandles::weak_tag_value));
4133 verify_oop(value);
4134 #if INCLUDE_ALL_GCS
4135 if (UseG1GC) {
4136 g1_write_barrier_pre(noreg /* obj */,
4137 value /* pre_val */,
4138 thread /* thread */,
4139 tmp /* tmp */,
4140 true /* tosca_live */,
4141 true /* expand_call */);
4142 }
4143 #endif // INCLUDE_ALL_GCS
4144 jmp(done);
4145 bind(not_weak);
4146 // Resolve (untagged) jobject.
4147 movptr(value, Address(value, 0));
4148 verify_oop(value);
4149 bind(done);
4150 }
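// resolve_jobject converts a JNI handle back into an oop. A NULL handle is
// passed through unchanged. A handle with the low jweak tag bit set is a
// weak handle: after loading its referent, the SATB pre-barrier is applied
// so that concurrent marking cannot lose an object that has just become
// strongly reachable through this thread.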
4151
4152 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4153 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4154 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4155 // The inverted mask is sign-extended
4174 assert(thread == r15_thread, "must be");
4175 #endif // _LP64
4176
4177 Label done;
4178 Label runtime;
4179
4180 assert(pre_val != noreg, "check this code");
4181
4182 if (obj != noreg) {
4183 assert_different_registers(obj, pre_val, tmp);
4184 assert(pre_val != rax, "check this code");
4185 }
4186
4187 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4188 PtrQueue::byte_offset_of_active()));
4189 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4190 PtrQueue::byte_offset_of_index()));
4191 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4192 PtrQueue::byte_offset_of_buf()));
4193
4194
4195 // Is marking active?
4196 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4197 cmpl(in_progress, 0);
4198 } else {
4199 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
4200 cmpb(in_progress, 0);
4201 }
4202 jcc(Assembler::equal, done);
4203
4204 // Do we need to load the previous value?
4205 if (obj != noreg) {
4206 load_heap_oop(pre_val, Address(obj, 0));
4207 }
4208
4209 // Is the previous value null?
4210 cmpptr(pre_val, (int32_t) NULL_WORD);
4211 jcc(Assembler::equal, done);
4212
4213 // Can we store original value in the thread's buffer?
4214 // Is index == 0?
4215 // (The index field is typed as size_t.)
4216
4217 movptr(tmp, index); // tmp := *index_adr
4218 cmpptr(tmp, 0); // tmp == 0?
4219 jcc(Assembler::equal, runtime); // If yes, goto runtime
4220
4221 subptr(tmp, wordSize); // tmp := tmp - wordSize
4222 movptr(index, tmp); // *index_adr := tmp
4265 if (pre_val != rax)
4266 pop(pre_val);
4267
4268 if (obj != noreg && obj != rax)
4269 pop(obj);
4270
4271   if (tosca_live) pop(rax);
4272
4273 bind(done);
4274 }
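// The SATB pre-barrier above logs the previous field value into the
// thread-local SATB queue while marking is active. The queue index counts
// down toward zero; index == 0 means the buffer is full, so the slow path
// calls into the runtime to hand the buffer off before retrying.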
4275
4276 void MacroAssembler::g1_write_barrier_post(Register store_addr,
4277 Register new_val,
4278 Register thread,
4279 Register tmp,
4280 Register tmp2) {
4281 #ifdef _LP64
4282 assert(thread == r15_thread, "must be");
4283 #endif // _LP64
4284
4285 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4286 PtrQueue::byte_offset_of_index()));
4287 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4288 PtrQueue::byte_offset_of_buf()));
4289
4290 BarrierSet* bs = Universe::heap()->barrier_set();
4291 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4292 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4293
4294 Label done;
4295 Label runtime;
4296
4297 // Does store cross heap regions?
4298
4299 movptr(tmp, store_addr);
4300 xorptr(tmp, new_val);
4301 shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
4302 jcc(Assembler::equal, done);
4303
4304 // crosses regions, storing NULL?
4600
4601 #ifdef _LP64
4602 if (var_size_in_bytes->is_valid()) {
4603 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4604 } else {
4605 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4606 }
4607 #else
4608 if (var_size_in_bytes->is_valid()) {
4609 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4610 } else {
4611 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4612 }
4613 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
4614 #endif
4615 }
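// Note: the per-thread allocated_bytes counter is 64 bits wide even on
// 32-bit platforms, hence the addl into the low word followed by adcl of
// the carry into the high word; LP64 updates it with a single addq.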
4616
4617 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
4618 pusha();
4619
4620 // if we are coming from c1, xmm registers may be live
4621 int off = 0;
4622 if (UseSSE == 1) {
4623 subptr(rsp, sizeof(jdouble)*8);
4624 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
4625 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
4626 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
4627 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
4628 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
4629 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
4630 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
4631 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
4632 } else if (UseSSE >= 2) {
4633 #ifdef COMPILER2
4634 if (MaxVectorSize > 16) {
4635 assert(UseAVX > 0, "256bit vectors are supported only with AVX");
4636       // Save upper half of YMM registers
4637 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4638 vextractf128h(Address(rsp, 0),xmm0);
4639 vextractf128h(Address(rsp, 16),xmm1);
4640 vextractf128h(Address(rsp, 32),xmm2);
4641 vextractf128h(Address(rsp, 48),xmm3);
4642 vextractf128h(Address(rsp, 64),xmm4);
4643 vextractf128h(Address(rsp, 80),xmm5);
4644 vextractf128h(Address(rsp, 96),xmm6);
4645 vextractf128h(Address(rsp,112),xmm7);
4646 #ifdef _LP64
4647 vextractf128h(Address(rsp,128),xmm8);
4648 vextractf128h(Address(rsp,144),xmm9);
4649 vextractf128h(Address(rsp,160),xmm10);
4650 vextractf128h(Address(rsp,176),xmm11);
4651 vextractf128h(Address(rsp,192),xmm12);
4652 vextractf128h(Address(rsp,208),xmm13);
4653 vextractf128h(Address(rsp,224),xmm14);
4654 vextractf128h(Address(rsp,240),xmm15);
4655 #endif
4656 }
4657 #endif
4658     // Save whole 128-bit (16 bytes) XMM registers
4659 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4660 movdqu(Address(rsp,off++*16),xmm0);
4661 movdqu(Address(rsp,off++*16),xmm1);
4662 movdqu(Address(rsp,off++*16),xmm2);
4663 movdqu(Address(rsp,off++*16),xmm3);
4664 movdqu(Address(rsp,off++*16),xmm4);
4665 movdqu(Address(rsp,off++*16),xmm5);
4666 movdqu(Address(rsp,off++*16),xmm6);
4667 movdqu(Address(rsp,off++*16),xmm7);
4668 #ifdef _LP64
4669 movdqu(Address(rsp,off++*16),xmm8);
4670 movdqu(Address(rsp,off++*16),xmm9);
4671 movdqu(Address(rsp,off++*16),xmm10);
4672 movdqu(Address(rsp,off++*16),xmm11);
4673 movdqu(Address(rsp,off++*16),xmm12);
4674 movdqu(Address(rsp,off++*16),xmm13);
4675 movdqu(Address(rsp,off++*16),xmm14);
4676 movdqu(Address(rsp,off++*16),xmm15);
4677 #endif
4678 }
4679
4680 // Preserve registers across runtime call
4681 int incoming_argument_and_return_value_offset = -1;
4682 if (num_fpu_regs_in_use > 1) {
4683     // Must preserve all other FPU regs (could alternatively convert
4684     // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
4685     // FPU state, but cannot trust the C compiler)
4686 NEEDS_CLEANUP;
4687 // NOTE that in this case we also push the incoming argument(s) to
4688 // the stack and restore it later; we also use this stack slot to
4689 // hold the return value from dsin, dcos etc.
4690 for (int i = 0; i < num_fpu_regs_in_use; i++) {
4691 subptr(rsp, sizeof(jdouble));
4692 fstp_d(Address(rsp, 0));
4693 }
4694 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
4695 for (int i = nb_args-1; i >= 0; i--) {
4696 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
4697 }
4698 }
4724
4725 MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
4726
4727 #ifdef _LP64
4728 movsd(Address(rsp, 0), xmm0);
4729 fld_d(Address(rsp, 0));
4730 #endif // _LP64
4731 addptr(rsp, sizeof(jdouble) * nb_args);
4732 if (num_fpu_regs_in_use > 1) {
4733 // Must save return value to stack and then restore entire FPU
4734 // stack except incoming arguments
4735 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
4736 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
4737 fld_d(Address(rsp, 0));
4738 addptr(rsp, sizeof(jdouble));
4739 }
4740 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
4741 addptr(rsp, sizeof(jdouble) * nb_args);
4742 }
4743
4744 off = 0;
4745 if (UseSSE == 1) {
4746 movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
4747 movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
4748 movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
4749 movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
4750 movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
4751 movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
4752 movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
4753 movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
4754 addptr(rsp, sizeof(jdouble)*8);
4755 } else if (UseSSE >= 2) {
4756     // Restore whole 128-bit (16 bytes) XMM registers
4757 movdqu(xmm0, Address(rsp,off++*16));
4758 movdqu(xmm1, Address(rsp,off++*16));
4759 movdqu(xmm2, Address(rsp,off++*16));
4760 movdqu(xmm3, Address(rsp,off++*16));
4761 movdqu(xmm4, Address(rsp,off++*16));
4762 movdqu(xmm5, Address(rsp,off++*16));
4763 movdqu(xmm6, Address(rsp,off++*16));
4764 movdqu(xmm7, Address(rsp,off++*16));
4781 vinsertf128h(xmm2, Address(rsp, 32));
4782 vinsertf128h(xmm3, Address(rsp, 48));
4783 vinsertf128h(xmm4, Address(rsp, 64));
4784 vinsertf128h(xmm5, Address(rsp, 80));
4785 vinsertf128h(xmm6, Address(rsp, 96));
4786 vinsertf128h(xmm7, Address(rsp,112));
4787 #ifdef _LP64
4788 vinsertf128h(xmm8, Address(rsp,128));
4789 vinsertf128h(xmm9, Address(rsp,144));
4790 vinsertf128h(xmm10, Address(rsp,160));
4791 vinsertf128h(xmm11, Address(rsp,176));
4792 vinsertf128h(xmm12, Address(rsp,192));
4793 vinsertf128h(xmm13, Address(rsp,208));
4794 vinsertf128h(xmm14, Address(rsp,224));
4795 vinsertf128h(xmm15, Address(rsp,240));
4796 #endif
4797 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4798 }
4799 #endif
4800 }
4801 popa();
4802 }
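// fp_runtime_fallback preserves the full register state around a leaf call
// into the C runtime: integer registers via pusha(), XMM registers (plus
// the upper YMM halves when 256-bit vectors may be live) on the stack, and
// the live x87 stack, which is spilled with fstp_d and rebuilt with fld_d
// around the call so the result ends up on top.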
4803
4804 static const double pi_4 = 0.7853981633974483;
4805
4806 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
4807 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
4808 // was attempted in this code; unfortunately it appears that the
4809 // switch to 80-bit precision and back causes this to be
4810 // unprofitable compared with simply performing a runtime call if
4811 // the argument is out of the (-pi/4, pi/4) range.
4812
4813 Register tmp = noreg;
4814 if (!VM_Version::supports_cmov()) {
4815     // fcmp needs a temporary, so preserve rbx
4816 tmp = rbx;
4817 push(tmp);
4818 }
4819
4820 Label slow_case, done;
4821
5217 b = code_string(ss.as_string());
5218 }
5219 BLOCK_COMMENT("verify_oop {");
5220 #ifdef _LP64
5221 push(rscratch1); // save r10, trashed by movptr()
5222 #endif
5223   push(rax); // save rax
5224 push(reg); // pass register argument
5225 ExternalAddress buffer((address) b);
5226 // avoid using pushptr, as it modifies scratch registers
5227 // and our contract is not to modify anything
5228 movptr(rax, buffer.addr());
5229 push(rax);
5230 // call indirectly to solve generation ordering problem
5231 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
5232 call(rax);
5233 // Caller pops the arguments (oop, message) and restores rax, r10
5234 BLOCK_COMMENT("} verify_oop");
5235 }
5236
5237
5238 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
5239 Register tmp,
5240 int offset) {
5241 intptr_t value = *delayed_value_addr;
5242 if (value != 0)
5243 return RegisterOrConstant(value + offset);
5244
5245 // load indirectly to solve generation ordering problem
5246 movptr(tmp, ExternalAddress((address) delayed_value_addr));
5247
5248 #ifdef ASSERT
5249 { Label L;
5250 testptr(tmp, tmp);
5251 if (WizardMode) {
5252 const char* buf = NULL;
5253 {
5254 ResourceMark rm;
5255 stringStream ss;
5256 ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
5257 buf = code_string(ss.as_string());
5746 #endif
5747 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5748 }
5749
5750 void MacroAssembler::load_prototype_header(Register dst, Register src) {
5751 load_klass(dst, src);
5752 movptr(dst, Address(dst, Klass::prototype_header_offset()));
5753 }
5754
5755 void MacroAssembler::store_klass(Register dst, Register src) {
5756 #ifdef _LP64
5757 if (UseCompressedClassPointers) {
5758 encode_klass_not_null(src);
5759 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5760 } else
5761 #endif
5762 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5763 }
5764
5765 void MacroAssembler::load_heap_oop(Register dst, Address src) {
5766 #ifdef _LP64
5767 // FIXME: Must change all places where we try to load the klass.
5768 if (UseCompressedOops) {
5769 movl(dst, src);
5770 decode_heap_oop(dst);
5771 } else
5772 #endif
5773 movptr(dst, src);
5774 }
5775
5776 // Doesn't do verification; generates fixed-size code
5777 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
5778 #ifdef _LP64
5779 if (UseCompressedOops) {
5780 movl(dst, src);
5781 decode_heap_oop_not_null(dst);
5782 } else
5783 #endif
5784 movptr(dst, src);
5785 }
5786
5787 void MacroAssembler::store_heap_oop(Address dst, Register src) {
5788 #ifdef _LP64
5789 if (UseCompressedOops) {
5790 assert(!dst.uses(src), "not enough registers");
5791 encode_heap_oop(src);
5792 movl(dst, src);
5793 } else
5794 #endif
5795 movptr(dst, src);
5796 }
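// Note: with compressed oops, store_heap_oop encodes in place. src is
// destructively narrowed before the 32-bit movl, which is why the assert
// requires that the destination address not use src as a base or index.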
5797
// Below: the same code after the Shenandoah GC support patch.
26 #include "asm/assembler.hpp"
27 #include "asm/assembler.inline.hpp"
28 #include "compiler/disassembler.hpp"
29 #include "gc_interface/collectedHeap.inline.hpp"
30 #include "interpreter/interpreter.hpp"
31 #include "memory/cardTableModRefBS.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "prims/methodHandles.hpp"
35 #include "runtime/biasedLocking.hpp"
36 #include "runtime/interfaceSupport.hpp"
37 #include "runtime/objectMonitor.hpp"
38 #include "runtime/os.hpp"
39 #include "runtime/sharedRuntime.hpp"
40 #include "runtime/stubRoutines.hpp"
41 #include "utilities/macros.hpp"
42 #if INCLUDE_ALL_GCS
43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
45 #include "gc_implementation/g1/heapRegion.hpp"
46 #include "shenandoahBarrierSetAssembler_x86.hpp"
47 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
48 #endif // INCLUDE_ALL_GCS
49
50 #ifdef PRODUCT
51 #define BLOCK_COMMENT(str) /* nothing */
52 #define STOP(error) stop(error)
53 #else
54 #define BLOCK_COMMENT(str) block_comment(str)
55 #define STOP(error) block_comment(error); stop(error)
56 #endif
57
58 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
59
60 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
61
62 #ifdef ASSERT
63 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
64 #endif
65
66 static Assembler::Condition reverse[] = {
67 Assembler::noOverflow /* overflow = 0x0 */ ,
4117 vxorps(dst, nds, as_Address(src), vector256);
4118 } else {
4119 lea(rscratch1, src);
4120 vxorps(dst, nds, Address(rscratch1, 0), vector256);
4121 }
4122 }
4123
4124 void MacroAssembler::resolve_jobject(Register value,
4125 Register thread,
4126 Register tmp) {
4127 assert_different_registers(value, thread, tmp);
4128 Label done, not_weak;
4129 testptr(value, value);
4130 jcc(Assembler::zero, done); // Use NULL as-is.
4131 testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
4132 jcc(Assembler::zero, not_weak);
4133 // Resolve jweak.
4134 movptr(value, Address(value, -JNIHandles::weak_tag_value));
4135 verify_oop(value);
4136 #if INCLUDE_ALL_GCS
4137 if (UseG1GC || (UseShenandoahGC && ShenandoahSATBBarrier)) {
4138 g1_write_barrier_pre(noreg /* obj */,
4139 value /* pre_val */,
4140 thread /* thread */,
4141 tmp /* tmp */,
4142 true /* tosca_live */,
4143 true /* expand_call */);
4144 }
4145 #endif // INCLUDE_ALL_GCS
4146 jmp(done);
4147 bind(not_weak);
4148 // Resolve (untagged) jobject.
4149 movptr(value, Address(value, 0));
4150 verify_oop(value);
4151 bind(done);
4152 }
4153
4154 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4155 const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4156 STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4157 // The inverted mask is sign-extended
4176 assert(thread == r15_thread, "must be");
4177 #endif // _LP64
4178
4179 Label done;
4180 Label runtime;
4181
4182 assert(pre_val != noreg, "check this code");
4183
4184 if (obj != noreg) {
4185 assert_different_registers(obj, pre_val, tmp);
4186 assert(pre_val != rax, "check this code");
4187 }
4188
4189 Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4190 PtrQueue::byte_offset_of_active()));
4191 Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4192 PtrQueue::byte_offset_of_index()));
4193 Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4194 PtrQueue::byte_offset_of_buf()));
4195
4196 if (UseShenandoahGC) {
4197 Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
4198 testb(gc_state, ShenandoahHeap::MARKING);
4199 jcc(Assembler::zero, done);
4200 } else {
4201 assert(UseG1GC, "Should be");
4202 // Is marking active?
4203 if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4204 cmpl(in_progress, 0);
4205 } else {
4206 assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
4207 cmpb(in_progress, 0);
4208 }
4209 jcc(Assembler::equal, done);
4210 }
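  // Shenandoah publishes the collector phase in a per-thread gc_state byte,
  // so "is marking active?" reduces to a single testb of the MARKING bit;
  // G1 still reads the SATB queue's active flag as before.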
4211
4212 // Do we need to load the previous value?
4213 if (obj != noreg) {
4214 load_heap_oop(pre_val, Address(obj, 0));
4215 }
4216
4217 // Is the previous value null?
4218 cmpptr(pre_val, (int32_t) NULL_WORD);
4219 jcc(Assembler::equal, done);
4220
4221 // Can we store original value in the thread's buffer?
4222 // Is index == 0?
4223 // (The index field is typed as size_t.)
4224
4225 movptr(tmp, index); // tmp := *index_adr
4226 cmpptr(tmp, 0); // tmp == 0?
4227 jcc(Assembler::equal, runtime); // If yes, goto runtime
4228
4229 subptr(tmp, wordSize); // tmp := tmp - wordSize
4230 movptr(index, tmp); // *index_adr := tmp
4273 if (pre_val != rax)
4274 pop(pre_val);
4275
4276 if (obj != noreg && obj != rax)
4277 pop(obj);
4278
4279   if (tosca_live) pop(rax);
4280
4281 bind(done);
4282 }
4283
4284 void MacroAssembler::g1_write_barrier_post(Register store_addr,
4285 Register new_val,
4286 Register thread,
4287 Register tmp,
4288 Register tmp2) {
4289 #ifdef _LP64
4290 assert(thread == r15_thread, "must be");
4291 #endif // _LP64
4292
4293 if (UseShenandoahGC) {
4294 // No need for this in Shenandoah.
4295 return;
4296 }
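  // Shenandoah does not use a card table: cross-region references are
  // covered by its own barriers (see the load_heap_oop hooks later in this
  // file), so the card-marking sequence below runs only under G1.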
4297
4298 assert(UseG1GC, "expect G1 GC");
4299
4300 Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4301 PtrQueue::byte_offset_of_index()));
4302 Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4303 PtrQueue::byte_offset_of_buf()));
4304
4305 BarrierSet* bs = Universe::heap()->barrier_set();
4306 CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4307 assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4308
4309 Label done;
4310 Label runtime;
4311
4312 // Does store cross heap regions?
4313
4314 movptr(tmp, store_addr);
4315 xorptr(tmp, new_val);
4316 shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
4317 jcc(Assembler::equal, done);
4318
4319 // crosses regions, storing NULL?
4615
4616 #ifdef _LP64
4617 if (var_size_in_bytes->is_valid()) {
4618 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4619 } else {
4620 addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4621 }
4622 #else
4623 if (var_size_in_bytes->is_valid()) {
4624 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4625 } else {
4626 addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4627 }
4628 adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
4629 #endif
4630 }
4631
4632 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
4633 pusha();
4634
4635   // If we are coming from C1, XMM registers may be live, so spill
4636   // the XMM/YMM state before the runtime call.
4637   save_vector_registers();
4638
4639 // Preserve registers across runtime call
4640 int incoming_argument_and_return_value_offset = -1;
4641 if (num_fpu_regs_in_use > 1) {
4642     // Must preserve all other FPU regs (could alternatively convert
4643     // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
4644     // FPU state, but cannot trust the C compiler)
4645 NEEDS_CLEANUP;
4646 // NOTE that in this case we also push the incoming argument(s) to
4647 // the stack and restore it later; we also use this stack slot to
4648 // hold the return value from dsin, dcos etc.
4649 for (int i = 0; i < num_fpu_regs_in_use; i++) {
4650 subptr(rsp, sizeof(jdouble));
4651 fstp_d(Address(rsp, 0));
4652 }
4653 incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
4654 for (int i = nb_args-1; i >= 0; i--) {
4655 fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
4656 }
4657 }
4683
4684 MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
4685
4686 #ifdef _LP64
4687 movsd(Address(rsp, 0), xmm0);
4688 fld_d(Address(rsp, 0));
4689 #endif // _LP64
4690 addptr(rsp, sizeof(jdouble) * nb_args);
4691 if (num_fpu_regs_in_use > 1) {
4692 // Must save return value to stack and then restore entire FPU
4693 // stack except incoming arguments
4694 fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
4695 for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
4696 fld_d(Address(rsp, 0));
4697 addptr(rsp, sizeof(jdouble));
4698 }
4699 fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
4700 addptr(rsp, sizeof(jdouble) * nb_args);
4701 }
4702
4703 restore_vector_registers();
4704 popa();
4705 }
4706
4707 void MacroAssembler::save_vector_registers() {
4708 int off = 0;
4709 if (UseSSE == 1) {
4710 subptr(rsp, sizeof(jdouble)*8);
4711 movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
4712 movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
4713 movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
4714 movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
4715 movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
4716 movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
4717 movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
4718 movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
4719 } else if (UseSSE >= 2) {
4720 #ifdef COMPILER2
4721 if (MaxVectorSize > 16) {
4722 assert(UseAVX > 0, "256bit vectors are supported only with AVX");
4723 // Save upper half of YMM registes
4724 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4725 vextractf128h(Address(rsp, 0),xmm0);
4726 vextractf128h(Address(rsp, 16),xmm1);
4727 vextractf128h(Address(rsp, 32),xmm2);
4728 vextractf128h(Address(rsp, 48),xmm3);
4729 vextractf128h(Address(rsp, 64),xmm4);
4730 vextractf128h(Address(rsp, 80),xmm5);
4731 vextractf128h(Address(rsp, 96),xmm6);
4732 vextractf128h(Address(rsp,112),xmm7);
4733 #ifdef _LP64
4734 vextractf128h(Address(rsp,128),xmm8);
4735 vextractf128h(Address(rsp,144),xmm9);
4736 vextractf128h(Address(rsp,160),xmm10);
4737 vextractf128h(Address(rsp,176),xmm11);
4738 vextractf128h(Address(rsp,192),xmm12);
4739 vextractf128h(Address(rsp,208),xmm13);
4740 vextractf128h(Address(rsp,224),xmm14);
4741 vextractf128h(Address(rsp,240),xmm15);
4742 #endif
4743 }
4744 #endif
4745     // Save whole 128-bit (16 bytes) XMM registers
4746 subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4747 movdqu(Address(rsp,off++*16),xmm0);
4748 movdqu(Address(rsp,off++*16),xmm1);
4749 movdqu(Address(rsp,off++*16),xmm2);
4750 movdqu(Address(rsp,off++*16),xmm3);
4751 movdqu(Address(rsp,off++*16),xmm4);
4752 movdqu(Address(rsp,off++*16),xmm5);
4753 movdqu(Address(rsp,off++*16),xmm6);
4754 movdqu(Address(rsp,off++*16),xmm7);
4755 #ifdef _LP64
4756 movdqu(Address(rsp,off++*16),xmm8);
4757 movdqu(Address(rsp,off++*16),xmm9);
4758 movdqu(Address(rsp,off++*16),xmm10);
4759 movdqu(Address(rsp,off++*16),xmm11);
4760 movdqu(Address(rsp,off++*16),xmm12);
4761 movdqu(Address(rsp,off++*16),xmm13);
4762 movdqu(Address(rsp,off++*16),xmm14);
4763 movdqu(Address(rsp,off++*16),xmm15);
4764 #endif
4765 }
4766 }
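// The spill sequence above previously lived inline in fp_runtime_fallback;
// factoring it into save_vector_registers()/restore_vector_registers()
// presumably lets other slow paths, such as GC barrier stubs, share it.
// The restore below must mirror the save order exactly.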
4767
4768 void MacroAssembler::restore_vector_registers() {
4769 int off = 0;
4770 if (UseSSE == 1) {
4771 movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
4772 movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
4773 movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
4774 movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
4775 movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
4776 movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
4777 movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
4778 movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
4779 addptr(rsp, sizeof(jdouble)*8);
4780 } else if (UseSSE >= 2) {
4781     // Restore whole 128-bit (16 bytes) XMM registers
4782 movdqu(xmm0, Address(rsp,off++*16));
4783 movdqu(xmm1, Address(rsp,off++*16));
4784 movdqu(xmm2, Address(rsp,off++*16));
4785 movdqu(xmm3, Address(rsp,off++*16));
4786 movdqu(xmm4, Address(rsp,off++*16));
4787 movdqu(xmm5, Address(rsp,off++*16));
4788 movdqu(xmm6, Address(rsp,off++*16));
4789 movdqu(xmm7, Address(rsp,off++*16));
4806 vinsertf128h(xmm2, Address(rsp, 32));
4807 vinsertf128h(xmm3, Address(rsp, 48));
4808 vinsertf128h(xmm4, Address(rsp, 64));
4809 vinsertf128h(xmm5, Address(rsp, 80));
4810 vinsertf128h(xmm6, Address(rsp, 96));
4811 vinsertf128h(xmm7, Address(rsp,112));
4812 #ifdef _LP64
4813 vinsertf128h(xmm8, Address(rsp,128));
4814 vinsertf128h(xmm9, Address(rsp,144));
4815 vinsertf128h(xmm10, Address(rsp,160));
4816 vinsertf128h(xmm11, Address(rsp,176));
4817 vinsertf128h(xmm12, Address(rsp,192));
4818 vinsertf128h(xmm13, Address(rsp,208));
4819 vinsertf128h(xmm14, Address(rsp,224));
4820 vinsertf128h(xmm15, Address(rsp,240));
4821 #endif
4822 addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4823 }
4824 #endif
4825 }
4826 }
4827
4828 static const double pi_4 = 0.7853981633974483;
4829
4830 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
4831 // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
4832 // was attempted in this code; unfortunately it appears that the
4833 // switch to 80-bit precision and back causes this to be
4834 // unprofitable compared with simply performing a runtime call if
4835 // the argument is out of the (-pi/4, pi/4) range.
4836
4837 Register tmp = noreg;
4838 if (!VM_Version::supports_cmov()) {
4839     // fcmp needs a temporary, so preserve rbx
4840 tmp = rbx;
4841 push(tmp);
4842 }
4843
4844 Label slow_case, done;
4845
5241 b = code_string(ss.as_string());
5242 }
5243 BLOCK_COMMENT("verify_oop {");
5244 #ifdef _LP64
5245 push(rscratch1); // save r10, trashed by movptr()
5246 #endif
5247   push(rax); // save rax
5248 push(reg); // pass register argument
5249 ExternalAddress buffer((address) b);
5250 // avoid using pushptr, as it modifies scratch registers
5251 // and our contract is not to modify anything
5252 movptr(rax, buffer.addr());
5253 push(rax);
5254 // call indirectly to solve generation ordering problem
5255 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
5256 call(rax);
5257 // Caller pops the arguments (oop, message) and restores rax, r10
5258 BLOCK_COMMENT("} verify_oop");
5259 }
5260
5261 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
5262 Register tmp,
5263 int offset) {
5264 intptr_t value = *delayed_value_addr;
5265 if (value != 0)
5266 return RegisterOrConstant(value + offset);
5267
5268 // load indirectly to solve generation ordering problem
5269 movptr(tmp, ExternalAddress((address) delayed_value_addr));
5270
5271 #ifdef ASSERT
5272 { Label L;
5273 testptr(tmp, tmp);
5274 if (WizardMode) {
5275 const char* buf = NULL;
5276 {
5277 ResourceMark rm;
5278 stringStream ss;
5279 ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
5280 buf = code_string(ss.as_string());
5769 #endif
5770 movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5771 }
5772
5773 void MacroAssembler::load_prototype_header(Register dst, Register src) {
5774 load_klass(dst, src);
5775 movptr(dst, Address(dst, Klass::prototype_header_offset()));
5776 }
5777
5778 void MacroAssembler::store_klass(Register dst, Register src) {
5779 #ifdef _LP64
5780 if (UseCompressedClassPointers) {
5781 encode_klass_not_null(src);
5782 movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5783 } else
5784 #endif
5785 movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5786 }
5787
5788 void MacroAssembler::load_heap_oop(Register dst, Address src) {
5789 #if INCLUDE_ALL_GCS
5790 if (UseShenandoahGC) {
5791 ShenandoahBarrierSetAssembler::bsasm()->load_heap_oop(this, dst, src);
5792 return;
5793 }
5794 #endif
5795
5796 #ifdef _LP64
5797 // FIXME: Must change all places where we try to load the klass.
5798 if (UseCompressedOops) {
5799 movl(dst, src);
5800 decode_heap_oop(dst);
5801 } else
5802 #endif
5803 movptr(dst, src);
5804 }
5805
5806 // Doesn't do verification; generates fixed-size code
5807 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
5808 #if INCLUDE_ALL_GCS
5809 if (UseShenandoahGC) {
5810 ShenandoahBarrierSetAssembler::bsasm()->load_heap_oop(this, dst, src);
5811 return;
5812 }
5813 #endif
5814
5815 #ifdef _LP64
5816 if (UseCompressedOops) {
5817 movl(dst, src);
5818 decode_heap_oop_not_null(dst);
5819 } else
5820 #endif
5821 movptr(dst, src);
5822 }
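// Under Shenandoah both load_heap_oop variants delegate to the
// ShenandoahBarrierSetAssembler so the collector can apply its own barrier
// to the loaded reference; the compressed-oops paths in these functions are
// reached only for the other collectors.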
5823
5824 void MacroAssembler::store_heap_oop(Address dst, Register src) {
5825 #ifdef _LP64
5826 if (UseCompressedOops) {
5827 assert(!dst.uses(src), "not enough registers");
5828 encode_heap_oop(src);
5829 movl(dst, src);
5830 } else
5831 #endif
5832 movptr(dst, src);
5833 }
5834