< prev index next >

src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc_interface/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/cardTableModRefBS.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/interfaceSupport.hpp"
  37 #include "runtime/objectMonitor.hpp"
  38 #include "runtime/os.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "utilities/macros.hpp"
  42 #if INCLUDE_ALL_GCS
  43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  45 #include "gc_implementation/g1/heapRegion.hpp"


  46 #endif // INCLUDE_ALL_GCS
  47 
  48 #ifdef PRODUCT
  49 #define BLOCK_COMMENT(str) /* nothing */
  50 #define STOP(error) stop(error)
  51 #else
  52 #define BLOCK_COMMENT(str) block_comment(str)
  53 #define STOP(error) block_comment(error); stop(error)
  54 #endif
  55 
  56 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  57 
  58 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
  59 
  60 #ifdef ASSERT
  61 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  62 #endif
  63 
  64 static Assembler::Condition reverse[] = {
  65     Assembler::noOverflow     /* overflow      = 0x0 */ ,


4115     vxorps(dst, nds, as_Address(src), vector256);
4116   } else {
4117     lea(rscratch1, src);
4118     vxorps(dst, nds, Address(rscratch1, 0), vector256);
4119   }
4120 }
4121 
// Resolve a JNI handle in "value" to the underlying oop, in place.
// "thread" is the current JavaThread register; "tmp" is a scratch register.
// A NULL handle resolves to NULL as-is. A handle carrying the jweak tag bit
// is loaded through the weak-handle slot and, under G1, reported to the
// SATB pre-barrier so a concurrently collected referent is kept alive.
void MacroAssembler::resolve_jobject(Register value,
                                     Register thread,
                                     Register tmp) {
  assert_different_registers(value, thread, tmp);
  Label done, not_weak;
  testptr(value, value);
  jcc(Assembler::zero, done);                // Use NULL as-is.
  testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
  jcc(Assembler::zero, not_weak);
  // Resolve jweak: load through the handle at -weak_tag_value,
  // which cancels out the tag bit in the handle address.
  movptr(value, Address(value, -JNIHandles::weak_tag_value));
  verify_oop(value);
#if INCLUDE_ALL_GCS
  if (UseG1GC) {
    // Notify G1's SATB pre-barrier of the loaded referent.
    g1_write_barrier_pre(noreg /* obj */,
                         value /* pre_val */,
                         thread /* thread */,
                         tmp /* tmp */,
                         true /* tosca_live */,
                         true /* expand_call */);
  }
#endif // INCLUDE_ALL_GCS
  jmp(done);
  bind(not_weak);
  // Resolve (untagged) jobject: a plain load through the handle.
  movptr(value, Address(value, 0));
  verify_oop(value);
  bind(done);
}
4151 
4152 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4153   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4154   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4155   // The inverted mask is sign-extended


4174   assert(thread == r15_thread, "must be");
4175 #endif // _LP64
4176 
4177   Label done;
4178   Label runtime;
4179 
4180   assert(pre_val != noreg, "check this code");
4181 
4182   if (obj != noreg) {
4183     assert_different_registers(obj, pre_val, tmp);
4184     assert(pre_val != rax, "check this code");
4185   }
4186 
4187   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4188                                        PtrQueue::byte_offset_of_active()));
4189   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4190                                        PtrQueue::byte_offset_of_index()));
4191   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4192                                        PtrQueue::byte_offset_of_buf()));
4193 
4194 
4195   // Is marking active?
4196   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4197     cmpl(in_progress, 0);
4198   } else {
4199     assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
4200     cmpb(in_progress, 0);







4201   }
4202   jcc(Assembler::equal, done);
4203 
4204   // Do we need to load the previous value?
4205   if (obj != noreg) {
4206     load_heap_oop(pre_val, Address(obj, 0));
4207   }
4208 
4209   // Is the previous value null?
4210   cmpptr(pre_val, (int32_t) NULL_WORD);
4211   jcc(Assembler::equal, done);
4212 
4213   // Can we store original value in the thread's buffer?
4214   // Is index == 0?
4215   // (The index field is typed as size_t.)
4216 
4217   movptr(tmp, index);                   // tmp := *index_adr
4218   cmpptr(tmp, 0);                       // tmp == 0?
4219   jcc(Assembler::equal, runtime);       // If yes, goto runtime
4220 
4221   subptr(tmp, wordSize);                // tmp := tmp - wordSize
4222   movptr(index, tmp);                   // *index_adr := tmp


4265   if (pre_val != rax)
4266     pop(pre_val);
4267 
4268   if (obj != noreg && obj != rax)
4269     pop(obj);
4270 
4271   if(tosca_live) pop(rax);
4272 
4273   bind(done);
4274 }
4275 
4276 void MacroAssembler::g1_write_barrier_post(Register store_addr,
4277                                            Register new_val,
4278                                            Register thread,
4279                                            Register tmp,
4280                                            Register tmp2) {
4281 #ifdef _LP64
4282   assert(thread == r15_thread, "must be");
4283 #endif // _LP64
4284 







4285   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4286                                        PtrQueue::byte_offset_of_index()));
4287   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4288                                        PtrQueue::byte_offset_of_buf()));
4289 
4290   BarrierSet* bs = Universe::heap()->barrier_set();
4291   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4292   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4293 
4294   Label done;
4295   Label runtime;
4296 
4297   // Does store cross heap regions?
4298 
4299   movptr(tmp, store_addr);
4300   xorptr(tmp, new_val);
4301   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
4302   jcc(Assembler::equal, done);
4303 
4304   // crosses regions, storing NULL?


4600 
4601 #ifdef _LP64
4602   if (var_size_in_bytes->is_valid()) {
4603     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4604   } else {
4605     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4606   }
4607 #else
4608   if (var_size_in_bytes->is_valid()) {
4609     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4610   } else {
4611     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4612   }
4613   adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
4614 #endif
4615 }
4616 
4617 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
4618   pusha();
4619 


4620   // if we are coming from c1, xmm registers may be live
4621   int off = 0;
4622   if (UseSSE == 1)  {
4623     subptr(rsp, sizeof(jdouble)*8);
4624     movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
4625     movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
4626     movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
4627     movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
4628     movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
4629     movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
4630     movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
4631     movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
4632   } else if (UseSSE >= 2)  {
4633 #ifdef COMPILER2
4634     if (MaxVectorSize > 16) {
4635       assert(UseAVX > 0, "256bit vectors are supported only with AVX");
4636       // Save upper half of YMM registes
4637       subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4638       vextractf128h(Address(rsp,  0),xmm0);
4639       vextractf128h(Address(rsp, 16),xmm1);
4640       vextractf128h(Address(rsp, 32),xmm2);
4641       vextractf128h(Address(rsp, 48),xmm3);
4642       vextractf128h(Address(rsp, 64),xmm4);
4643       vextractf128h(Address(rsp, 80),xmm5);
4644       vextractf128h(Address(rsp, 96),xmm6);
4645       vextractf128h(Address(rsp,112),xmm7);
4646 #ifdef _LP64
4647       vextractf128h(Address(rsp,128),xmm8);
4648       vextractf128h(Address(rsp,144),xmm9);
4649       vextractf128h(Address(rsp,160),xmm10);
4650       vextractf128h(Address(rsp,176),xmm11);
4651       vextractf128h(Address(rsp,192),xmm12);
4652       vextractf128h(Address(rsp,208),xmm13);
4653       vextractf128h(Address(rsp,224),xmm14);
4654       vextractf128h(Address(rsp,240),xmm15);
4655 #endif
4656     }
4657 #endif
4658     // Save whole 128bit (16 bytes) XMM regiters
4659     subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4660     movdqu(Address(rsp,off++*16),xmm0);
4661     movdqu(Address(rsp,off++*16),xmm1);
4662     movdqu(Address(rsp,off++*16),xmm2);
4663     movdqu(Address(rsp,off++*16),xmm3);
4664     movdqu(Address(rsp,off++*16),xmm4);
4665     movdqu(Address(rsp,off++*16),xmm5);
4666     movdqu(Address(rsp,off++*16),xmm6);
4667     movdqu(Address(rsp,off++*16),xmm7);
4668 #ifdef _LP64
4669     movdqu(Address(rsp,off++*16),xmm8);
4670     movdqu(Address(rsp,off++*16),xmm9);
4671     movdqu(Address(rsp,off++*16),xmm10);
4672     movdqu(Address(rsp,off++*16),xmm11);
4673     movdqu(Address(rsp,off++*16),xmm12);
4674     movdqu(Address(rsp,off++*16),xmm13);
4675     movdqu(Address(rsp,off++*16),xmm14);
4676     movdqu(Address(rsp,off++*16),xmm15);
4677 #endif
4678   }
4679 
4680   // Preserve registers across runtime call
4681   int incoming_argument_and_return_value_offset = -1;
4682   if (num_fpu_regs_in_use > 1) {
4683     // Must preserve all other FPU regs (could alternatively convert
4684     // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
4685     // FPU state, but can not trust C compiler)
4686     NEEDS_CLEANUP;
4687     // NOTE that in this case we also push the incoming argument(s) to
4688     // the stack and restore it later; we also use this stack slot to
4689     // hold the return value from dsin, dcos etc.
4690     for (int i = 0; i < num_fpu_regs_in_use; i++) {
4691       subptr(rsp, sizeof(jdouble));
4692       fstp_d(Address(rsp, 0));
4693     }
4694     incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
4695     for (int i = nb_args-1; i >= 0; i--) {
4696       fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
4697     }
4698   }


4724 
4725   MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
4726 
4727 #ifdef _LP64
4728   movsd(Address(rsp, 0), xmm0);
4729   fld_d(Address(rsp, 0));
4730 #endif // _LP64
4731   addptr(rsp, sizeof(jdouble) * nb_args);
4732   if (num_fpu_regs_in_use > 1) {
4733     // Must save return value to stack and then restore entire FPU
4734     // stack except incoming arguments
4735     fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
4736     for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
4737       fld_d(Address(rsp, 0));
4738       addptr(rsp, sizeof(jdouble));
4739     }
4740     fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
4741     addptr(rsp, sizeof(jdouble) * nb_args);
4742   }
4743 
4744   off = 0;


































































4745   if (UseSSE == 1)  {
4746     movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
4747     movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
4748     movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
4749     movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
4750     movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
4751     movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
4752     movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
4753     movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
4754     addptr(rsp, sizeof(jdouble)*8);
4755   } else if (UseSSE >= 2)  {
4756     // Restore whole 128bit (16 bytes) XMM regiters
4757     movdqu(xmm0, Address(rsp,off++*16));
4758     movdqu(xmm1, Address(rsp,off++*16));
4759     movdqu(xmm2, Address(rsp,off++*16));
4760     movdqu(xmm3, Address(rsp,off++*16));
4761     movdqu(xmm4, Address(rsp,off++*16));
4762     movdqu(xmm5, Address(rsp,off++*16));
4763     movdqu(xmm6, Address(rsp,off++*16));
4764     movdqu(xmm7, Address(rsp,off++*16));


4781       vinsertf128h(xmm2, Address(rsp, 32));
4782       vinsertf128h(xmm3, Address(rsp, 48));
4783       vinsertf128h(xmm4, Address(rsp, 64));
4784       vinsertf128h(xmm5, Address(rsp, 80));
4785       vinsertf128h(xmm6, Address(rsp, 96));
4786       vinsertf128h(xmm7, Address(rsp,112));
4787 #ifdef _LP64
4788       vinsertf128h(xmm8, Address(rsp,128));
4789       vinsertf128h(xmm9, Address(rsp,144));
4790       vinsertf128h(xmm10, Address(rsp,160));
4791       vinsertf128h(xmm11, Address(rsp,176));
4792       vinsertf128h(xmm12, Address(rsp,192));
4793       vinsertf128h(xmm13, Address(rsp,208));
4794       vinsertf128h(xmm14, Address(rsp,224));
4795       vinsertf128h(xmm15, Address(rsp,240));
4796 #endif
4797       addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4798     }
4799 #endif
4800   }
4801   popa();
4802 }
4803 
4804 static const double     pi_4 =  0.7853981633974483;
4805 
4806 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
4807   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
4808   // was attempted in this code; unfortunately it appears that the
4809   // switch to 80-bit precision and back causes this to be
4810   // unprofitable compared with simply performing a runtime call if
4811   // the argument is out of the (-pi/4, pi/4) range.
4812 
4813   Register tmp = noreg;
4814   if (!VM_Version::supports_cmov()) {
4815     // fcmp needs a temporary so preserve rbx,
4816     tmp = rbx;
4817     push(tmp);
4818   }
4819 
4820   Label slow_case, done;
4821 


5217     b = code_string(ss.as_string());
5218   }
5219   BLOCK_COMMENT("verify_oop {");
5220 #ifdef _LP64
5221   push(rscratch1);                    // save r10, trashed by movptr()
5222 #endif
5223   push(rax);                          // save rax,
5224   push(reg);                          // pass register argument
5225   ExternalAddress buffer((address) b);
5226   // avoid using pushptr, as it modifies scratch registers
5227   // and our contract is not to modify anything
5228   movptr(rax, buffer.addr());
5229   push(rax);
5230   // call indirectly to solve generation ordering problem
5231   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
5232   call(rax);
5233   // Caller pops the arguments (oop, message) and restores rax, r10
5234   BLOCK_COMMENT("} verify_oop");
5235 }
5236 
5237 
5238 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
5239                                                       Register tmp,
5240                                                       int offset) {
5241   intptr_t value = *delayed_value_addr;
5242   if (value != 0)
5243     return RegisterOrConstant(value + offset);
5244 
5245   // load indirectly to solve generation ordering problem
5246   movptr(tmp, ExternalAddress((address) delayed_value_addr));
5247 
5248 #ifdef ASSERT
5249   { Label L;
5250     testptr(tmp, tmp);
5251     if (WizardMode) {
5252       const char* buf = NULL;
5253       {
5254         ResourceMark rm;
5255         stringStream ss;
5256         ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
5257         buf = code_string(ss.as_string());


5746 #endif
5747     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5748 }
5749 
// Load the prototype header (mark-word template) of src's klass into dst.
// Clobbers dst (first used to hold the klass pointer).
void MacroAssembler::load_prototype_header(Register dst, Register src) {
  load_klass(dst, src);
  movptr(dst, Address(dst, Klass::prototype_header_offset()));
}
5754 
5755 void MacroAssembler::store_klass(Register dst, Register src) {
5756 #ifdef _LP64
5757   if (UseCompressedClassPointers) {
5758     encode_klass_not_null(src);
5759     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5760   } else
5761 #endif
5762     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5763 }
5764 
5765 void MacroAssembler::load_heap_oop(Register dst, Address src) {







5766 #ifdef _LP64
5767   // FIXME: Must change all places where we try to load the klass.
5768   if (UseCompressedOops) {
5769     movl(dst, src);
5770     decode_heap_oop(dst);
5771   } else
5772 #endif
5773     movptr(dst, src);
5774 }
5775 
5776 // Doesn't do verfication, generates fixed size code
5777 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {







5778 #ifdef _LP64
5779   if (UseCompressedOops) {
5780     movl(dst, src);
5781     decode_heap_oop_not_null(dst);
5782   } else
5783 #endif
5784     movptr(dst, src);
5785 }
5786 
5787 void MacroAssembler::store_heap_oop(Address dst, Register src) {
5788 #ifdef _LP64
5789   if (UseCompressedOops) {
5790     assert(!dst.uses(src), "not enough registers");
5791     encode_heap_oop(src);
5792     movl(dst, src);
5793   } else
5794 #endif
5795     movptr(dst, src);
5796 }
5797 




  26 #include "asm/assembler.hpp"
  27 #include "asm/assembler.inline.hpp"
  28 #include "compiler/disassembler.hpp"
  29 #include "gc_interface/collectedHeap.inline.hpp"
  30 #include "interpreter/interpreter.hpp"
  31 #include "memory/cardTableModRefBS.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "prims/methodHandles.hpp"
  35 #include "runtime/biasedLocking.hpp"
  36 #include "runtime/interfaceSupport.hpp"
  37 #include "runtime/objectMonitor.hpp"
  38 #include "runtime/os.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "utilities/macros.hpp"
  42 #if INCLUDE_ALL_GCS
  43 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  44 #include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
  45 #include "gc_implementation/g1/heapRegion.hpp"
  46 #include "shenandoahBarrierSetAssembler_x86.hpp"
  47 #include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
  48 #endif // INCLUDE_ALL_GCS
  49 
  50 #ifdef PRODUCT
  51 #define BLOCK_COMMENT(str) /* nothing */
  52 #define STOP(error) stop(error)
  53 #else
  54 #define BLOCK_COMMENT(str) block_comment(str)
  55 #define STOP(error) block_comment(error); stop(error)
  56 #endif
  57 
  58 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  59 
  60 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
  61 
  62 #ifdef ASSERT
  63 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  64 #endif
  65 
  66 static Assembler::Condition reverse[] = {
  67     Assembler::noOverflow     /* overflow      = 0x0 */ ,


4117     vxorps(dst, nds, as_Address(src), vector256);
4118   } else {
4119     lea(rscratch1, src);
4120     vxorps(dst, nds, Address(rscratch1, 0), vector256);
4121   }
4122 }
4123 
// Resolve a JNI handle in "value" to the underlying oop, in place.
// "thread" is the current JavaThread register; "tmp" is a scratch register.
// A NULL handle resolves to NULL as-is. A handle carrying the jweak tag bit
// is loaded through the weak-handle slot and, under G1 or Shenandoah (with
// the SATB barrier enabled), reported to the SATB pre-barrier so a
// concurrently collected referent is kept alive.
void MacroAssembler::resolve_jobject(Register value,
                                     Register thread,
                                     Register tmp) {
  assert_different_registers(value, thread, tmp);
  Label done, not_weak;
  testptr(value, value);
  jcc(Assembler::zero, done);                // Use NULL as-is.
  testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag.
  jcc(Assembler::zero, not_weak);
  // Resolve jweak: load through the handle at -weak_tag_value,
  // which cancels out the tag bit in the handle address.
  movptr(value, Address(value, -JNIHandles::weak_tag_value));
  verify_oop(value);
#if INCLUDE_ALL_GCS
  if (UseG1GC || (UseShenandoahGC && ShenandoahSATBBarrier)) {
    // Notify the SATB pre-barrier of the loaded referent.
    g1_write_barrier_pre(noreg /* obj */,
                         value /* pre_val */,
                         thread /* thread */,
                         tmp /* tmp */,
                         true /* tosca_live */,
                         true /* expand_call */);
  }
#endif // INCLUDE_ALL_GCS
  jmp(done);
  bind(not_weak);
  // Resolve (untagged) jobject: a plain load through the handle.
  movptr(value, Address(value, 0));
  verify_oop(value);
  bind(done);
}
4153 
4154 void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
4155   const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
4156   STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
4157   // The inverted mask is sign-extended


4176   assert(thread == r15_thread, "must be");
4177 #endif // _LP64
4178 
4179   Label done;
4180   Label runtime;
4181 
4182   assert(pre_val != noreg, "check this code");
4183 
4184   if (obj != noreg) {
4185     assert_different_registers(obj, pre_val, tmp);
4186     assert(pre_val != rax, "check this code");
4187   }
4188 
4189   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4190                                        PtrQueue::byte_offset_of_active()));
4191   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4192                                        PtrQueue::byte_offset_of_index()));
4193   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
4194                                        PtrQueue::byte_offset_of_buf()));
4195 
4196   if (UseShenandoahGC) {
4197     Address gc_state(thread, in_bytes(JavaThread::gc_state_offset()));
4198     testb(gc_state, ShenandoahHeap::MARKING);
4199     jcc(Assembler::zero, done);
4200   } else {
4201     assert(UseG1GC, "Should be");
4202     // Is marking active?
4203     if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
4204       cmpl(in_progress, 0);
4205     } else {
4206       assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
4207       cmpb(in_progress, 0);
4208     }
4209     jcc(Assembler::equal, done);
4210   }

4211 
4212   // Do we need to load the previous value?
4213   if (obj != noreg) {
4214     load_heap_oop(pre_val, Address(obj, 0));
4215   }
4216 
4217   // Is the previous value null?
4218   cmpptr(pre_val, (int32_t) NULL_WORD);
4219   jcc(Assembler::equal, done);
4220 
4221   // Can we store original value in the thread's buffer?
4222   // Is index == 0?
4223   // (The index field is typed as size_t.)
4224 
4225   movptr(tmp, index);                   // tmp := *index_adr
4226   cmpptr(tmp, 0);                       // tmp == 0?
4227   jcc(Assembler::equal, runtime);       // If yes, goto runtime
4228 
4229   subptr(tmp, wordSize);                // tmp := tmp - wordSize
4230   movptr(index, tmp);                   // *index_adr := tmp


4273   if (pre_val != rax)
4274     pop(pre_val);
4275 
4276   if (obj != noreg && obj != rax)
4277     pop(obj);
4278 
4279   if(tosca_live) pop(rax);
4280 
4281   bind(done);
4282 }
4283 
4284 void MacroAssembler::g1_write_barrier_post(Register store_addr,
4285                                            Register new_val,
4286                                            Register thread,
4287                                            Register tmp,
4288                                            Register tmp2) {
4289 #ifdef _LP64
4290   assert(thread == r15_thread, "must be");
4291 #endif // _LP64
4292 
4293   if (UseShenandoahGC) {
4294     // No need for this in Shenandoah.
4295     return;
4296   }
4297 
4298   assert(UseG1GC, "expect G1 GC");
4299 
4300   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4301                                        PtrQueue::byte_offset_of_index()));
4302   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
4303                                        PtrQueue::byte_offset_of_buf()));
4304 
4305   BarrierSet* bs = Universe::heap()->barrier_set();
4306   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
4307   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
4308 
4309   Label done;
4310   Label runtime;
4311 
4312   // Does store cross heap regions?
4313 
4314   movptr(tmp, store_addr);
4315   xorptr(tmp, new_val);
4316   shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
4317   jcc(Assembler::equal, done);
4318 
4319   // crosses regions, storing NULL?


4615 
4616 #ifdef _LP64
4617   if (var_size_in_bytes->is_valid()) {
4618     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4619   } else {
4620     addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4621   }
4622 #else
4623   if (var_size_in_bytes->is_valid()) {
4624     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes);
4625   } else {
4626     addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes);
4627   }
4628   adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0);
4629 #endif
4630 }
4631 
4632 void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
4633   pusha();
4634 
4635   save_vector_registers();
4636 
4637   // if we are coming from c1, xmm registers may be live


























































4638 
4639   // Preserve registers across runtime call
4640   int incoming_argument_and_return_value_offset = -1;
4641   if (num_fpu_regs_in_use > 1) {
4642     // Must preserve all other FPU regs (could alternatively convert
4643     // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
4644     // FPU state, but can not trust C compiler)
4645     NEEDS_CLEANUP;
4646     // NOTE that in this case we also push the incoming argument(s) to
4647     // the stack and restore it later; we also use this stack slot to
4648     // hold the return value from dsin, dcos etc.
4649     for (int i = 0; i < num_fpu_regs_in_use; i++) {
4650       subptr(rsp, sizeof(jdouble));
4651       fstp_d(Address(rsp, 0));
4652     }
4653     incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
4654     for (int i = nb_args-1; i >= 0; i--) {
4655       fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
4656     }
4657   }


4683 
4684   MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
4685 
4686 #ifdef _LP64
4687   movsd(Address(rsp, 0), xmm0);
4688   fld_d(Address(rsp, 0));
4689 #endif // _LP64
4690   addptr(rsp, sizeof(jdouble) * nb_args);
4691   if (num_fpu_regs_in_use > 1) {
4692     // Must save return value to stack and then restore entire FPU
4693     // stack except incoming arguments
4694     fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
4695     for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
4696       fld_d(Address(rsp, 0));
4697       addptr(rsp, sizeof(jdouble));
4698     }
4699     fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
4700     addptr(rsp, sizeof(jdouble) * nb_args);
4701   }
4702 
4703   restore_vector_registers();
4704   popa();
4705 }
4706 
// Spill the live FP/SIMD register state to the stack ahead of a runtime
// call. Counterpart of restore_vector_registers(), which must be called
// with the stack in exactly the state this leaves it.
//  - UseSSE == 1: saves the float halves of xmm0-xmm7 into 8 jdouble slots.
//  - UseSSE >= 2: saves the full 128-bit xmm0-xmm7 (plus xmm8-xmm15 on
//    64-bit); with COMPILER2 and MaxVectorSize > 16, the upper 128-bit
//    halves of the YMM registers are spilled first into a separate area.
void MacroAssembler::save_vector_registers() {
  int off = 0;
  if (UseSSE == 1)  {
    subptr(rsp, sizeof(jdouble)*8);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2)  {
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      assert(UseAVX > 0, "256bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
      vextractf128h(Address(rsp,  0),xmm0);
      vextractf128h(Address(rsp, 16),xmm1);
      vextractf128h(Address(rsp, 32),xmm2);
      vextractf128h(Address(rsp, 48),xmm3);
      vextractf128h(Address(rsp, 64),xmm4);
      vextractf128h(Address(rsp, 80),xmm5);
      vextractf128h(Address(rsp, 96),xmm6);
      vextractf128h(Address(rsp,112),xmm7);
#ifdef _LP64
      vextractf128h(Address(rsp,128),xmm8);
      vextractf128h(Address(rsp,144),xmm9);
      vextractf128h(Address(rsp,160),xmm10);
      vextractf128h(Address(rsp,176),xmm11);
      vextractf128h(Address(rsp,192),xmm12);
      vextractf128h(Address(rsp,208),xmm13);
      vextractf128h(Address(rsp,224),xmm14);
      vextractf128h(Address(rsp,240),xmm15);
#endif
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
    movdqu(Address(rsp,off++*16),xmm0);
    movdqu(Address(rsp,off++*16),xmm1);
    movdqu(Address(rsp,off++*16),xmm2);
    movdqu(Address(rsp,off++*16),xmm3);
    movdqu(Address(rsp,off++*16),xmm4);
    movdqu(Address(rsp,off++*16),xmm5);
    movdqu(Address(rsp,off++*16),xmm6);
    movdqu(Address(rsp,off++*16),xmm7);
#ifdef _LP64
    movdqu(Address(rsp,off++*16),xmm8);
    movdqu(Address(rsp,off++*16),xmm9);
    movdqu(Address(rsp,off++*16),xmm10);
    movdqu(Address(rsp,off++*16),xmm11);
    movdqu(Address(rsp,off++*16),xmm12);
    movdqu(Address(rsp,off++*16),xmm13);
    movdqu(Address(rsp,off++*16),xmm14);
    movdqu(Address(rsp,off++*16),xmm15);
#endif
  }
}
4767 
4768 void MacroAssembler::restore_vector_registers() {
4769   int off = 0;
4770   if (UseSSE == 1)  {
4771     movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
4772     movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
4773     movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
4774     movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
4775     movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
4776     movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
4777     movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
4778     movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
4779     addptr(rsp, sizeof(jdouble)*8);
4780   } else if (UseSSE >= 2)  {
4781     // Restore whole 128bit (16 bytes) XMM regiters
4782     movdqu(xmm0, Address(rsp,off++*16));
4783     movdqu(xmm1, Address(rsp,off++*16));
4784     movdqu(xmm2, Address(rsp,off++*16));
4785     movdqu(xmm3, Address(rsp,off++*16));
4786     movdqu(xmm4, Address(rsp,off++*16));
4787     movdqu(xmm5, Address(rsp,off++*16));
4788     movdqu(xmm6, Address(rsp,off++*16));
4789     movdqu(xmm7, Address(rsp,off++*16));


4806       vinsertf128h(xmm2, Address(rsp, 32));
4807       vinsertf128h(xmm3, Address(rsp, 48));
4808       vinsertf128h(xmm4, Address(rsp, 64));
4809       vinsertf128h(xmm5, Address(rsp, 80));
4810       vinsertf128h(xmm6, Address(rsp, 96));
4811       vinsertf128h(xmm7, Address(rsp,112));
4812 #ifdef _LP64
4813       vinsertf128h(xmm8, Address(rsp,128));
4814       vinsertf128h(xmm9, Address(rsp,144));
4815       vinsertf128h(xmm10, Address(rsp,160));
4816       vinsertf128h(xmm11, Address(rsp,176));
4817       vinsertf128h(xmm12, Address(rsp,192));
4818       vinsertf128h(xmm13, Address(rsp,208));
4819       vinsertf128h(xmm14, Address(rsp,224));
4820       vinsertf128h(xmm15, Address(rsp,240));
4821 #endif
4822       addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8));
4823     }
4824 #endif
4825   }

4826 }
4827 
4828 static const double     pi_4 =  0.7853981633974483;
4829 
4830 void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
4831   // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
4832   // was attempted in this code; unfortunately it appears that the
4833   // switch to 80-bit precision and back causes this to be
4834   // unprofitable compared with simply performing a runtime call if
4835   // the argument is out of the (-pi/4, pi/4) range.
4836 
4837   Register tmp = noreg;
4838   if (!VM_Version::supports_cmov()) {
4839     // fcmp needs a temporary so preserve rbx,
4840     tmp = rbx;
4841     push(tmp);
4842   }
4843 
4844   Label slow_case, done;
4845 


5241     b = code_string(ss.as_string());
5242   }
5243   BLOCK_COMMENT("verify_oop {");
5244 #ifdef _LP64
5245   push(rscratch1);                    // save r10, trashed by movptr()
5246 #endif
5247   push(rax);                          // save rax,
5248   push(reg);                          // pass register argument
5249   ExternalAddress buffer((address) b);
5250   // avoid using pushptr, as it modifies scratch registers
5251   // and our contract is not to modify anything
5252   movptr(rax, buffer.addr());
5253   push(rax);
5254   // call indirectly to solve generation ordering problem
5255   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
5256   call(rax);
5257   // Caller pops the arguments (oop, message) and restores rax, r10
5258   BLOCK_COMMENT("} verify_oop");
5259 }
5260 

5261 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
5262                                                       Register tmp,
5263                                                       int offset) {
5264   intptr_t value = *delayed_value_addr;
5265   if (value != 0)
5266     return RegisterOrConstant(value + offset);
5267 
5268   // load indirectly to solve generation ordering problem
5269   movptr(tmp, ExternalAddress((address) delayed_value_addr));
5270 
5271 #ifdef ASSERT
5272   { Label L;
5273     testptr(tmp, tmp);
5274     if (WizardMode) {
5275       const char* buf = NULL;
5276       {
5277         ResourceMark rm;
5278         stringStream ss;
5279         ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
5280         buf = code_string(ss.as_string());


5769 #endif
5770     movptr(dst, Address(src, oopDesc::klass_offset_in_bytes()));
5771 }
5772 
// Load into dst the prototype header word stored in src's klass
// (Klass::_prototype_header). dst is clobbered (first holds the klass,
// then the prototype header); src is left untouched.
// NOTE(review): the prototype header is typically the biased-locking
// prototype mark word — confirm against Klass definition.
5773 void MacroAssembler::load_prototype_header(Register dst, Register src) {
5774   load_klass(dst, src);
5775   movptr(dst, Address(dst, Klass::prototype_header_offset()));
5776 }
5777 
5778 void MacroAssembler::store_klass(Register dst, Register src) {
5779 #ifdef _LP64
5780   if (UseCompressedClassPointers) {
5781     encode_klass_not_null(src);
5782     movl(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5783   } else
5784 #endif
5785     movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src);
5786 }
5787 
5788 void MacroAssembler::load_heap_oop(Register dst, Address src) {
5789 #if INCLUDE_ALL_GCS
5790   if (UseShenandoahGC) {
5791     ShenandoahBarrierSetAssembler::bsasm()->load_heap_oop(this, dst, src);
5792     return;
5793   }
5794 #endif
5795 
5796 #ifdef _LP64
5797   // FIXME: Must change all places where we try to load the klass.
5798   if (UseCompressedOops) {
5799     movl(dst, src);
5800     decode_heap_oop(dst);
5801   } else
5802 #endif
5803     movptr(dst, src);
5804 }
5805 
5806 // Doesn't do verfication, generates fixed size code
5807 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) {
5808 #if INCLUDE_ALL_GCS
5809   if (UseShenandoahGC) {
5810     ShenandoahBarrierSetAssembler::bsasm()->load_heap_oop(this, dst, src);
5811     return;
5812   }
5813 #endif
5814 
5815 #ifdef _LP64
5816   if (UseCompressedOops) {
5817     movl(dst, src);
5818     decode_heap_oop_not_null(dst);
5819   } else
5820 #endif
5821     movptr(dst, src);
5822 }
5823 
5824 void MacroAssembler::store_heap_oop(Address dst, Register src) {
5825 #ifdef _LP64
5826   if (UseCompressedOops) {
5827     assert(!dst.uses(src), "not enough registers");
5828     encode_heap_oop(src);
5829     movl(dst, src);
5830   } else
5831 #endif
5832     movptr(dst, src);
5833 }
5834 


< prev index next >