| 
 
4015   //  If TLAB is enabled:
4016   //    Try to allocate in the TLAB.
4017   //    If that fails, go to the slow path.
4018   //    Initialize the allocation.
4019   //    Exit.
4020   //
4021   //  Go to slow path.
4022 
4023   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
4024 
4025   if (UseTLAB) {
4026     NOT_LP64(__ get_thread(thread);)
4027     __ tlab_allocate(thread, rax, rdx, 0, rcx, rbx, slow_case);
4028     if (ZeroTLAB) {
4029       // the fields have already been cleared
4030       __ jmp(initialize_header);
4031     }
4032 
4033     // The object's fields are initialized before the header.  If no bytes
4034     // remain once the header size is subtracted, skip straight to the header.
4035     __ decrement(rdx, sizeof(oopDesc));
4036     __ jcc(Assembler::zero, initialize_header);
4037 
4038     // Zero rcx for the stores below and divide rdx by 8 so that it counts
4039     // the 8-byte units still to be cleared.
4040     __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
4041     __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
4042 
4043     // rdx must have been multiple of 8
4044 #ifdef ASSERT
4045     // make sure rdx was multiple of 8
4046     Label L;
4047     // Ignore partial flag stall after shrl() since it is debug VM
4048     __ jcc(Assembler::carryClear, L);
4049     __ stop("object size is not multiple of 2 - adjust this code");
4050     __ bind(L);
4051     // rdx must be > 0, no extra check needed here
4052 #endif
4053 
4054     // initialize remaining object fields: rdx was a multiple of 8
4055     { Label loop;
4056     __ bind(loop);
4057     __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
4058     NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
4059     __ decrement(rdx);
4060     __ jcc(Assembler::notZero, loop);
4061     }
4062 
4063     // initialize object header only.
4064     __ bind(initialize_header);
4065     __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
4066               (intptr_t)markWord::prototype().value()); // header
4067     __ pop(rcx);   // get saved klass back in the register.
4068 #ifdef _LP64
4069     __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
4070     __ store_klass_gap(rax, rsi);  // zero klass gap for compressed oops
4071 #endif
4072     __ store_klass(rax, rcx, rscratch1);  // klass
4073 
4074     {
4075       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0, rscratch1);
4076       // Trigger dtrace event for fastpath
4077       __ push(atos);
4078       __ call_VM_leaf(
4079            CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), rax);
4080       __ pop(atos);
4081     }
4082 
4083     __ jmp(done);
4084   }
4085 
4086   // slow case
4087   __ bind(slow_case);
4088   __ pop(rcx);   // restore stack pointer to what it was when we came in.
4089   __ bind(slow_case_no_pop);
4090 
4091   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4092   Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
 | 
 
4015   //  If TLAB is enabled:
4016   //    Try to allocate in the TLAB.
4017   //    If that fails, go to the slow path.
4018   //    Initialize the allocation.
4019   //    Exit.
4020   //
4021   //  Go to slow path.
4022 
4023   const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx);
4024 
4025   if (UseTLAB) {
4026     NOT_LP64(__ get_thread(thread);)
4027     __ tlab_allocate(thread, rax, rdx, 0, rcx, rbx, slow_case);
4028     if (ZeroTLAB) {
4029       // the fields have already been cleared
4030       __ jmp(initialize_header);
4031     }
4032 
4033     // The object's fields are initialized before the header.  If no bytes
4034     // remain once the header size is subtracted, skip straight to the header.
4035     if (UseCompactObjectHeaders) {
4036       assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
4037       __ decrement(rdx, oopDesc::base_offset_in_bytes());
4038     } else {
4039       __ decrement(rdx, sizeof(oopDesc));
4040     }
4041     __ jcc(Assembler::zero, initialize_header);
4042 
4043     // Zero rcx for the stores below and divide rdx by 8 so that it counts
4044     // the 8-byte units still to be cleared.
4045     __ xorl(rcx, rcx);    // use zero reg to clear memory (shorter code)
4046     __ shrl(rdx, LogBytesPerLong); // divide by 2*oopSize and set carry flag if odd
4047 
4048     // rdx must have been multiple of 8
4049 #ifdef ASSERT
4050     // make sure rdx was multiple of 8
4051     Label L;
4052     // Ignore partial flag stall after shrl() since it is debug VM
4053     __ jcc(Assembler::carryClear, L);
4054     __ stop("object size is not multiple of 2 - adjust this code");
4055     __ bind(L);
4056     // rdx must be > 0, no extra check needed here
4057 #endif
4058 
4059     // initialize remaining object fields: rdx was a multiple of 8
4060     { Label loop;
4061     __ bind(loop);
4062     if (UseCompactObjectHeaders) {
4063       assert(is_aligned(oopDesc::base_offset_in_bytes(), BytesPerLong), "oop base offset must be 8-byte-aligned");
4064       int header_size = oopDesc::base_offset_in_bytes();
4065       __ movptr(Address(rax, rdx, Address::times_8, header_size - 1*oopSize), rcx);
4066       NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, header_size - 2*oopSize), rcx));
4067     } else {
4068       __ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize), rcx);
4069       NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 2*oopSize), rcx));
4070     }
4071     __ decrement(rdx);
4072     __ jcc(Assembler::notZero, loop);
4073     }
4074 
4075     // initialize object header only.
4076     __ bind(initialize_header);
4077     if (UseCompactObjectHeaders) {
4078       __ pop(rcx);   // get saved klass back in the register.
4079       __ movptr(rbx, Address(rcx, Klass::prototype_header_offset()));
4080       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), rbx);
4081     } else {
4082       __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()),
4083                 (intptr_t)markWord::prototype().value()); // header
4084       __ pop(rcx);   // get saved klass back in the register.
4085 #ifdef _LP64
4086       __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code)
4087       __ store_klass_gap(rax, rsi);  // zero klass gap for compressed oops
4088 #endif
4089       __ store_klass(rax, rcx, rscratch1);  // klass
4090     }
4091 
4092     {
4093       SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0, rscratch1);
4094       // Trigger dtrace event for fastpath
4095       __ push(atos);
4096       __ call_VM_leaf(
4097            CAST_FROM_FN_PTR(address, static_cast<int (*)(oopDesc*)>(SharedRuntime::dtrace_object_alloc)), rax);
4098       __ pop(atos);
4099     }
4100 
4101     __ jmp(done);
4102   }
4103 
4104   // slow case
4105   __ bind(slow_case);
4106   __ pop(rcx);   // restore stack pointer to what it was when we came in.
4107   __ bind(slow_case_no_pop);
4108 
4109   Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax);
4110   Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx);
 |