256 assert(tmpReg == rax, "");
257 assert(cx1Reg == noreg, "");
258 assert(cx2Reg == noreg, "");
259 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
260
261 // Possible cases that we'll encounter in fast_lock
262 // ------------------------------------------------
263 // * Inflated
264 // -- unlocked
265 // -- Locked
266 // = by self
267 // = by other
268 // * neutral
269 // * stack-locked
270 // -- by self
271 // = sp-proximity test hits
272 // = sp-proximity test generates false-negative
273 // -- by other
274 //
275
276 Label IsInflated, DONE_LABEL, NO_COUNT, COUNT;
277
278 if (DiagnoseSyncOnValueBasedClasses != 0) {
279 load_klass(tmpReg, objReg, scrReg);
280 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
281 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
282 jcc(Assembler::notZero, DONE_LABEL);
283 }
284
285 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
286 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
287 jcc(Assembler::notZero, IsInflated);
288
289 if (LockingMode == LM_MONITOR) {
290 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
291 testptr(objReg, objReg);
292 } else {
293 assert(LockingMode == LM_LEGACY, "must be");
294 // Attempt stack-locking ...
295 orptr (tmpReg, markWord::unlocked_value);
296 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
297 lock();
298 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
299 jcc(Assembler::equal, COUNT); // Success
300
301 // Recursive locking.
302 // The object is stack-locked: markword contains stack pointer to BasicLock.
303 // Locked by current thread if difference with current SP is less than one page.
304 subptr(tmpReg, rsp);
305 // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
306 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
307 movptr(Address(boxReg, 0), tmpReg);
308 }
309 jmp(DONE_LABEL);
310
311 bind(IsInflated);
312 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
313
314 #ifndef _LP64
315 // The object is inflated.
316
317 // boxReg refers to the on-stack BasicLock in the current frame.
318 // We'd like to write:
319 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
320 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
321 // additional latency as we have another ST in the store buffer that must drain.
322
323 // avoid ST-before-CAS
324 // register juggle because we need tmpReg for cmpxchgptr below
325 movptr(scrReg, boxReg);
326 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
327
328 // Optimistic form: consider XORL tmpReg,tmpReg
329 movptr(tmpReg, NULL_WORD);
330
331 // Appears unlocked - try to swing _owner from null to non-null.
332 // Ideally, I'd manifest "Self" with get_thread and then attempt
333 // to CAS the register containing Self into m->Owner.
334 // But we don't have enough registers, so instead we can either try to CAS
335 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
336 // we later store "Self" into m->Owner. Transiently storing a stack address
337 // (rsp or the address of the box) into m->owner is harmless.
338 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
339 lock();
340 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
341 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
342 // If we weren't able to swing _owner from null to the BasicLock
343 // then take the slow path.
344 jccb (Assembler::notZero, NO_COUNT);
345 // update _owner from BasicLock to thread
346 get_thread (scrReg); // beware: clobbers ICCs
347 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
348 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
349
350 // If the CAS fails we can either retry or pass control to the slow path.
351 // We use the latter tactic.
352 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
353 // If the CAS was successful ...
354 // Self has acquired the lock
355 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
356 // Intentional fall-through into DONE_LABEL ...
357 #else // _LP64
358 // It's inflated and we use scrReg for ObjectMonitor* in this section.
359 movq(scrReg, tmpReg);
360 xorq(tmpReg, tmpReg);
361 lock();
362 cmpxchgptr(thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
363 // Unconditionally set box->_displaced_header = markWord::unused_mark().
364 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
365 movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
366 // Propagate ICC.ZF from CAS above into DONE_LABEL.
367 jccb(Assembler::equal, COUNT); // CAS above succeeded; propagate ZF = 1 (success)
368
369 cmpptr(thread, rax); // Check if we are already the owner (recursive lock)
370 jccb(Assembler::notEqual, NO_COUNT); // If not recursive, ZF = 0 at this point (fail)
371 incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
372 xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
373 #endif // _LP64
374 bind(DONE_LABEL);
375
376 // ZFlag == 1 count in fast path
377 // ZFlag == 0 count in slow path
378 jccb(Assembler::notZero, NO_COUNT); // jump if ZFlag == 0
379
380 bind(COUNT);
381 // Count monitors in fast path
382 increment(Address(thread, JavaThread::held_monitor_count_offset()));
383
384 xorl(tmpReg, tmpReg); // Set ZF == 1
385
386 bind(NO_COUNT);
387
388 // At NO_COUNT the icc ZFlag is set as follows ...
389 // fast_unlock uses the same protocol.
390 // ZFlag == 1 -> Success
391 // ZFlag == 0 -> Failure - force control through the slow path
392 }
393
394 // obj: object to unlock
395 // box: box address (displaced header location), killed. Must be EAX.
396 // tmp: killed, cannot be obj nor box.
397 //
398 // Some commentary on balanced locking:
399 //
400 // fast_lock and fast_unlock are emitted only for provably balanced lock sites.
401 // Methods that don't have provably balanced locking are forced to run in the
402 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
403 // The interpreter provides two properties:
// I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
//     interpreter maintains an on-stack list of locks currently held by
//     a frame.
// I2: If a method attempts to unlock an object that is not held by
//     the frame, the interpreter throws IMSX.
410 //
// Let's say A(), which has provably balanced locking, acquires O and then calls B().
412 // B() doesn't have provably balanced locking so it runs in the interpreter.
413 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
414 // is still locked by A().
415 //
416 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
417 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
418 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
419 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
420 // Arguably given that the spec legislates the JNI case as undefined our implementation
421 // could reasonably *avoid* checking owner in fast_unlock().
422 // In the interest of performance we elide m->Owner==Self check in unlock.
423 // A perfectly viable alternative is to elide the owner check except when
424 // Xcheck:jni is enabled.
425
// fast_unlock: inline unlock sequence emitted by C2 for the LM_LEGACY and
// LM_MONITOR locking modes (LM_LIGHTWEIGHT uses fast_unlock_lightweight).
//
// objReg - object being unlocked
// boxReg - on-stack BasicLock box address; must be RAX (implicit CMPXCHG
//          comparand), killed
// tmpReg - scratch, killed; must differ from objReg and boxReg
//
// Exit protocol (same as fast_lock): ZF == 1 -> success (fast path),
// ZF == 0 -> force control through the slow path.
void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg) {
  assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
  assert(boxReg == rax, "");
  assert_different_registers(objReg, boxReg, tmpReg);

  Label DONE_LABEL, Stacked, COUNT, NO_COUNT;

  if (LockingMode == LM_LEGACY) {
    cmpptr(Address(boxReg, 0), NULL_WORD);                            // Examine the displaced header
    jcc   (Assembler::zero, COUNT);                                   // 0 indicates recursive stack-lock
  }
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
  if (LockingMode != LM_MONITOR) {
    testptr(tmpReg, markWord::monitor_value);                         // Inflated?
    jcc(Assembler::zero, Stacked);
  }

  // It's inflated.

  // Despite our balanced locking property we still check that m->_owner == Self
  // as java routines or native JNI code called by this thread might
  // have released the lock.
  // Refer to the comments in synchronizer.cpp for how we might encode extra
  // state in _succ so we can avoid fetching EntryList|cxq.
  //
  // If there's no contention try a 1-0 exit.  That is, exit without
  // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
  // we detect and recover from the race that the 1-0 exit admits.
  //
  // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
  // before it STs null into _owner, releasing the lock.  Updates
  // to data protected by the critical section must be visible before
  // we drop the lock (and thus before any other thread could acquire
  // the lock and observe the fields protected by the lock).
  // IA32's memory-model is SPO, so STs are ordered with respect to
  // each other and there's no need for an explicit barrier (fence).
  // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
#ifndef _LP64
  // Note that we could employ various encoding schemes to reduce
  // the number of loads below (currently 4) to just 2 or 3.
  // Refer to the comments in synchronizer.cpp.
  // In practice the chain of fetches doesn't seem to impact performance, however.
  xorptr(boxReg, boxReg);
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  jccb  (Assembler::notZero, DONE_LABEL);   // recursions != 0 -> ZF == 0 -> slow path
  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  jccb  (Assembler::notZero, DONE_LABEL);   // waiters queued -> slow path must hand off
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);  // uncontended 1-0 exit
  jmpb  (DONE_LABEL);                       // ZF == 1 from the orptr above -> success
#else // _LP64
  // It's inflated
  Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath;

  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
  jccb(Assembler::equal, LNotRecursive);

  // Recursive inflated unlock: just decrement the recursion count.
  decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  jmpb(LSuccess);

  bind(LNotRecursive);
  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  jccb  (Assembler::notZero, CheckSucc);    // contended: someone may need a wakeup
  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
  jmpb  (DONE_LABEL);                       // ZF == 1 from the orptr above -> success

  // Try to avoid passing control into the slow_path ...
  bind  (CheckSucc);

  // The following optional optimization can be elided if necessary
  // Effectively: if (succ == null) goto slow path
  // The code reduces the window for a race, however,
  // and thus benefits performance.
  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
  jccb  (Assembler::zero, LGoSlowPath);

  xorptr(boxReg, boxReg);
  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);

  // Memory barrier/fence between the ST of Owner above and the LD of _succ
  // below (the Dekker pivot point: ST Owner; LD Succ).  Instead of MFENCE we
  // use a dummy locked add of 0 to the top-of-stack.  We could also
  // restructure (ST Owner=0;barrier;LD _Succ) to
  // (mov box,0; xchgq box, &m->Owner; LD _succ) .
  lock(); addl(Address(rsp, 0), 0);

  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
  jccb  (Assembler::notZero, LSuccess);     // a successor exists; it will self-wake

  // Rare inopportune interleaving - race.
  // The successor vanished in the small window above.
  // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
  // We need to ensure progress and succession.
  // Try to reacquire the lock.
  // If that fails then the new owner is responsible for succession and this
  // thread needs to take no further action and can exit via the fast path (success).
  // If the re-acquire succeeds then pass control into the slow path.
  // As implemented, this latter mode is horrible because we generated more
  // coherence traffic on the lock *and* artificially extended the critical section
  // length by virtue of passing control into the slow path.

  // box is really RAX -- the following CMPXCHG depends on that binding
  // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
  lock();
  cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  // There's no successor so we tried to regrab the lock.
  // If that didn't work, then another thread grabbed the
  // lock so we're done (and exit was a success).
  jccb  (Assembler::notEqual, LSuccess);
  // Intentional fall-through into slow path

  bind  (LGoSlowPath);
  orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
  jmpb  (DONE_LABEL);

  bind  (LSuccess);
  testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
  jmpb  (DONE_LABEL);

#endif
  if (LockingMode == LM_LEGACY) {
    bind  (Stacked);
    // Stack-locked case: CAS the displaced header back into the markword.
    movptr(tmpReg, Address (boxReg, 0)); // re-fetch
    lock();
    cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
    // Intentional fall-thru into DONE_LABEL
  }

  bind(DONE_LABEL);

  // ZFlag == 1 count in fast path
  // ZFlag == 0 count in slow path
  jccb(Assembler::notZero, NO_COUNT);

  bind(COUNT);
  // Count monitors in fast path
#ifndef _LP64
  get_thread(tmpReg);
  decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
#else // _LP64
  decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
#endif

  xorl(tmpReg, tmpReg); // Set ZF == 1 to report success to the caller

  bind(NO_COUNT);
}
579
// fast_lock_lightweight: inline lock sequence emitted by C2 for the
// LM_LIGHTWEIGHT locking mode.
//
// obj     - object to lock
// box     - killed; aliased below as 'top' (lock-stack top offset)
// rax_reg - must be RAX (implicit CMPXCHG comparand), killed
// t       - scratch, killed; aliased below as 'mark' / 'tagged_monitor'
// thread  - current JavaThread
//
// C2 uses the value of ZF to determine the continuation:
// ZF == 1 -> locked (fast path), ZF == 0 -> take the slow path.
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
                                              Register t, Register thread) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert(rax_reg == rax, "Used for CAS");
  assert_different_registers(obj, box, rax_reg, t, thread);

  // Handle inflated monitor.
  Label inflated;
  // Finish fast lock successfully. ZF value is irrelevant.
  Label locked;
  // Finish fast lock unsuccessfully. MUST jump with ZF == 0
  Label slow_path;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    // Synchronization on a value-based class is diagnosed in the runtime;
    // divert to the slow path so it can report/throw.
    load_klass(rax_reg, obj, t);
    movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
    testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
    jcc(Assembler::notZero, slow_path);
  }

  const Register mark = t;

  { // Lightweight Lock

    Label push;

    const Register top = box;

    // Load the mark.
    // NOTE(review): this chunk appears truncated here -- the instructions
    // that load 'mark' and 'top' and the monitor-bit test that jumps to
    // 'inflated' are not visible; confirm against the full source file.

    // Check if lock-stack is full.
    cmpl(top, LockStack::end_offset() - 1);
    jcc(Assembler::greater, slow_path);

    // Check if recursive.
    cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
    jccb(Assembler::equal, push);

    // Try to lock. Transition lock bits 0b01 => 0b00
    movptr(rax_reg, mark);                          // expected value: mark with unlocked bit set
    orptr(rax_reg, markWord::unlocked_value);
    andptr(mark, ~(int32_t)markWord::unlocked_value); // desired value: unlocked bit cleared
    lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    jcc(Assembler::notEqual, slow_path);            // CAS failed (contention or mark changed)

    bind(push);
    // After successful lock, push object on lock-stack.
    movptr(Address(thread, top), obj);
    addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
    jmpb(locked);
  }

  { // Handle inflated monitor.
    bind(inflated);

    const Register tagged_monitor = mark;

    // CAS owner (null => current thread).
    xorptr(rax_reg, rax_reg);                       // expected owner: null
    lock(); cmpxchgptr(thread, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    jccb(Assembler::equal, locked);

    // Check if recursive: on CAS failure RAX holds the current owner.
    cmpptr(thread, rax_reg);
    jccb(Assembler::notEqual, slow_path);           // owned by another thread

    // Recursive.
    increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  }

  bind(locked);
  increment(Address(thread, JavaThread::held_monitor_count_offset()));
  // Set ZF = 1
  xorl(rax_reg, rax_reg);

#ifdef ASSERT
  // Check that locked label is reached with ZF set.
  Label zf_correct;
  Label zf_bad_zero;
  jcc(Assembler::zero, zf_correct);
  jmp(zf_bad_zero);
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with ZF not set.
  jcc(Assembler::notZero, zf_correct);
  stop("Fast Lock ZF != 0");
  bind(zf_bad_zero);
  stop("Fast Lock ZF != 1");
  bind(zf_correct);
#endif
  // C2 uses the value of ZF to determine the continuation.
}
682
683 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t, Register thread) {
684 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
685 assert(reg_rax == rax, "Used for CAS");
686 assert_different_registers(obj, reg_rax, t);
687
688 // Handle inflated monitor.
689 Label inflated, inflated_check_lock_stack;
690 // Finish fast unlock successfully. MUST jump with ZF == 1
691 Label unlocked;
692
693 // Assume success.
694 decrement(Address(thread, JavaThread::held_monitor_count_offset()));
695
696 const Register mark = t;
697 const Register top = reg_rax;
698
699 Label dummy;
700 C2FastUnlockLightweightStub* stub = nullptr;
701
702 if (!Compile::current()->output()->in_scratch_emit_size()) {
703 stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, thread);
704 Compile::current()->output()->add_stub(stub);
705 }
706
707 Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
708 Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
709
710 { // Lightweight Unlock
711
712 // Load top.
713 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
714
715 // Prefetch mark.
716 movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
717
718 // Check if obj is top of lock-stack.
719 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
720 // Top of lock stack was not obj. Must be monitor.
721 jcc(Assembler::notEqual, inflated_check_lock_stack);
722
723 // Pop lock-stack.
776 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
777 #else // _LP64
778 Label recursive;
779
780 // Check if recursive.
781 cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
782 jccb(Assembler::notEqual, recursive);
783
784 // Check if the entry lists are empty.
785 movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
786 orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
787 jcc(Assembler::notZero, check_successor);
788
789 // Release lock.
790 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
791 jmpb(unlocked);
792
793 // Recursive unlock.
794 bind(recursive);
795 decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
796 xorl(t, t);
797 #endif
798 }
799
800 bind(unlocked);
801 if (stub != nullptr) {
802 bind(stub->unlocked_continuation());
803 }
804
805 #ifdef ASSERT
806 // Check that unlocked label is reached with ZF set.
807 Label zf_correct;
808 jcc(Assembler::zero, zf_correct);
809 stop("Fast Unlock ZF != 1");
810 #endif
811
812 if (stub != nullptr) {
813 bind(stub->slow_path_continuation());
814 }
815 #ifdef ASSERT
816 // Check that stub->continuation() label is reached with ZF not set.
|
256 assert(tmpReg == rax, "");
257 assert(cx1Reg == noreg, "");
258 assert(cx2Reg == noreg, "");
259 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
260
261 // Possible cases that we'll encounter in fast_lock
262 // ------------------------------------------------
263 // * Inflated
264 // -- unlocked
265 // -- Locked
266 // = by self
267 // = by other
268 // * neutral
269 // * stack-locked
270 // -- by self
271 // = sp-proximity test hits
272 // = sp-proximity test generates false-negative
273 // -- by other
274 //
275
276 Label IsInflated, DONE_LABEL, COUNT;
277
278 if (DiagnoseSyncOnValueBasedClasses != 0) {
279 load_klass(tmpReg, objReg, scrReg);
280 movl(tmpReg, Address(tmpReg, Klass::access_flags_offset()));
281 testl(tmpReg, JVM_ACC_IS_VALUE_BASED_CLASS);
282 jcc(Assembler::notZero, DONE_LABEL);
283 }
284
285 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH]
286 testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral
287 jcc(Assembler::notZero, IsInflated);
288
289 if (LockingMode == LM_MONITOR) {
290 // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0.
291 testptr(objReg, objReg);
292 } else {
293 assert(LockingMode == LM_LEGACY, "must be");
294 // Attempt stack-locking ...
295 orptr (tmpReg, markWord::unlocked_value);
296 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
297 lock();
298 cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Updates tmpReg
299 jcc(Assembler::equal, COUNT); // Success
300
301 // Recursive locking.
302 // The object is stack-locked: markword contains stack pointer to BasicLock.
303 // Locked by current thread if difference with current SP is less than one page.
304 subptr(tmpReg, rsp);
305 // Next instruction set ZFlag == 1 (Success) if difference is less then one page.
306 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) );
307 movptr(Address(boxReg, 0), tmpReg);
308 }
309 // After recursive stack locking attempt case
310 jmp(DONE_LABEL);
311
312 bind(IsInflated);
313 // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value
314
315 #ifndef _LP64
316 // The object is inflated.
317
318 // boxReg refers to the on-stack BasicLock in the current frame.
319 // We'd like to write:
320 // set box->_displaced_header = markWord::unused_mark(). Any non-0 value suffices.
321 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
322 // additional latency as we have another ST in the store buffer that must drain.
323
324 // avoid ST-before-CAS
325 // register juggle because we need tmpReg for cmpxchgptr below
326 movptr(scrReg, boxReg);
327 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
328
329 // Optimistic form: consider XORL tmpReg,tmpReg
330 movptr(tmpReg, NULL_WORD);
331
332 // Appears unlocked - try to swing _owner from null to non-null.
333 // Ideally, I'd manifest "Self" with get_thread and then attempt
334 // to CAS the register containing thread id into m->Owner.
335 // But we don't have enough registers, so instead we can either try to CAS
336 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
337 // we later store thread id into m->Owner. Transiently storing a stack address
338 // (rsp or the address of the box) into m->owner is harmless.
339 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
340 lock();
341 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
342 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
343 // If we weren't able to swing _owner from null to the BasicLock
344 // then take the slow path.
345 jccb (Assembler::notZero, DONE_LABEL);
346 // update _owner from BasicLock to thread
347 get_thread (scrReg); // beware: clobbers ICCs
348 movptr(scrReg, Address(scrReg, JavaThread::lock_id_offset()));
349 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
350 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
351 jmp(DONE_LABEL);
352
353 // If the CAS fails we can either retry or pass control to the slow path.
354 // We use the latter tactic.
355 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
356 // If the CAS was successful ...
357 // Self has acquired the lock
358 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
359 // Intentional fall-through into DONE_LABEL ...
360 #else // _LP64
361 // Unconditionally set box->_displaced_header = markWord::unused_mark().
362 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
363 movptr(Address(boxReg, 0), checked_cast<int32_t>(markWord::unused_mark().value()));
364
365 // It's inflated and we use scrReg for ObjectMonitor* in this section.
366 movq(scrReg, tmpReg);
367 xorq(tmpReg, tmpReg);
368 movptr(boxReg, Address(r15_thread, JavaThread::lock_id_offset()));
369 lock();
370 cmpxchgptr(boxReg, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
371
372 // Propagate ICC.ZF from CAS above into DONE_LABEL.
373 jccb(Assembler::equal, DONE_LABEL); // CAS above succeeded; propagate ZF = 1 (success)
374
375 cmpptr(boxReg, rax); // Check if we are already the owner (recursive lock)
376 jccb(Assembler::notEqual, DONE_LABEL); // If not recursive, ZF = 0 at this point (fail)
377 incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
378 xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success
379 jmp(DONE_LABEL);
380 #endif // _LP64
381
382 bind(COUNT);
383 // Count monitors in fast path
384 increment(Address(thread, JavaThread::held_monitor_count_offset()));
385 xorl(tmpReg, tmpReg); // Set ZF == 1
386
387 bind(DONE_LABEL);
388
389 // At DONE_LABEL the icc ZFlag is set as follows ...
390 // fast_unlock uses the same protocol.
391 // ZFlag == 1 -> Success
392 // ZFlag == 0 -> Failure - force control through the slow path
393 }
394
395 // obj: object to unlock
396 // box: box address (displaced header location), killed. Must be EAX.
397 // tmp: killed, cannot be obj nor box.
398 //
399 // Some commentary on balanced locking:
400 //
401 // fast_lock and fast_unlock are emitted only for provably balanced lock sites.
402 // Methods that don't have provably balanced locking are forced to run in the
403 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
404 // The interpreter provides two properties:
// I1: At return-time the interpreter automatically and quietly unlocks any
//     objects acquired by the current activation (frame).  Recall that the
//     interpreter maintains an on-stack list of locks currently held by
//     a frame.
// I2: If a method attempts to unlock an object that is not held by
//     the frame, the interpreter throws IMSX.
411 //
// Let's say A(), which has provably balanced locking, acquires O and then calls B().
413 // B() doesn't have provably balanced locking so it runs in the interpreter.
414 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
415 // is still locked by A().
416 //
417 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
418 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
419 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
420 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
421 // Arguably given that the spec legislates the JNI case as undefined our implementation
422 // could reasonably *avoid* checking owner in fast_unlock().
423 // In the interest of performance we elide m->Owner==Self check in unlock.
424 // A perfectly viable alternative is to elide the owner check except when
425 // Xcheck:jni is enabled.
426
427 void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, Register scrReg) {
428 assert(LockingMode != LM_LIGHTWEIGHT, "lightweight locking should use fast_unlock_lightweight");
429 assert(boxReg == rax, "");
430 assert_different_registers(objReg, boxReg, tmpReg);
431
432 Label DONE_LABEL, Stacked, COUNT;
433
434 if (LockingMode == LM_LEGACY) {
435 cmpptr(Address(boxReg, 0), NULL_WORD); // Examine the displaced header
436 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
437 }
438 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
439 if (LockingMode != LM_MONITOR) {
440 testptr(tmpReg, markWord::monitor_value); // Inflated?
441 jcc(Assembler::zero, Stacked);
442 }
443
444 // It's inflated.
445 // If the owner is ANONYMOUS, we need to fix it - in an outline stub.
446 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER);
447 #ifdef _LP64
448 if (!Compile::current()->output()->in_scratch_emit_size()) {
449 C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg);
450 Compile::current()->output()->add_stub(stub);
451 jcc(Assembler::equal, stub->entry());
452 bind(stub->continuation());
453 } else
454 #endif
455 {
456 // We can't easily implement this optimization on 32 bit because we don't have a thread register.
457 // Call the slow-path instead.
458 jcc(Assembler::notEqual, DONE_LABEL);
459 }
460
461 // Despite our balanced locking property we still check that m->_owner == Self
462 // as java routines or native JNI code called by this thread might
463 // have released the lock.
464 // Refer to the comments in synchronizer.cpp for how we might encode extra
465 // state in _succ so we can avoid fetching EntryList|cxq.
466 //
467 // If there's no contention try a 1-0 exit. That is, exit without
468 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
469 // we detect and recover from the race that the 1-0 exit admits.
470 //
471 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
472 // before it STs null into _owner, releasing the lock. Updates
473 // to data protected by the critical section must be visible before
474 // we drop the lock (and thus before any other thread could acquire
475 // the lock and observe the fields protected by the lock).
476 // IA32's memory-model is SPO, so STs are ordered with respect to
477 // each other and there's no need for an explicit barrier (fence).
478 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
479 #ifndef _LP64
481 // the number of loads below (currently 4) to just 2 or 3.
482 // Refer to the comments in synchronizer.cpp.
483 // In practice the chain of fetches doesn't seem to impact performance, however.
484 xorptr(boxReg, boxReg);
485 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
486 jccb (Assembler::notZero, DONE_LABEL);
487 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
488 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
489 jccb (Assembler::notZero, DONE_LABEL);
490 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
491 jmpb (DONE_LABEL);
492 #else // _LP64
493 // It's inflated
494 Label CheckSucc, LNotRecursive, LSuccess, LGoSlowPath;
495
496 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
497 jccb(Assembler::equal, LNotRecursive);
498
499 // Recursive inflated unlock
500 decq(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
501 xorl(tmpReg, tmpReg); // Set ZF == 1
502 jmp(DONE_LABEL);
503
504 bind(LNotRecursive);
505
506 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
507 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
508 jccb (Assembler::notZero, CheckSucc);
509 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
510 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
511 jmpb (DONE_LABEL);
512
513 // Try to avoid passing control into the slow_path ...
514 bind (CheckSucc);
515
516 // The following optional optimization can be elided if necessary
517 // Effectively: if (succ == null) goto slow path
518 // The code reduces the window for a race, however,
519 // and thus benefits performance.
520 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
521 jccb (Assembler::zero, LGoSlowPath);
522
523 xorptr(boxReg, boxReg);
524 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
525 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
533 // (mov box,0; xchgq box, &m->Owner; LD _succ) .
534 lock(); addl(Address(rsp, 0), 0);
535
536 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), NULL_WORD);
537 jccb (Assembler::notZero, LSuccess);
538
539 // Rare inopportune interleaving - race.
540 // The successor vanished in the small window above.
541 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
542 // We need to ensure progress and succession.
543 // Try to reacquire the lock.
544 // If that fails then the new owner is responsible for succession and this
545 // thread needs to take no further action and can exit via the fast path (success).
546 // If the re-acquire succeeds then pass control into the slow path.
547 // As implemented, this latter mode is horrible because we generated more
548 // coherence traffic on the lock *and* artificially extended the critical section
549 // length while by virtue of passing control into the slow path.
550
551 // box is really RAX -- the following CMPXCHG depends on that binding
552 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
553 movptr(scrReg, Address(r15_thread, JavaThread::lock_id_offset()));
554 lock();
555 cmpxchgptr(scrReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
556 // There's no successor so we tried to regrab the lock.
557 // If that didn't work, then another thread grabbed the
558 // lock so we're done (and exit was a success).
559 jccb (Assembler::notEqual, LSuccess);
560 // Intentional fall-through into slow path
561
562 bind (LGoSlowPath);
563 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
564 jmpb (DONE_LABEL);
565
566 bind (LSuccess);
567 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
568 jmpb (DONE_LABEL);
569
570 #endif
571 if (LockingMode == LM_LEGACY) {
572 bind (Stacked);
573 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
574 lock();
575 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
576 jccb(Assembler::notZero, DONE_LABEL);
577 // Count monitors in fast path
578 #ifndef _LP64
579 get_thread(tmpReg);
580 decrementl(Address(tmpReg, JavaThread::held_monitor_count_offset()));
581 #else // _LP64
582 decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset()));
583 #endif
584 xorl(tmpReg, tmpReg); // Set ZF == 1
585 }
586
587 // ZFlag == 1 -> Success
588 // ZFlag == 0 -> Failure - force control through the slow path
589 bind(DONE_LABEL);
590 }
591
// Emit the C2 fast path for monitorenter under LM_LIGHTWEIGHT locking.
//
// obj     - object being locked (must differ from all other registers)
// box     - scratch; reused as the lock-stack top offset, and to hold the
//           thread's lock id for the inflated-monitor owner CAS
// rax_reg - must be rax (cmpxchg implicitly uses rax as the compare value)
// t       - scratch; aliased below as 'mark' / 'tagged_monitor'
// thread  - register holding the current JavaThread*
//
// On exit ZF == 1 means the lock was acquired; ZF == 0 means the caller
// must take the runtime slow path (see final comment and ASSERT checks).
void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Register rax_reg,
                                              Register t, Register thread) {
  assert(LockingMode == LM_LIGHTWEIGHT, "must be");
  assert(rax_reg == rax, "Used for CAS");
  assert_different_registers(obj, box, rax_reg, t, thread);

  // Handle inflated monitor.
  Label inflated;
  // Finish fast lock successfully. MUST be reached with ZF == 1 (verified under ASSERT below).
  Label locked;
  // Finish fast lock unsuccessfully. MUST jump with ZF == 0
  Label slow_path;

  if (DiagnoseSyncOnValueBasedClasses != 0) {
    // Diagnostic mode: divert locking on value-based classes to the slow
    // path so the runtime can detect and report it.
    load_klass(rax_reg, obj, t);
    movl(rax_reg, Address(rax_reg, Klass::access_flags_offset()));
    testl(rax_reg, JVM_ACC_IS_VALUE_BASED_CLASS);
    jcc(Assembler::notZero, slow_path);
  }

  const Register mark = t;

  { // Lightweight Lock

    Label push;

    // 'top' is used as a byte offset into the thread for lock-stack accesses.
    const Register top = box;

    // Load the mark.

    // Check if lock-stack is full.
    cmpl(top, LockStack::end_offset() - 1);
    jcc(Assembler::greater, slow_path);

    // Check if recursive: the entry just below top already holds this object.
    cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
    jccb(Assembler::equal, push);

    // Try to lock. Transition lock bits 0b01 => 0b00:
    // expected old value (rax) has the unlocked bit set, new value clears it.
    movptr(rax_reg, mark);
    orptr(rax_reg, markWord::unlocked_value);
    andptr(mark, ~(int32_t)markWord::unlocked_value);
    lock(); cmpxchgptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
    jcc(Assembler::notEqual, slow_path);

    bind(push);
    // After successful lock, push object on lock-stack.
    movptr(Address(thread, top), obj);
    addl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize);
    xorl(rax_reg, rax_reg); // Set ZF == 1 to signal success.
    jmpb(locked);
  }

  { // Handle inflated monitor.
    bind(inflated);

    const Register tagged_monitor = mark;

    // CAS owner (null => this thread's lock id).
    xorptr(rax_reg, rax_reg);
    movptr(box, Address(thread, JavaThread::lock_id_offset()));
    lock(); cmpxchgptr(box, Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    jccb(Assembler::equal, locked);

    // Check if recursive: after the failed CAS, rax holds the observed owner.
    // If it differs from our lock id (still in box), another thread owns it.
    cmpptr(box, rax_reg);
    jccb(Assembler::notEqual, slow_path);

    // Recursive: we already own the monitor, just bump the recursion count.
    increment(Address(tagged_monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
    xorl(rax_reg, rax_reg); // Set ZF == 1 to signal success.
  }

  bind(locked);
#ifdef ASSERT
  // Check that locked label is reached with ZF set.
  Label zf_correct;
  Label zf_bad_zero;
  jcc(Assembler::zero, zf_correct);
  jmp(zf_bad_zero);
#endif

  bind(slow_path);
#ifdef ASSERT
  // Check that slow_path label is reached with ZF not set.
  jcc(Assembler::notZero, zf_correct);
  stop("Fast Lock ZF != 0");
  bind(zf_bad_zero);
  stop("Fast Lock ZF != 1");
  bind(zf_correct);
#endif
  // C2 uses the value of ZF to determine the continuation.
}
693
694 void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, Register t1, Register t2, Register thread) {
695 assert(LockingMode == LM_LIGHTWEIGHT, "must be");
696 assert(reg_rax == rax, "Used for CAS");
697 assert_different_registers(obj, reg_rax, t1, t2);
698
699 // Handle inflated monitor.
700 Label inflated, inflated_check_lock_stack;
701 // Finish fast unlock successfully. MUST jump with ZF == 1
702 Label unlocked;
703
704 const Register mark = t1;
705 const Register top = reg_rax;
706
707 Label dummy;
708 C2FastUnlockLightweightStub* stub = nullptr;
709
710 if (!Compile::current()->output()->in_scratch_emit_size()) {
711 stub = new (Compile::current()->comp_arena()) C2FastUnlockLightweightStub(obj, mark, reg_rax, t2, thread);
712 Compile::current()->output()->add_stub(stub);
713 }
714
715 Label& push_and_slow_path = stub == nullptr ? dummy : stub->push_and_slow_path();
716 Label& check_successor = stub == nullptr ? dummy : stub->check_successor();
717
718 { // Lightweight Unlock
719
720 // Load top.
721 movl(top, Address(thread, JavaThread::lock_stack_top_offset()));
722
723 // Prefetch mark.
724 movptr(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
725
726 // Check if obj is top of lock-stack.
727 cmpptr(obj, Address(thread, top, Address::times_1, -oopSize));
728 // Top of lock stack was not obj. Must be monitor.
729 jcc(Assembler::notEqual, inflated_check_lock_stack);
730
731 // Pop lock-stack.
784 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
785 #else // _LP64
786 Label recursive;
787
788 // Check if recursive.
789 cmpptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), 0);
790 jccb(Assembler::notEqual, recursive);
791
792 // Check if the entry lists are empty.
793 movptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
794 orptr(reg_rax, Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
795 jcc(Assembler::notZero, check_successor);
796
797 // Release lock.
798 movptr(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
799 jmpb(unlocked);
800
801 // Recursive unlock.
802 bind(recursive);
803 decrement(Address(monitor, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
804 xorl(t1, t1);
805 #endif
806 }
807
808 bind(unlocked);
809 if (stub != nullptr) {
810 bind(stub->unlocked_continuation());
811 }
812
813 #ifdef ASSERT
814 // Check that unlocked label is reached with ZF set.
815 Label zf_correct;
816 jcc(Assembler::zero, zf_correct);
817 stop("Fast Unlock ZF != 1");
818 #endif
819
820 if (stub != nullptr) {
821 bind(stub->slow_path_continuation());
822 }
823 #ifdef ASSERT
824 // Check that stub->continuation() label is reached with ZF not set.
|