1 /*
  2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This code is free software; you can redistribute it and/or modify it
  7  * under the terms of the GNU General Public License version 2 only, as
  8  * published by the Free Software Foundation.
  9  *
 10  * This code is distributed in the hope that it will be useful, but WITHOUT
 11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 13  * version 2 for more details (a copy is included in the LICENSE file that
 14  * accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License version
 17  * 2 along with this work; if not, write to the Free Software Foundation,
 18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 19  *
 20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 21  * or visit www.oracle.com if you need additional information or have any
 22  * questions.
 23  *
 24  */
 25 
 26 #include "precompiled.hpp"
 27 #include "jvm.h"
 28 #include "classfile/javaClasses.hpp"
 29 #include "classfile/javaThreadStatus.hpp"
 30 #include "gc/shared/barrierSet.hpp"
 31 #include "jfr/jfrEvents.hpp"
 32 #include "jvmtifiles/jvmtiEnv.hpp"
 33 #include "logging/log.hpp"
 34 #include "memory/allocation.inline.hpp"
 35 #include "memory/iterator.hpp"
 36 #include "memory/resourceArea.hpp"
 37 #include "oops/oop.inline.hpp"
 38 #include "runtime/atomic.hpp"
 39 #include "runtime/handles.inline.hpp"
 40 #include "runtime/javaThread.inline.hpp"
 41 #include "runtime/lockStack.inline.hpp"
 42 #include "runtime/nonJavaThread.hpp"
 43 #include "runtime/orderAccess.hpp"
 44 #include "runtime/osThread.hpp"
 45 #include "runtime/safepoint.hpp"
 46 #include "runtime/safepointMechanism.inline.hpp"
 47 #include "runtime/thread.inline.hpp"
 48 #include "runtime/threadSMR.inline.hpp"
 49 #include "services/memTracker.hpp"
 50 #include "utilities/macros.hpp"
 51 #include "utilities/spinYield.hpp"
 52 #if INCLUDE_JFR
 53 #include "jfr/jfr.hpp"
 54 #endif
 55 
#ifndef USE_LIBRARY_BASED_TLS_ONLY
// Current thread is maintained as a thread-local variable.
// It is assigned in Thread::initialize_thread_current() and reset to NULL in
// Thread::clear_thread_current(); the definition is compiled out entirely when
// USE_LIBRARY_BASED_TLS_ONLY is defined.
THREAD_LOCAL Thread* Thread::_thr_current = NULL;
#endif
 60 
 61 // ======= Thread ========
 62 void* Thread::allocate(size_t size, bool throw_excpt, MEMFLAGS flags) {
 63   return throw_excpt ? AllocateHeap(size, flags, CURRENT_PC)
 64                        : AllocateHeap(size, flags, CURRENT_PC, AllocFailStrategy::RETURN_NULL);
 65 }
 66 
 67 void Thread::operator delete(void* p) {
 68   FreeHeap(p);
 69 }
 70 
 71 // Base class for all threads: VMThread, WatcherThread, ConcurrentMarkSweepThread,
 72 // JavaThread
 73 
// Debug-only: the thread recorded by set_as_starting_thread(); NULL until then.
DEBUG_ONLY(Thread* Thread::_starting_thread = NULL;)
 75 
// Constructs the state common to every thread type: resets the stack bounds,
// allocates the per-thread resource area, handle area and metadata handle
// list, clears the threads-list / hazard-pointer bookkeeping, seeds the
// per-thread hashCode generator, allocates the ParkEvent and finally notifies
// the barrier set (if one has been installed yet).
// The statement order below is significant; do not reorder casually.
Thread::Thread():
  _lock_stack() {

  DEBUG_ONLY(_run_state = PRE_CALL_RUN;)

  // stack and get_thread
  // The real stack bounds are filled in later by record_stack_base_and_size().
  set_stack_base(NULL);
  set_stack_size(0);
  set_lgrp_id(-1);
  DEBUG_ONLY(clear_suspendible_thread();)

  // allocated data structures
  // (each of these is released again in ~Thread())
  set_osthread(NULL);
  set_resource_area(new (mtThread)ResourceArea());
  DEBUG_ONLY(_current_resource_mark = NULL;)
  set_handle_area(new (mtThread) HandleArea(NULL));
  set_metadata_handles(new (ResourceObj::C_HEAP, mtClass) GrowableArray<Metadata*>(30, mtClass));
  set_last_handle_mark(NULL);
  DEBUG_ONLY(_missed_ic_stub_refill_verifier = NULL);

  // Initial value of zero ==> never claimed.
  _threads_do_token = 0;
  _threads_hazard_ptr = NULL;
  _threads_list_ptr = NULL;
  _nested_threads_hazard_ptr_cnt = 0;
  _rcu_counter = 0;

  // the handle mark links itself to last_handle_mark
  // (which is why the result of 'new' is not stored here; ~Thread() deletes it
  // through last_handle_mark()).
  new HandleMark(this);

  // plain initialization
  debug_only(_owned_locks = NULL;)
  NOT_PRODUCT(_skip_gcalot = false;)
  _jvmti_env_iteration_count = 0;
  set_allocated_bytes(0);
  _current_pending_raw_monitor = NULL;

  // thread-specific hashCode stream generator state - Marsaglia shift-xor form
  // Only X is randomized per thread; Y, Z and W start from fixed constants.
  _hashStateX = os::random();
  _hashStateY = 842502087;
  _hashStateZ = 0x8767;    // (int)(3579807591LL & 0xffff) ;
  _hashStateW = 273326509;

  // Many of the following fields are effectively final - immutable
  // Note that nascent threads can't use the Native Monitor-Mutex
  // construct until the _MutexEvent is initialized ...
  // CONSIDER: instead of using a fixed set of purpose-dedicated ParkEvents
  // we might instead use a stack of ParkEvents that we could provision on-demand.
  // The stack would act as a cache to avoid calls to ParkEvent::Allocate()
  // and ::Release()
  _ParkEvent   = ParkEvent::Allocate(this);

#ifdef CHECK_UNHANDLED_OOPS
  if (CheckUnhandledOops) {
    _unhandled_oops = new UnhandledOops(this);
  }
#endif // CHECK_UNHANDLED_OOPS

  // Notify the barrier set that a thread is being created. The initial
  // thread is created before the barrier set is available.  The call to
  // BarrierSet::on_thread_create() for this thread is therefore deferred
  // to BarrierSet::set_barrier_set().
  BarrierSet* const barrier_set = BarrierSet::barrier_set();
  if (barrier_set != NULL) {
    barrier_set->on_thread_create(this);
  } else {
    // Only the main thread should be created before the barrier set
    // and that happens just before Thread::current is set. No other thread
    // can attach as the VM is not created yet, so they can't execute this code.
    // If the main thread creates other threads before the barrier set that is an error.
    assert(Thread::current_or_null() == NULL, "creating thread before barrier set");
  }

  // macOS/AArch64 debug builds only: the flag starts out false here and
  // call_run() invokes init_wx() on that platform.
  MACOS_AARCH64_ONLY(DEBUG_ONLY(_wx_init = false));
}
151 
152 void Thread::initialize_tlab() {
153   if (UseTLAB) {
154     tlab().initialize();
155   }
156 }
157 
// Publishes this Thread as the "current" thread: in the THREAD_LOCAL variable
// _thr_current (unless USE_LIBRARY_BASED_TLS_ONLY is defined) and in
// ThreadLocalStorage. Asserts that neither slot was populated beforehand.
void Thread::initialize_thread_current() {
#ifndef USE_LIBRARY_BASED_TLS_ONLY
  assert(_thr_current == NULL, "Thread::current already initialized");
  _thr_current = this;
#endif
  assert(ThreadLocalStorage::thread() == NULL, "ThreadLocalStorage::thread already initialized");
  ThreadLocalStorage::set_thread(this);
  // Sanity check: Thread::current() and ThreadLocalStorage must now agree.
  assert(Thread::current() == ThreadLocalStorage::thread(), "TLS mismatch!");
}
167 
// Undoes initialize_thread_current(): resets both the THREAD_LOCAL variable
// (when present) and the ThreadLocalStorage slot to NULL.
void Thread::clear_thread_current() {
  // Check consistency before clearing; Thread::current() is not usable afterwards.
  assert(Thread::current() == ThreadLocalStorage::thread(), "TLS mismatch!");
#ifndef USE_LIBRARY_BASED_TLS_ONLY
  _thr_current = NULL;
#endif
  ThreadLocalStorage::set_thread(NULL);
}
175 
// Records the stack base and stack size reported by the os layer
// (os::current_stack_base() / os::current_stack_size()) in this Thread and,
// for a JavaThread, initializes its stack overflow state from the resulting
// stack_base()/stack_end() bounds.
void Thread::record_stack_base_and_size() {
  // Note: at this point, Thread object is not yet initialized. Do not rely on
  // any members being initialized. Do not rely on Thread::current() being set.
  // If possible, refrain from doing anything which may crash or assert since
  // quite probably those crash dumps will be useless.
  set_stack_base(os::current_stack_base());
  set_stack_size(os::current_stack_size());

  // Set stack limits after thread is initialized.
  if (is_Java_thread()) {
    JavaThread::cast(this)->stack_overflow_state()->initialize(stack_base(), stack_end());
  }
}
189 
190 void Thread::register_thread_stack_with_NMT() {
191   MemTracker::record_thread_stack(stack_end(), stack_size());
192 }
193 
194 void Thread::unregister_thread_stack_with_NMT() {
195   MemTracker::release_thread_stack(stack_end(), stack_size());
196 }
197 
// Common driver for a started thread. In order it: performs the shared
// start-up actions (init_wx on macOS/AArch64, NMT stack registration, JFR
// start notification, stack-dimension logging), then calls the subclass hooks
// pre_run(), run() and post_run(). In debug builds _run_state tracks which
// phase is executing so that ~Thread() can detect premature deletion.
void Thread::call_run() {
  DEBUG_ONLY(_run_state = CALL_RUN;)

  // At this point, Thread object should be fully initialized and
  // Thread::current() should be set.

  assert(Thread::current_or_null() != NULL, "current thread is unset");
  assert(Thread::current_or_null() == this, "current thread is wrong");

  // Perform common initialization actions

  MACOS_AARCH64_ONLY(this->init_wx());

  register_thread_stack_with_NMT();

  JFR_ONLY(Jfr::on_thread_start(this);)

  // The stack range is logged as [stack_end, stack_base] with its size in KB.
  log_debug(os, thread)("Thread " UINTX_FORMAT " stack dimensions: "
    PTR_FORMAT "-" PTR_FORMAT " (" SIZE_FORMAT "k).",
    os::current_thread_id(), p2i(stack_end()),
    p2i(stack_base()), stack_size()/1024);

  // Perform <ChildClass> initialization actions
  DEBUG_ONLY(_run_state = PRE_RUN;)
  this->pre_run();

  // Invoke <ChildClass>::run()
  DEBUG_ONLY(_run_state = RUN;)
  this->run();
  // Returned from <ChildClass>::run(). Thread finished.

  // Perform common tear-down actions

  assert(Thread::current_or_null() != NULL, "current thread is unset");
  assert(Thread::current_or_null() == this, "current thread is wrong");

  // Perform <ChildClass> tear-down actions
  DEBUG_ONLY(_run_state = POST_RUN;)
  this->post_run();

  // Note: at this point the thread object may already have deleted itself,
  // so from here on do not dereference *this*. Not all thread types currently
  // delete themselves when they terminate. But no thread should ever be deleted
  // asynchronously with respect to its termination - that is what _run_state can
  // be used to check.

  // post_run() is expected to have cleared the current-thread association.
  assert(Thread::current_or_null() == NULL, "current thread still present");
}
246 
// Tears down the common Thread state set up by the constructor: notifies the
// barrier set, frees the resource area, root handle mark, ParkEvent, handle
// area, metadata handle list and OSThread, and clears the current-thread
// association if the thread is deleting itself.
// The statement order below is significant; do not reorder casually.
Thread::~Thread() {

  // Attached threads will remain in PRE_CALL_RUN, as will threads that don't actually
  // get started due to errors etc. Any active thread should at least reach post_run
  // before it is deleted (usually in post_run()).
  assert(_run_state == PRE_CALL_RUN ||
         _run_state == POST_RUN, "Active Thread deleted before post_run(): "
         "_run_state=%d", (int)_run_state);

  // Notify the barrier set that a thread is being destroyed. Note that a barrier
  // set might not be available if we encountered errors during bootstrapping.
  BarrierSet* const barrier_set = BarrierSet::barrier_set();
  if (barrier_set != NULL) {
    barrier_set->on_thread_destroy(this);
  }

  // deallocate data structures
  delete resource_area();
  // Since the handle marks are using the handle area, we have to deallocate the root
  // handle mark before deallocating the thread's handle area.
  assert(last_handle_mark() != NULL, "check we have an element");
  delete last_handle_mark();
  assert(last_handle_mark() == NULL, "check we have reached the end");

  ParkEvent::Release(_ParkEvent);
  // Set to NULL as a termination indicator for has_terminated().
  Atomic::store(&_ParkEvent, (ParkEvent*)NULL);

  delete handle_area();
  delete metadata_handles();

  // osthread() can be NULL, if creation of thread failed.
  if (osthread() != NULL) os::free_thread(osthread());

  // Clear Thread::current if thread is deleting itself and it has not
  // already been done. This must be done before the memory is deallocated.
  // Needed to ensure JNI correctly detects non-attached threads.
  if (this == Thread::current_or_null()) {
    Thread::clear_thread_current();
  }

  // Matches the conditional allocation of _unhandled_oops in the constructor.
  CHECK_UNHANDLED_OOPS_ONLY(if (CheckUnhandledOops) delete unhandled_oops();)
}
290 
291 #ifdef ASSERT
// Debug-only check that 'thread' cannot be a dangling pointer.
// The assertion holds if ANY of the following is true:
//   - 'thread' is not a JavaThread,
//   - it is handshake-safe with respect to the current thread,
//   - it is not on the thread list,
//   - the VM is at a safepoint, or
//   - ThreadsSMRSupport reports it as a protected JavaThread (with lock).
// In other words, a JavaThread is considered dangling only if none of these
// protections applies.
void Thread::check_for_dangling_thread_pointer(Thread *thread) {
  assert(!thread->is_Java_thread() ||
         JavaThread::cast(thread)->is_handshake_safe_for(Thread::current()) ||
         !JavaThread::cast(thread)->on_thread_list() ||
         SafepointSynchronize::is_at_safepoint() ||
         ThreadsSMRSupport::is_a_protected_JavaThread_with_lock(JavaThread::cast(thread)),
         "possibility of dangling Thread pointer");
}
302 #endif
303 
304 // Is the target JavaThread protected by the calling Thread or by some other
305 // mechanism?
306 //
307 bool Thread::is_JavaThread_protected(const JavaThread* target) {
308   Thread* current_thread = Thread::current();
309 
310   // Do the simplest check first:
311   if (SafepointSynchronize::is_at_safepoint()) {
312     // The target is protected since JavaThreads cannot exit
313     // while we're at a safepoint.
314     return true;
315   }
316 
317   // If the target hasn't been started yet then it is trivially
318   // "protected". We assume the caller is the thread that will do
319   // the starting.
320   if (target->osthread() == NULL || target->osthread()->get_state() <= INITIALIZED) {
321     return true;
322   }
323 
324   // Now make the simple checks based on who the caller is:
325   if (current_thread == target || Threads_lock->owner() == current_thread) {
326     // Target JavaThread is self or calling thread owns the Threads_lock.
327     // Second check is the same as Threads_lock->owner_is_self(),
328     // but we already have the current thread so check directly.
329     return true;
330   }
331 
332   // Check the ThreadsLists associated with the calling thread (if any)
333   // to see if one of them protects the target JavaThread:
334   if (is_JavaThread_protected_by_TLH(target)) {
335     return true;
336   }
337 
338   // Use this debug code with -XX:+UseNewCode to diagnose locations that
339   // are missing a ThreadsListHandle or other protection mechanism:
340   // guarantee(!UseNewCode, "current_thread=" INTPTR_FORMAT " is not protecting target="
341   //           INTPTR_FORMAT, p2i(current_thread), p2i(target));
342 
343   // Note: Since 'target' isn't protected by a TLH, the call to
344   // target->is_handshake_safe_for() may crash, but we have debug bits so
345   // we'll be able to figure out what protection mechanism is missing.
346   assert(target->is_handshake_safe_for(current_thread), "JavaThread=" INTPTR_FORMAT
347          " is not protected and not handshake safe.", p2i(target));
348 
349   // The target JavaThread is not protected so it is not safe to query:
350   return false;
351 }
352 
353 // Is the target JavaThread protected by a ThreadsListHandle (TLH) associated
354 // with the calling Thread?
355 //
356 bool Thread::is_JavaThread_protected_by_TLH(const JavaThread* target) {
357   Thread* current_thread = Thread::current();
358 
359   // Check the ThreadsLists associated with the calling thread (if any)
360   // to see if one of them protects the target JavaThread:
361   for (SafeThreadsListPtr* stlp = current_thread->_threads_list_ptr;
362        stlp != NULL; stlp = stlp->previous()) {
363     if (stlp->list()->includes(target)) {
364       // The target JavaThread is protected by this ThreadsList:
365       return true;
366     }
367   }
368 
369   // The target JavaThread is not protected by a TLH so it is not safe to query:
370   return false;
371 }
372 
373 ThreadPriority Thread::get_priority(const Thread* const thread) {
374   ThreadPriority priority;
375   // Can return an error!
376   (void)os::get_priority(thread, priority);
377   assert(MinPriority <= priority && priority <= MaxPriority, "non-Java priority found");
378   return priority;
379 }
380 
381 void Thread::set_priority(Thread* thread, ThreadPriority priority) {
382   debug_only(check_for_dangling_thread_pointer(thread);)
383   // Can return an error!
384   (void)os::set_priority(thread, priority);
385 }
386 
387 
388 void Thread::start(Thread* thread) {
389   // Start is different from resume in that its safety is guaranteed by context or
390   // being called from a Java method synchronized on the Thread object.
391   if (thread->is_Java_thread()) {
392     // Initialize the thread state to RUNNABLE before starting this thread.
393     // Can not set it after the thread started because we do not know the
394     // exact thread state at that time. It could be in MONITOR_WAIT or
395     // in SLEEPING or some other state.
396     java_lang_Thread::set_thread_status(JavaThread::cast(thread)->threadObj(),
397                                         JavaThreadStatus::RUNNABLE);
398   }
399   os::start_thread(thread);
400 }
401 
402 // GC Support
403 bool Thread::claim_par_threads_do(uintx claim_token) {
404   uintx token = _threads_do_token;
405   if (token != claim_token) {
406     uintx res = Atomic::cmpxchg(&_threads_do_token, token, claim_token);
407     if (res == token) {
408       return true;
409     }
410     guarantee(res == claim_token, "invariant");
411   }
412   return false;
413 }
414 
415 void Thread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) {
416   // Do oop for ThreadShadow
417   f->do_oop((oop*)&_pending_exception);
418   handle_area()->oops_do(f);
419   if (!UseHeavyMonitors) {
420     lock_stack().oops_do(f);
421   }
422 }
423 
424 // If the caller is a NamedThread, then remember, in the current scope,
425 // the given JavaThread in its _processed_thread field.
426 class RememberProcessedThread: public StackObj {
427   NamedThread* _cur_thr;
428 public:
429   RememberProcessedThread(Thread* thread) {
430     Thread* self = Thread::current();
431     if (self->is_Named_thread()) {
432       _cur_thr = (NamedThread *)self;
433       assert(_cur_thr->processed_thread() == NULL, "nesting not supported");
434       _cur_thr->set_processed_thread(thread);
435     } else {
436       _cur_thr = NULL;
437     }
438   }
439 
440   ~RememberProcessedThread() {
441     if (_cur_thr) {
442       assert(_cur_thr->processed_thread() != NULL, "nesting not supported");
443       _cur_thr->set_processed_thread(NULL);
444     }
445   }
446 };
447 
448 void Thread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
449   // Record JavaThread to GC thread
450   RememberProcessedThread rpt(this);
451   oops_do_no_frames(f, cf);
452   oops_do_frames(f, cf);
453 }
454 
455 void Thread::metadata_handles_do(void f(Metadata*)) {
456   // Only walk the Handles in Thread.
457   if (metadata_handles() != NULL) {
458     for (int i = 0; i< metadata_handles()->length(); i++) {
459       f(metadata_handles()->at(i));
460     }
461   }
462 }
463 
464 void Thread::print_on(outputStream* st, bool print_extended_info) const {
465   // get_priority assumes osthread initialized
466   if (osthread() != NULL) {
467     int os_prio;
468     if (os::get_native_priority(this, &os_prio) == OS_OK) {
469       st->print("os_prio=%d ", os_prio);
470     }
471 
472     st->print("cpu=%.2fms ",
473               os::thread_cpu_time(const_cast<Thread*>(this), true) / 1000000.0
474               );
475     st->print("elapsed=%.2fs ",
476               _statistical_info.getElapsedTime() / 1000.0
477               );
478     if (is_Java_thread() && (PrintExtendedThreadInfo || print_extended_info)) {
479       size_t allocated_bytes = (size_t) const_cast<Thread*>(this)->cooked_allocated_bytes();
480       st->print("allocated=" SIZE_FORMAT "%s ",
481                 byte_size_in_proper_unit(allocated_bytes),
482                 proper_unit_for_byte_size(allocated_bytes)
483                 );
484       st->print("defined_classes=" INT64_FORMAT " ", _statistical_info.getDefineClassCount());
485     }
486 
487     st->print("tid=" INTPTR_FORMAT " ", p2i(this));
488     if (!is_Java_thread() || !JavaThread::cast(this)->is_vthread_mounted()) {
489       osthread()->print_on(st);
490     }
491   }
492   ThreadsSMRSupport::print_info_on(this, st);
493   st->print(" ");
494   debug_only(if (WizardMode) print_owned_locks_on(st);)
495 }
496 
497 void Thread::print() const { print_on(tty); }
498 
499 // Thread::print_on_error() is called by fatal error handler. Don't use
500 // any lock or allocate memory.
501 void Thread::print_on_error(outputStream* st, char* buf, int buflen) const {
502   assert(!(is_Compiler_thread() || is_Java_thread()), "Can't call name() here if it allocates");
503 
504   st->print("%s \"%s\"", type_name(), name());
505 
506   OSThread* os_thr = osthread();
507   if (os_thr != NULL) {
508     if (os_thr->get_state() != ZOMBIE) {
509       st->print(" [stack: " PTR_FORMAT "," PTR_FORMAT "]",
510                 p2i(stack_end()), p2i(stack_base()));
511       st->print(" [id=%d]", osthread()->thread_id());
512     } else {
513       st->print(" terminated");
514     }
515   } else {
516     st->print(" unknown state (no osThread)");
517   }
518   ThreadsSMRSupport::print_info_on(this, st);
519 }
520 
521 void Thread::print_value_on(outputStream* st) const {
522   if (is_Named_thread()) {
523     st->print(" \"%s\" ", name());
524   }
525   st->print(INTPTR_FORMAT, p2i(this));   // print address
526 }
527 
528 #ifdef ASSERT
529 void Thread::print_owned_locks_on(outputStream* st) const {
530   Mutex* cur = _owned_locks;
531   if (cur == NULL) {
532     st->print(" (no locks) ");
533   } else {
534     st->print_cr(" Locks owned:");
535     while (cur) {
536       cur->print_on(st);
537       cur = cur->next();
538     }
539   }
540 }
541 #endif // ASSERT
542 
// We had to move these methods here, because vm threads get into ObjectSynchronizer::enter.
// However, there is a note in JavaThread::is_lock_owned() about the VM threads not being
// used for compilation in the future. If that change is made, the need for these methods
// should be revisited, and they should be removed if possible.
547 
548 bool Thread::is_lock_owned(address adr) const {
549   assert(adr != ANONYMOUS_OWNER, "must convert to lock object");
550   return !UseHeavyMonitors && lock_stack().contains(cast_to_oop(adr));
551 }
552 
553 bool Thread::set_as_starting_thread() {
554   assert(_starting_thread == NULL, "already initialized: "
555          "_starting_thread=" INTPTR_FORMAT, p2i(_starting_thread));
556   // NOTE: this must be called inside the main thread.
557   DEBUG_ONLY(_starting_thread = this;)
558   return os::create_main_thread(JavaThread::cast(this));
559 }
560 
561 // Ad-hoc mutual exclusion primitives: SpinLock
562 //
563 // We employ SpinLocks _only for low-contention, fixed-length
564 // short-duration critical sections where we're concerned
565 // about native mutex_t or HotSpot Mutex:: latency.
566 //
567 // TODO-FIXME: ListLock should be of type SpinLock.
568 // We should make this a 1st-class type, integrated into the lock
569 // hierarchy as leaf-locks.  Critically, the SpinLock structure
570 // should have sufficient padding to avoid false-sharing and excessive
571 // cache-coherency traffic.
572 
573 
// Type of a spin-lock word. It is the same type as the 'volatile int' that
// Thread::SpinAcquire/SpinRelease operate on, where 0 means unlocked and the
// acquirer installs 1.
typedef volatile int SpinLockT;
575 
576 void Thread::SpinAcquire(volatile int * adr, const char * LockName) {
577   if (Atomic::cmpxchg(adr, 0, 1) == 0) {
578     return;   // normal fast-path return
579   }
580 
581   // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
582   int ctr = 0;
583   int Yields = 0;
584   for (;;) {
585     while (*adr != 0) {
586       ++ctr;
587       if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
588         if (Yields > 5) {
589           os::naked_short_sleep(1);
590         } else {
591           os::naked_yield();
592           ++Yields;
593         }
594       } else {
595         SpinPause();
596       }
597     }
598     if (Atomic::cmpxchg(adr, 0, 1) == 0) return;
599   }
600 }
601 
// Releases the spin lock at 'adr' by storing 0 into it. The lock must
// currently be held (non-zero). A full fence is issued before the releasing
// store; the fence must stay ahead of the store.
void Thread::SpinRelease(volatile int * adr) {
  assert(*adr != 0, "invariant");
  OrderAccess::fence();      // guarantee at least release consistency.
  // Roach-motel semantics.
  // It's safe if subsequent LDs and STs float "up" into the critical section,
  // but prior LDs and STs within the critical section can't be allowed
  // to reorder or float past the ST that releases the lock.
  // Loads and stores in the critical section - which appear in program
  // order before the store that releases the lock - must also appear
  // before the store that releases the lock in memory visibility order.
  // Conceptually we need a #loadstore|#storestore "release" MEMBAR before
  // the ST of 0 into the lock-word which releases the lock, so fence
  // more than covers this on all platforms.
  *adr = 0;
}