Source: Hoshpak Upstream: https://github.com/mono/mono/issues/7167 Reason: reverts replacing condition variables with a semaphore in the threading code which caused a severe regression that introduced a 5-60 second hang while compiling code From 24bc6365883d17809b9015849d79a623336b00df Mon Sep 17 00:00:00 2001 From: Helmut Pozimski Date: Tue, 1 Jan 2019 09:17:59 +0100 Subject: [PATCH] Revert "[threadpool] Replace parked_threads condition variable by a semaphore (#6222)" This reverts commit 5f5c5e97a08f7086d7c18af37352c4a03dc4c0d1. --- mono/metadata/threadpool-worker-default.c | 99 ++++++++++++----------- 1 file changed, 52 insertions(+), 47 deletions(-) diff --git a/mono/metadata/threadpool-worker-default.c b/mono/metadata/threadpool-worker-default.c index 96372a15f30..59f33cba149 100644 --- mono/metadata/threadpool-worker-default.c +++ mono/metadata/threadpool-worker-default.c @@ -131,8 +131,9 @@ typedef struct { ThreadPoolWorkerCounter counters; - MonoCoopSem parked_threads_sem; + MonoCoopMutex parked_threads_lock; gint32 parked_threads_count; + MonoCoopCond parked_threads_cond; volatile gint32 work_items_count; @@ -215,7 +216,8 @@ rand_next (gpointer *handle, guint32 min, guint32 max) static void destroy (gpointer data) { - mono_coop_sem_destroy (&worker.parked_threads_sem); + mono_coop_mutex_destroy (&worker.parked_threads_lock); + mono_coop_cond_destroy (&worker.parked_threads_cond); mono_coop_mutex_destroy (&worker.worker_creation_lock); @@ -236,8 +238,9 @@ mono_threadpool_worker_init (MonoThreadPoolWorkerCallback callback) worker.callback = callback; - mono_coop_sem_init (&worker.parked_threads_sem, 0); + mono_coop_mutex_init (&worker.parked_threads_lock); worker.parked_threads_count = 0; + mono_coop_cond_init (&worker.parked_threads_cond); worker.worker_creation_current_second = -1; mono_coop_mutex_init (&worker.worker_creation_lock); @@ -356,60 +359,69 @@ mono_threadpool_worker_request (void) mono_refcount_dec (&worker); } +static void +worker_wait_interrupt (gpointer unused) +{ + /* If the runtime is not shutting down, we are not using this mechanism to wake up a unparked thread, and if the + * runtime is shutting down, then we need to wake up ALL the threads. + * It might be a bit wasteful, but I witnessed shutdown hang where the main thread would abort and then wait for all + * background threads to exit (see mono_thread_manage). This would go wrong because not all threadpool threads would + * be unparked. It would end up getting unstucked because of the timeout, but that would delay shutdown by 5-60s. */ + if (!mono_runtime_is_shutting_down ()) + return; + + if (!mono_refcount_tryinc (&worker)) + return; + + mono_coop_mutex_lock (&worker.parked_threads_lock); + mono_coop_cond_broadcast (&worker.parked_threads_cond); + mono_coop_mutex_unlock (&worker.parked_threads_lock); + + mono_refcount_dec (&worker); +} + /* return TRUE if timeout, FALSE otherwise (worker unpark or interrupt) */ static gboolean worker_park (void) { gboolean timeout = FALSE; gboolean interrupted = FALSE; - gint32 old, new_; mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] worker parking", GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (mono_native_thread_id_get ()))); + mono_coop_mutex_lock (&worker.parked_threads_lock); + if (!mono_runtime_is_shutting_down ()) { static gpointer rand_handle = NULL; + MonoInternalThread *thread; ThreadPoolWorkerCounter counter; - if (!rand_handle) { + if (!rand_handle) rand_handle = rand_create (); - g_assert (rand_handle); - } + g_assert (rand_handle); + + thread = mono_thread_internal_current (); + g_assert (thread); COUNTER_ATOMIC (counter, { counter._.working --; counter._.parked ++; }); - do { - old = mono_atomic_load_i32 (&worker.parked_threads_count); - g_assert (old >= G_MININT32); + worker.parked_threads_count += 1; - new_ = old + 1; - } while (mono_atomic_cas_i32 (&worker.parked_threads_count, new_, old) != old); + mono_thread_info_install_interrupt (worker_wait_interrupt, NULL, &interrupted); + if (interrupted) + goto done; - switch (mono_coop_sem_timedwait (&worker.parked_threads_sem, rand_next (&rand_handle, 5 * 1000, 60 * 1000), MONO_SEM_FLAGS_ALERTABLE)) { - case MONO_SEM_TIMEDWAIT_RET_SUCCESS: - break; - case MONO_SEM_TIMEDWAIT_RET_ALERTED: - interrupted = TRUE; - break; - case MONO_SEM_TIMEDWAIT_RET_TIMEDOUT: + if (mono_coop_cond_timedwait (&worker.parked_threads_cond, &worker.parked_threads_lock, rand_next (&rand_handle, 5 * 1000, 60 * 1000)) != 0) timeout = TRUE; - break; - default: - g_assert_not_reached (); - } - if (interrupted || timeout) { - /* If the semaphore was posted, then worker.parked_threads_count was decremented in worker_try_unpark */ - do { - old = mono_atomic_load_i32 (&worker.parked_threads_count); - g_assert (old > G_MININT32); + mono_thread_info_uninstall_interrupt (&interrupted); - new_ = old - 1; - } while (mono_atomic_cas_i32 (&worker.parked_threads_count, new_, old) != old); - } +done: + worker.parked_threads_count -= 1; COUNTER_ATOMIC (counter, { counter._.working ++; @@ -417,6 +429,8 @@ worker_park (void) }); } + mono_coop_mutex_unlock (&worker.parked_threads_lock); + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] worker unparking, timeout? %s interrupted? %s", GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (mono_native_thread_id_get ())), timeout ? "yes" : "no", interrupted ? "yes" : "no"); @@ -426,26 +440,17 @@ worker_park (void) static gboolean worker_try_unpark (void) { - gboolean res = TRUE; - gint32 old, new_; + gboolean res = FALSE; mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] try unpark worker", GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (mono_native_thread_id_get ()))); - do { - old = mono_atomic_load_i32 (&worker.parked_threads_count); - g_assert (old > G_MININT32); - - if (old <= 0) { - res = FALSE; - break; - } - - new_ = old - 1; - } while (mono_atomic_cas_i32 (&worker.parked_threads_count, new_, old) != old); - - if (res) - mono_coop_sem_post (&worker.parked_threads_sem); + mono_coop_mutex_lock (&worker.parked_threads_lock); + if (worker.parked_threads_count > 0) { + mono_coop_cond_signal (&worker.parked_threads_cond); + res = TRUE; + } + mono_coop_mutex_unlock (&worker.parked_threads_lock); mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_THREADPOOL, "[%p] try unpark worker, success? %s", GUINT_TO_POINTER (MONO_NATIVE_THREAD_ID_TO_UINT (mono_native_thread_id_get ())), res ? "yes" : "no"); -- 2.20.1