ext/socket: Set raddrinfo thread as detached before thread start (#15142)

We were seeing segfaults when calling `pthread_detach`. Apparently in
some versions of glibc there is a race between when this is called
(usually right after starting a thread) and a short-lived thread's
shutdown routine. The bug has been reported to glibc:

https://sourceware.org/bugzilla/show_bug.cgi?id=19951

I haven't been able to reproduce it on my Linux desktop but apparently
it's easier to reproduce on certain kinds of servers.

As a workaround, we can set the thread's detach state before thread
start. I don't know of a platform that doesn't have
`pthread_attr_setdetachstate`, but to be safe we check for it in
`extconf.rb` and use `pthread_detach` as a backup if it isn't available.

Fixes [Bug #21679]
This commit is contained in:
Luke Gruber 2025-11-13 16:42:38 -05:00 committed by GitHub
parent d58960a912
commit f100298e28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
Notes: git 2025-11-13 21:43:06 +00:00
Merged-By: luke-gru <luke.gru@gmail.com>
3 changed files with 38 additions and 4 deletions

View File

@ -704,6 +704,7 @@ SRC
# Probe for the pthread functions used by the socket extension's C sources.
have_func("pthread_create")
have_func("pthread_detach")
# The C code tests HAVE_PTHREAD_ATTR_SETDETACHSTATE to decide whether it can
# mark a thread as detached before starting it, or must fall back to
# calling pthread_detach after the thread has been created.
have_func("pthread_attr_setdetachstate")
# Append $(topdir) and $(top_srcdir) to the VPATH entries used for the build.
$VPATH << '$(topdir)' << '$(top_srcdir)'
# Create the Makefile for the "socket" extension.
create_makefile("socket")

View File

@ -705,7 +705,6 @@ init_fast_fallback_inetsock_internal(VALUE v)
if (raddrinfo_pthread_create(&threads[i], fork_safe_do_fast_fallback_getaddrinfo, arg->getaddrinfo_entries[i]) != 0) {
rsock_raise_resolution_error("getaddrinfo(3)", EAI_AGAIN);
}
pthread_detach(threads[i]);
}
if (NIL_P(resolv_timeout)) {

View File

@ -496,13 +496,49 @@ int
raddrinfo_pthread_create(pthread_t *th, void *(*start_routine) (void *), void *arg)
{
    /*
     * Start start_routine(arg) on a new thread that is already detached and
     * store its handle in *th.
     *
     * Returns 0 on success, or the error number reported by the pthread_*
     * call that failed. Callers must not pthread_detach() or pthread_join()
     * the resulting thread.
     *
     * When pthread_attr_setdetachstate() is available the thread is created
     * in the detached state. Calling pthread_detach() right after
     * pthread_create() can race with the shutdown of a short-lived thread in
     * some glibc versions (https://sourceware.org/bugzilla/show_bug.cgi?id=19951),
     * so that call is only used as a fallback.
     */
    int limit = 3, ret;
#ifdef HAVE_PTHREAD_ATTR_SETDETACHSTATE
    pthread_attr_t attr;
    pthread_attr_t *attr_p = &attr;
    int saved_errno;
    int err;
    int init_retries = 0;
    const int init_retries_max = 3;

    // If attribute initialization fails with ENOMEM, run the GC and try
    // again, a bounded number of times. Any other error is returned as is.
    while ((err = pthread_attr_init(attr_p)) != 0) {
        if (err != ENOMEM || init_retries >= init_retries_max) {
            return err;
        }
        init_retries++;
        rb_gc();
    }

    if ((err = pthread_attr_setdetachstate(attr_p, PTHREAD_CREATE_DETACHED)) != 0) {
        // Keep errno as the caller saw it across the cleanup call.
        saved_errno = errno;
        pthread_attr_destroy(attr_p);
        errno = saved_errno;
        return err; // EINVAL - shouldn't happen
    }
#else
    pthread_attr_t *attr_p = NULL;
#endif

    do {
        // It is said that pthread_create may fail spuriously, so we follow the JDK and retry several times.
        //
        // https://bugs.openjdk.org/browse/JDK-8268605
        // https://github.com/openjdk/jdk/commit/e35005d5ce383ddd108096a3079b17cb0bcf76f1
        //
        // Exactly one pthread_create call per attempt: a second call here
        // would start an extra thread whose handle is overwritten and lost.
        ret = pthread_create(th, attr_p, start_routine, arg);
    } while (ret == EAGAIN && limit-- > 0);

#ifdef HAVE_PTHREAD_ATTR_SETDETACHSTATE
    // The attribute object is no longer needed once creation was attempted.
    // On failure, restore errno so the cleanup call cannot disturb it.
    saved_errno = errno;
    pthread_attr_destroy(attr_p);
    if (ret != 0) {
        errno = saved_errno;
    }
#else
    if (ret == 0) {
        // pthread_detach takes the thread handle itself, not a pointer to it.
        pthread_detach(*th); // this can race with shutdown routine of thread in some glibc versions
    }
#endif
    return ret;
}
@ -534,7 +570,6 @@ start:
errno = err;
return EAI_SYSTEM;
}
pthread_detach(th);
rb_thread_call_without_gvl2(wait_getaddrinfo, arg, cancel_getaddrinfo, arg);
@ -770,7 +805,6 @@ start:
errno = err;
return EAI_SYSTEM;
}
pthread_detach(th);
rb_thread_call_without_gvl2(wait_getnameinfo, arg, cancel_getnameinfo, arg);