diff options
Diffstat (limited to 'kernel/rseq.c')
| -rw-r--r-- | kernel/rseq.c | 78 |
1 files changed, 41 insertions, 37 deletions
diff --git a/kernel/rseq.c b/kernel/rseq.c index 59adc1a7183b..01e711383e05 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -422,50 +422,54 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { struct task_struct *t = current; int ret, sig; + bool event; + + /* + * If invoked from hypervisors before entering the guest via + * resume_user_mode_work(), then @regs is a NULL pointer. + * + * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises + * it before returning from the ioctl() to user space when + * rseq_event.sched_switch is set. + * + * So it's safe to ignore here instead of pointlessly updating it + * in the vcpu_run() loop. + */ + if (!regs) + return; if (unlikely(t->flags & PF_EXITING)) return; /* - * If invoked from hypervisors or IO-URING, then @regs is a NULL - * pointer, so fixup cannot be done. If the syscall which led to - * this invocation was invoked inside a critical section, then it - * will either end up in this code again or a possible violation of - * a syscall inside a critical region can only be detected by the - * debug code in rseq_syscall() in a debug enabled kernel. + * Read and clear the event pending bit first. If the task + * was not preempted or migrated or a signal is on the way, + * there is no point in doing any of the heavy lifting here + * on production kernels. In that case TIF_NOTIFY_RESUME + * was raised by some other functionality. + * + * This is correct because the read/clear operation is + * guarded against scheduler preemption, which makes it CPU + * local atomic. If the task is preempted right after + * re-enabling preemption then TIF_NOTIFY_RESUME is set + * again and this function is invoked another time _before_ + * the task is able to return to user mode. + * + * On a debug kernel, invoke the fixup code unconditionally + * with the result handed in to allow the detection of + * inconsistencies. */ - if (regs) { - /* - * Read and clear the event pending bit first. If the task - * was not preempted or migrated or a signal is on the way, - * there is no point in doing any of the heavy lifting here - * on production kernels. In that case TIF_NOTIFY_RESUME - * was raised by some other functionality. - * - * This is correct because the read/clear operation is - * guarded against scheduler preemption, which makes it CPU - * local atomic. If the task is preempted right after - * re-enabling preemption then TIF_NOTIFY_RESUME is set - * again and this function is invoked another time _before_ - * the task is able to return to user mode. - * - * On a debug kernel, invoke the fixup code unconditionally - * with the result handed in to allow the detection of - * inconsistencies. - */ - bool event; - - scoped_guard(RSEQ_EVENT_GUARD) { - event = t->rseq_event_pending; - t->rseq_event_pending = false; - } - - if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) { - ret = rseq_ip_fixup(regs, event); - if (unlikely(ret < 0)) - goto error; - } + scoped_guard(RSEQ_EVENT_GUARD) { + event = t->rseq_event_pending; + t->rseq_event_pending = false; } + + if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) { + ret = rseq_ip_fixup(regs, event); + if (unlikely(ret < 0)) + goto error; + } + if (unlikely(rseq_update_cpu_node_id(t))) goto error; return; |
