summaryrefslogtreecommitdiff
path: root/kernel/rseq.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/rseq.c')
-rw-r--r--kernel/rseq.c78
1 files changed, 41 insertions, 37 deletions
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 59adc1a7183b..01e711383e05 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -422,50 +422,54 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
{
struct task_struct *t = current;
int ret, sig;
+ bool event;
+
+ /*
+ * If invoked from hypervisors before entering the guest via
+ * resume_user_mode_work(), then @regs is a NULL pointer.
+ *
+ * resume_user_mode_work() clears TIF_NOTIFY_RESUME and re-raises
+ * it before returning from the ioctl() to user space when
+ * rseq_event.sched_switch is set.
+ *
+ * So it's safe to ignore here instead of pointlessly updating it
+ * in the vcpu_run() loop.
+ */
+ if (!regs)
+ return;
if (unlikely(t->flags & PF_EXITING))
return;
/*
- * If invoked from hypervisors or IO-URING, then @regs is a NULL
- * pointer, so fixup cannot be done. If the syscall which led to
- * this invocation was invoked inside a critical section, then it
- * will either end up in this code again or a possible violation of
- * a syscall inside a critical region can only be detected by the
- * debug code in rseq_syscall() in a debug enabled kernel.
+ * Read and clear the event pending bit first. If the task
+ * was not preempted or migrated or a signal is on the way,
+ * there is no point in doing any of the heavy lifting here
+ * on production kernels. In that case TIF_NOTIFY_RESUME
+ * was raised by some other functionality.
+ *
+ * This is correct because the read/clear operation is
+ * guarded against scheduler preemption, which makes it CPU
+ * local atomic. If the task is preempted right after
+ * re-enabling preemption then TIF_NOTIFY_RESUME is set
+ * again and this function is invoked another time _before_
+ * the task is able to return to user mode.
+ *
+ * On a debug kernel, invoke the fixup code unconditionally
+ * with the result handed in to allow the detection of
+ * inconsistencies.
*/
- if (regs) {
- /*
- * Read and clear the event pending bit first. If the task
- * was not preempted or migrated or a signal is on the way,
- * there is no point in doing any of the heavy lifting here
- * on production kernels. In that case TIF_NOTIFY_RESUME
- * was raised by some other functionality.
- *
- * This is correct because the read/clear operation is
- * guarded against scheduler preemption, which makes it CPU
- * local atomic. If the task is preempted right after
- * re-enabling preemption then TIF_NOTIFY_RESUME is set
- * again and this function is invoked another time _before_
- * the task is able to return to user mode.
- *
- * On a debug kernel, invoke the fixup code unconditionally
- * with the result handed in to allow the detection of
- * inconsistencies.
- */
- bool event;
-
- scoped_guard(RSEQ_EVENT_GUARD) {
- event = t->rseq_event_pending;
- t->rseq_event_pending = false;
- }
-
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
- ret = rseq_ip_fixup(regs, event);
- if (unlikely(ret < 0))
- goto error;
- }
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ event = t->rseq_event_pending;
+ t->rseq_event_pending = false;
}
+
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
+ ret = rseq_ip_fixup(regs, event);
+ if (unlikely(ret < 0))
+ goto error;
+ }
+
if (unlikely(rseq_update_cpu_node_id(t)))
goto error;
return;