path: root/kernel/entry/common.c
author	Linus Torvalds <torvalds@linux-foundation.org>	2025-12-02 08:48:53 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2025-12-02 08:48:53 -0800
commit	2b09f480f0a1e68111ae36a7be9aa1c93e067255 (patch)
tree	1121380528c9a1b89bced4ccf64bf6340b61a87a /kernel/entry/common.c
parent	1dce50698a5ceedaca806e0a78573886a363dc95 (diff)
parent	653fda7ae73d8033dedb65537acac0c2c287dc3f (diff)
Merge tag 'core-rseq-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull rseq updates from Thomas Gleixner:
 "A large overhaul of the restartable sequences and CID management:

  The recent enablement of RSEQ in glibc resulted in regressions which
  are caused by the related overhead. It turned out that the decision
  to invoke the exit-to-user work was not really a decision; more or
  less every context switch caused it. There is a long list of small
  issues which adds up and results in a 3-4% regression in I/O
  benchmarks.

  The other detail which caused issues, due to extra work in context
  switch and task migration, is the CID (memory context ID) management.
  It also requires a task work to consolidate the CID space, which is
  executed in the context of an arbitrary task and results in sporadic,
  uncontrolled exit latencies.

  The rewrite addresses this by:

   - Removing deprecated and long unsupported functionality

   - Moving the related data into dedicated data structures which are
     optimized for fast path processing

   - Caching values so actual decisions can be made

   - Replacing the current implementation with an optimized inlined
     variant

   - Separating fast and slow paths for architectures which use the
     generic entry code, so that only fault and error handling goes
     into the TIF_NOTIFY_RESUME handler

   - Rewriting the CID management so that it becomes mostly invisible
     in the context switch path. That moves the work of switching
     modes into the fork/exit path, which is a reasonable tradeoff:
     the work is only required when a process creates more threads
     than the cpuset it is allowed to run on, or when enough threads
     exit after that. An artificial thread pool benchmark which
     triggers this did not degrade; it actually improved significantly
     (a rough sketch of the mode decision follows after the shortlog).

  The main effect in migration-heavy scenarios is that runqueue lock
  held time, and therefore contention, goes down significantly"

* tag 'core-rseq-2025-11-30' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (54 commits)
  sched/mmcid: Switch over to the new mechanism
  sched/mmcid: Implement deferred mode change
  irqwork: Move data struct to a types header
  sched/mmcid: Provide CID ownership mode fixup functions
  sched/mmcid: Provide new scheduler CID mechanism
  sched/mmcid: Introduce per task/CPU ownership infrastructure
  sched/mmcid: Serialize sched_mm_cid_fork()/exit() with a mutex
  sched/mmcid: Provide precomputed maximal value
  sched/mmcid: Move initialization out of line
  signal: Move MMCID exit out of sighand lock
  sched/mmcid: Convert mm CID mask to a bitmap
  cpumask: Cache num_possible_cpus()
  sched/mmcid: Use cpumask_weighted_or()
  cpumask: Introduce cpumask_weighted_or()
  sched/mmcid: Prevent pointless work in mm_update_cpus_allowed()
  sched/mmcid: Move scheduler code out of global header
  sched: Fixup whitespace damage
  sched/mmcid: Cacheline align MM CID storage
  sched/mmcid: Use proper data structures
  sched/mmcid: Revert the complex CID management
  ...
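The mode decision mentioned in the CID bullet can be made concrete with a small sketch. Everything below is a hypothetical illustration under one plausible reading of the message; none of these names are the kernel's. The point is only that ownership is re-evaluated at the rare fork/exit events, so the context switch path never has to reason about it.

/*
 * Hypothetical sketch, not kernel code: one plausible shape of the
 * CID ownership mode decision described in the pull message above.
 */
enum cid_mode { CID_PER_TASK, CID_PER_CPU };

struct mm_cid_state {
	enum cid_mode	mode;
	unsigned int	nr_threads;		/* threads sharing this mm */
	unsigned int	nr_cpus_allowed;	/* weight of the allowed-CPU mask */
};

/*
 * Re-evaluated at fork/exit only: while a process has no more threads
 * than CPUs it may run on, each task can own its CID outright and the
 * context switch does nothing; once threads outnumber the cpuset,
 * ownership flips so the CID space stays bounded by the CPU count.
 */
static void cid_mode_update(struct mm_cid_state *s)
{
	s->mode = (s->nr_threads > s->nr_cpus_allowed) ?
		  CID_PER_CPU : CID_PER_TASK;
}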
Diffstat (limited to 'kernel/entry/common.c')
-rw-r--r--	kernel/entry/common.c | 39
1 file changed, 22 insertions, 17 deletions
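Among the merged commits is "cpumask: Introduce cpumask_weighted_or()". The name suggests an OR of two masks that also returns the resulting weight in the same pass, sparing the caller a separate cpumask_weight() walk. Below is a minimal, userspace-flavoured sketch of that idea; the function name, signature, and word-based mask representation are assumptions, not the kernel API.

#include <stddef.h>

/*
 * Sketch only: OR two bitmap-backed masks into dst and count the set
 * bits of the result in a single pass. The kernel helper's real
 * signature and tail-bit handling may differ.
 */
static unsigned int weighted_or(unsigned long *dst, const unsigned long *a,
				const unsigned long *b, size_t nwords)
{
	unsigned int weight = 0;

	for (size_t i = 0; i < nwords; i++) {
		dst[i] = a[i] | b[i];
		weight += (unsigned int)__builtin_popcountl(dst[i]);
	}
	return weight;
}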
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index f62e1d1b2063..5c792b30c58a 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -11,19 +11,20 @@
/* Workaround to allow gradual conversion of architecture code */
void __weak arch_do_signal_or_restart(struct pt_regs *regs) { }
-/**
- * exit_to_user_mode_loop - do any pending work before leaving to user space
- * @regs: Pointer to pt_regs on entry stack
- * @ti_work: TIF work flags as read by the caller
- */
-__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
- unsigned long ti_work)
+#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK & ~_TIF_RSEQ)
+#else
+#define EXIT_TO_USER_MODE_WORK_LOOP (EXIT_TO_USER_MODE_WORK)
+#endif
+
+static __always_inline unsigned long __exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
{
/*
* Before returning to user space ensure that all pending work
* items have been completed.
*/
- while (ti_work & EXIT_TO_USER_MODE_WORK) {
+ while (ti_work & EXIT_TO_USER_MODE_WORK_LOOP) {
local_irq_enable_exit_to_user(ti_work);
@@ -62,17 +63,21 @@ __always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
return ti_work;
}
-noinstr void irqentry_enter_from_user_mode(struct pt_regs *regs)
+/**
+ * exit_to_user_mode_loop - do any pending work before leaving to user space
+ * @regs: Pointer to pt_regs on entry stack
+ * @ti_work: TIF work flags as read by the caller
+ */
+__always_inline unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
+ unsigned long ti_work)
{
- enter_from_user_mode(regs);
-}
+ for (;;) {
+ ti_work = __exit_to_user_mode_loop(regs, ti_work);
-noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
-{
- instrumentation_begin();
- exit_to_user_mode_prepare(regs);
- instrumentation_end();
- exit_to_user_mode();
+ if (likely(!rseq_exit_to_user_mode_restart(regs, ti_work)))
+ return ti_work;
+ ti_work = read_thread_flags();
+ }
}
noinstr irqentry_state_t irqentry_enter(struct pt_regs *regs)
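For context on how the reworked loop is consumed: in the generic entry code, the exit path reads the thread flags once and only enters exit_to_user_mode_loop() when work bits are actually set. A condensed sketch of that caller's shape follows; it is simplified, and details such as lockdep assertions, tick handling, and the exact arch hook signatures vary by kernel version.

/* Condensed sketch of the caller, not verbatim kernel source. */
static __always_inline void exit_to_user_mode_prepare(struct pt_regs *regs)
{
	unsigned long ti_work = read_thread_flags();

	/* Fast path: nothing pending, skip the work loop entirely. */
	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
		ti_work = exit_to_user_mode_loop(regs, ti_work);

	arch_exit_to_user_mode_prepare(regs, ti_work);
}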