summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2025-12-20 11:34:37 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2025-12-20 11:34:37 -0800
commit 18dfd1cbf6a633c39256c76ca13114de46435e22 (patch)
tree 0572e389b2158ccf84f783a924185444208fbf54
parent 072c0b4f0f9597c86ddb01fd39e784fda6b7a922 (diff)
parent f4ea8e05f2a857d5447c25f7daf00807d38b307d (diff)
Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 fixes from Catalin Marinas:
 "Two left-over updates that could not go into -rc1 due to conflicts
  with other series:

   - Simplify checks in arch_kfence_init_pool() since
     force_pte_mapping() already takes BBML2-noabort (break-before-make
     Level 2 with no aborts generated) into account

   - Remove unneeded SVE/SME fallback preserve/store handling in the
     arm64 EFI. With the recent updates, the fallback path is only taken
     for EFI runtime calls from hardirq or NMI contexts. In practice,
     this only happens under panic/oops/emergency_restart() and no
     restoring of the user state expected.

     There's a corresponding lkdtm update to trigger a BUG() or panic()
     from hardirq context together with a fixup not to confuse
     clang/objtool about the control flow

  GCS (guarded control stacks) fix: flush the GCS locking state on exec,
  otherwise the new task will not be able to enable GCS (locked as
  disabled)"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  lkdtm/bugs: Do not confuse the clang/objtool with busy wait loop
  arm64/gcs: Flush the GCS locking state on exec
  arm64/efi: Remove unneeded SVE/SME fallback preserve/store handling
  lkdtm/bugs: Add cases for BUG and PANIC occurring in hardirq context
  arm64: mm: Simplify check in arch_kfence_init_pool()
-rw-r--r--  arch/arm64/kernel/fpsimd.c               130
-rw-r--r--  arch/arm64/kernel/process.c                1
-rw-r--r--  arch/arm64/mm/mmu.c                       33
-rw-r--r--  drivers/misc/lkdtm/bugs.c                 53
-rw-r--r--  tools/testing/selftests/lkdtm/tests.txt    2
5 files changed, 92 insertions, 127 deletions
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c154f72634e0..9de1d8a604cb 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -180,13 +180,6 @@ static inline void set_sve_default_vl(int val)
set_default_vl(ARM64_VEC_SVE, val);
}
-static u8 *efi_sve_state;
-
-#else /* ! CONFIG_ARM64_SVE */
-
-/* Dummy declaration for code that will be optimised out: */
-extern u8 *efi_sve_state;
-
#endif /* ! CONFIG_ARM64_SVE */
#ifdef CONFIG_ARM64_SME
@@ -1095,36 +1088,6 @@ int vec_verify_vq_map(enum vec_type type)
return 0;
}
-static void __init sve_efi_setup(void)
-{
- int max_vl = 0;
- int i;
-
- if (!IS_ENABLED(CONFIG_EFI))
- return;
-
- for (i = 0; i < ARRAY_SIZE(vl_info); i++)
- max_vl = max(vl_info[i].max_vl, max_vl);
-
- /*
- * alloc_percpu() warns and prints a backtrace if this goes wrong.
- * This is evidence of a crippled system and we are returning void,
- * so no attempt is made to handle this situation here.
- */
- if (!sve_vl_valid(max_vl))
- goto fail;
-
- efi_sve_state = kmalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)),
- GFP_KERNEL);
- if (!efi_sve_state)
- goto fail;
-
- return;
-
-fail:
- panic("Cannot allocate memory for EFI SVE save/restore");
-}
-
void cpu_enable_sve(const struct arm64_cpu_capabilities *__always_unused p)
{
write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
@@ -1185,8 +1148,6 @@ void __init sve_setup(void)
if (sve_max_virtualisable_vl() < sve_max_vl())
pr_warn("%s: unvirtualisable vector lengths present\n",
info->name);
-
- sve_efi_setup();
}
/*
@@ -1947,9 +1908,6 @@ EXPORT_SYMBOL_GPL(kernel_neon_end);
#ifdef CONFIG_EFI
static struct user_fpsimd_state efi_fpsimd_state;
-static bool efi_fpsimd_state_used;
-static bool efi_sve_state_used;
-static bool efi_sm_state;
/*
* EFI runtime services support functions
@@ -1976,43 +1934,26 @@ void __efi_fpsimd_begin(void)
if (may_use_simd()) {
kernel_neon_begin(&efi_fpsimd_state);
} else {
- WARN_ON(preemptible());
-
/*
- * If !efi_sve_state, SVE can't be in use yet and doesn't need
- * preserving:
+ * We are running in hardirq or NMI context, and the only
+ * legitimate case where this might happen is when EFI pstore
+ * is attempting to record the system's dying gasps into EFI
+ * variables. This could be due to an oops, a panic or a call
+ * to emergency_restart(), and in none of those cases, we can
+ * expect the current task to ever return to user space again,
+ * or for the kernel to resume any normal execution, for that
+ * matter (an oops in hardirq context triggers a panic too).
+ *
+ * Therefore, there is no point in attempting to preserve any
+ * SVE/SME state here. On the off chance that we might have
+ * ended up here for a different reason inadvertently, kill the
+ * task and preserve/restore the base FP/SIMD state, which
+ * might belong to kernel mode FP/SIMD.
*/
- if (system_supports_sve() && efi_sve_state != NULL) {
- bool ffr = true;
- u64 svcr;
-
- efi_sve_state_used = true;
-
- if (system_supports_sme()) {
- svcr = read_sysreg_s(SYS_SVCR);
-
- efi_sm_state = svcr & SVCR_SM_MASK;
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = !(svcr & SVCR_SM_MASK);
- }
-
- sve_save_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
- &efi_fpsimd_state.fpsr, ffr);
-
- if (system_supports_sme())
- sysreg_clear_set_s(SYS_SVCR,
- SVCR_SM_MASK, 0);
-
- } else {
- fpsimd_save_state(&efi_fpsimd_state);
- }
-
- efi_fpsimd_state_used = true;
+ pr_warn_ratelimited("Calling EFI runtime from %s context\n",
+ in_nmi() ? "NMI" : "hardirq");
+ force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
+ fpsimd_save_state(&efi_fpsimd_state);
}
}
@@ -2024,41 +1965,10 @@ void __efi_fpsimd_end(void)
if (!system_supports_fpsimd())
return;
- if (!efi_fpsimd_state_used) {
+ if (may_use_simd()) {
kernel_neon_end(&efi_fpsimd_state);
} else {
- if (system_supports_sve() && efi_sve_state_used) {
- bool ffr = true;
-
- /*
- * Restore streaming mode; EFI calls are
- * normal function calls so should not return in
- * streaming mode.
- */
- if (system_supports_sme()) {
- if (efi_sm_state) {
- sysreg_clear_set_s(SYS_SVCR,
- 0,
- SVCR_SM_MASK);
-
- /*
- * Unless we have FA64 FFR does not
- * exist in streaming mode.
- */
- if (!system_supports_fa64())
- ffr = false;
- }
- }
-
- sve_load_state(efi_sve_state + sve_ffr_offset(sve_max_vl()),
- &efi_fpsimd_state.fpsr, ffr);
-
- efi_sve_state_used = false;
- } else {
- fpsimd_load_state(&efi_fpsimd_state);
- }
-
- efi_fpsimd_state_used = false;
+ fpsimd_load_state(&efi_fpsimd_state);
}
}
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fba7ca102a8c..489554931231 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -292,6 +292,7 @@ static void flush_gcs(void)
current->thread.gcs_base = 0;
current->thread.gcs_size = 0;
current->thread.gcs_el0_mode = 0;
+ current->thread.gcs_el0_locked = 0;
write_sysreg_s(GCSCRE0_EL1_nTR, SYS_GCSCRE0_EL1);
write_sysreg_s(0, SYS_GCSPR_EL0);
}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9ae7ce00a7ef..8e1d80a7033e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -767,18 +767,6 @@ static inline bool force_pte_mapping(void)
return rodata_full || arm64_kfence_can_set_direct_map() || is_realm_world();
}
-static inline bool split_leaf_mapping_possible(void)
-{
- /*
- * !BBML2_NOABORT systems should never run into scenarios where we would
- * have to split. So exit early and let calling code detect it and raise
- * a warning.
- */
- if (!system_supports_bbml2_noabort())
- return false;
- return !force_pte_mapping();
-}
-
static DEFINE_MUTEX(pgtable_split_lock);
int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
@@ -786,11 +774,22 @@ int split_kernel_leaf_mapping(unsigned long start, unsigned long end)
int ret;
/*
- * Exit early if the region is within a pte-mapped area or if we can't
- * split. For the latter case, the permission change code will raise a
- * warning if not already pte-mapped.
+ * !BBML2_NOABORT systems should not be trying to change permissions on
+ * anything that is not pte-mapped in the first place. Just return early
+ * and let the permission change code raise a warning if not already
+ * pte-mapped.
*/
- if (!split_leaf_mapping_possible() || is_kfence_address((void *)start))
+ if (!system_supports_bbml2_noabort())
+ return 0;
+
+ /*
+ * If the region is within a pte-mapped area, there is no need to try to
+ * split. Additionally, CONFIG_DEBUG_PAGEALLOC and CONFIG_KFENCE may
+ * change permissions from atomic context so for those cases (which are
+ * always pte-mapped), we must not go any further because taking the
+ * mutex below may sleep.
+ */
+ if (force_pte_mapping() || is_kfence_address((void *)start))
return 0;
/*
@@ -1089,7 +1088,7 @@ bool arch_kfence_init_pool(void)
int ret;
/* Exit early if we know the linear map is already pte-mapped. */
- if (!split_leaf_mapping_possible())
+ if (force_pte_mapping())
return true;
/* Kfence pool is already pte-mapped for the early init case. */
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index 376047beea3d..502059078b45 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -8,6 +8,7 @@
#include "lkdtm.h"
#include <linux/cpu.h>
#include <linux/list.h>
+#include <linux/hrtimer.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>
@@ -100,11 +101,61 @@ static void lkdtm_PANIC_STOP_IRQOFF(void)
stop_machine(panic_stop_irqoff_fn, &v, cpu_online_mask);
}
+static bool wait_for_panic;
+
+static enum hrtimer_restart panic_in_hardirq(struct hrtimer *timer)
+{
+ panic("from hard IRQ context");
+
+ wait_for_panic = false;
+ return HRTIMER_NORESTART;
+}
+
+static void lkdtm_PANIC_IN_HARDIRQ(void)
+{
+ struct hrtimer timer;
+
+ wait_for_panic = true;
+ hrtimer_setup_on_stack(&timer, panic_in_hardirq,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+ hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
+
+ while (READ_ONCE(wait_for_panic))
+ cpu_relax();
+
+ hrtimer_cancel(&timer);
+}
+
static void lkdtm_BUG(void)
{
BUG();
}
+static bool wait_for_bug;
+
+static enum hrtimer_restart bug_in_hardirq(struct hrtimer *timer)
+{
+ BUG();
+
+ wait_for_bug = false;
+ return HRTIMER_NORESTART;
+}
+
+static void lkdtm_BUG_IN_HARDIRQ(void)
+{
+ struct hrtimer timer;
+
+ wait_for_bug = true;
+ hrtimer_setup_on_stack(&timer, bug_in_hardirq,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
+ hrtimer_start(&timer, us_to_ktime(100), HRTIMER_MODE_REL_HARD);
+
+ while (READ_ONCE(wait_for_bug))
+ cpu_relax();
+
+ hrtimer_cancel(&timer);
+}
+
static int warn_counter;
static void lkdtm_WARNING(void)
@@ -696,7 +747,9 @@ static noinline void lkdtm_CORRUPT_PAC(void)
static struct crashtype crashtypes[] = {
CRASHTYPE(PANIC),
CRASHTYPE(PANIC_STOP_IRQOFF),
+ CRASHTYPE(PANIC_IN_HARDIRQ),
CRASHTYPE(BUG),
+ CRASHTYPE(BUG_IN_HARDIRQ),
CRASHTYPE(WARNING),
CRASHTYPE(WARNING_MESSAGE),
CRASHTYPE(EXCEPTION),
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index cff124c1eddd..67cd53715d93 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,6 +1,8 @@
#PANIC
#PANIC_STOP_IRQOFF Crashes entire system
+#PANIC_IN_HARDIRQ Crashes entire system
BUG kernel BUG at
+#BUG_IN_HARDIRQ Crashes entire system
WARNING WARNING:
WARNING_MESSAGE message trigger
EXCEPTION