diff options
| author | Paolo Bonzini <pbonzini@redhat.com> | 2025-12-18 18:38:45 +0100 |
|---|---|---|
| committer | Paolo Bonzini <pbonzini@redhat.com> | 2025-12-18 18:38:45 +0100 |
| commit | 0499add8efd72456514c6218c062911ccc922a99 (patch) | |
| tree | dc041b6ce12a0611e68a23f94fd8204377b38dc5 | |
| parent | ea1013c1539270e372fc99854bc6e4d94eaeff66 (diff) | |
| parent | 29763138830916f46daaa50e83e7f4f907a3236b (diff) | |
Merge tag 'kvm-x86-fixes-6.19-rc1' of https://github.com/kvm-x86/linux into HEAD
KVM fixes for 6.19-rc1
- Add a missing "break" to fix param parsing in the rseq selftest.
- Apply runtime updates to the _current_ CPUID when userspace is setting
CPUID, e.g. as part of vCPU hotplug, to fix a false positive and to avoid
dropping the pending update.
- Disallow toggling KVM_MEM_GUEST_MEMFD on an existing memslot, as it's not
supported by KVM and leads to a use-after-free due to KVM failing to unbind
the memslot from the previously-associated guest_memfd instance.
- Harden against similar KVM_MEM_GUEST_MEMFD goofs, and prepare for supporting
flags-only changes on KVM_MEM_GUEST_MEMFD memlslots, e.g. for dirty logging.
- Set exit_code[63:32] to -1 (all 0xffs) when synthesizing a nested
SVM_EXIT_ERR (a.k.a. VMEXIT_INVALID) #VMEXIT, as VMEXIT_INVALID is defined
as -1ull (a 64-bit value).
- Update SVI when activating APICv to fix a bug where a post-activation EOI
for an in-service IRQ would effective be lost due to SVI being stale.
- Immediately refresh APICv controls (if necessary) on a nested VM-Exit
instead of deferring the update via KVM_REQ_APICV_UPDATE, as the request is
effectively ignored because KVM thinks the vCPU already has the correct
APICv settings.
| -rw-r--r-- | arch/x86/kvm/cpuid.c | 11 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/nested.c | 4 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
| -rw-r--r-- | arch/x86/kvm/svm/svm.h | 7 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/nested.c | 3 | ||||
| -rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 9 | ||||
| -rw-r--r-- | arch/x86/kvm/x86.c | 7 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/rseq_test.c | 1 | ||||
| -rw-r--r-- | tools/testing/selftests/kvm/x86/cpuid_test.c | 15 | ||||
| -rw-r--r-- | virt/kvm/kvm_main.c | 17 |
10 files changed, 58 insertions, 18 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index d563a948318b..88a5426674a1 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -510,10 +510,17 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, int r; /* + * Apply pending runtime CPUID updates to the current CPUID entries to + * avoid false positives due to mismatches on KVM-owned feature flags. + */ + if (vcpu->arch.cpuid_dynamic_bits_dirty) + kvm_update_cpuid_runtime(vcpu); + + /* * Swap the existing (old) entries with the incoming (new) entries in * order to massage the new entries, e.g. to account for dynamic bits - * that KVM controls, without clobbering the current guest CPUID, which - * KVM needs to preserve in order to unwind on failure. + * that KVM controls, without losing the current guest CPUID, which KVM + * needs to preserve in order to unwind on failure. * * Similarly, save the vCPU's current cpu_caps so that the capabilities * can be updated alongside the CPUID entries when performing runtime diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index c81005b24522..ba0f11c68372 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -985,7 +985,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) if (!nested_vmcb_check_save(vcpu) || !nested_vmcb_check_controls(vcpu)) { vmcb12->control.exit_code = SVM_EXIT_ERR; - vmcb12->control.exit_code_hi = 0; + vmcb12->control.exit_code_hi = -1u; vmcb12->control.exit_info_1 = 0; vmcb12->control.exit_info_2 = 0; goto out; @@ -1018,7 +1018,7 @@ out_exit_err: svm->soft_int_injected = false; svm->vmcb->control.exit_code = SVM_EXIT_ERR; - svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_code_hi = -1u; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index f56c2d895011..24d59ccfa40d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2443,6 +2443,7 @@ static bool check_selective_cr0_intercepted(struct kvm_vcpu *vcpu, if (cr0 ^ val) { svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE; + svm->vmcb->control.exit_code_hi = 0; ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE); } @@ -4617,6 +4618,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, if (static_cpu_has(X86_FEATURE_NRIPS)) vmcb->control.next_rip = info->next_rip; vmcb->control.exit_code = icpt_info.exit_code; + vmcb->control.exit_code_hi = 0; vmexit = nested_svm_exit_handled(svm); ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9e151dbdef25..01be93a53d07 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -761,9 +761,10 @@ int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) { - svm->vmcb->control.exit_code = exit_code; - svm->vmcb->control.exit_info_1 = 0; - svm->vmcb->control.exit_info_2 = 0; + svm->vmcb->control.exit_code = exit_code; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; return nested_svm_vmexit(svm); } diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 40777278eabb..6137e5307d0f 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -19,6 +19,7 @@ #include "trace.h" #include "vmx.h" #include "smm.h" +#include "x86_ops.h" static bool __read_mostly enable_shadow_vmcs = 1; module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); @@ -5165,7 +5166,7 @@ void __nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, if (vmx->nested.update_vmcs01_apicv_status) { vmx->nested.update_vmcs01_apicv_status = false; - kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + vmx_refresh_apicv_exec_ctrl(vcpu); } if (vmx->nested.update_vmcs01_hwapic_isr) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4cbe8c84b636..6b96f7aea20b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6937,15 +6937,6 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) * VM-Exit, otherwise L1 with run with a stale SVI. */ if (is_guest_mode(vcpu)) { - /* - * KVM is supposed to forward intercepted L2 EOIs to L1 if VID - * is enabled in vmcs12; as above, the EOIs affect L2's vAPIC. - * Note, userspace can stuff state while L2 is active; assert - * that VID is disabled if and only if the vCPU is in KVM_RUN - * to avoid false positives if userspace is setting APIC state. - */ - WARN_ON_ONCE(vcpu->wants_to_run && - nested_cpu_has_vid(get_vmcs12(vcpu))); to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true; return; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0c6d899d53dd..ff8812f3a129 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10886,9 +10886,16 @@ void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu) * pending. At the same time, KVM_REQ_EVENT may not be set as APICv was * still active when the interrupt got accepted. Make sure * kvm_check_and_inject_events() is called to check for that. + * + * Update SVI when APICv gets enabled, otherwise SVI won't reflect the + * highest bit in vISR and the next accelerated EOI in the guest won't + * be virtualized correctly (the CPU uses SVI to determine which vISR + * vector to clear). */ if (!apic->apicv_active) kvm_make_request(KVM_REQ_EVENT, vcpu); + else + kvm_apic_update_hwapic_isr(vcpu); out: preempt_enable(); diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 1375fca80bcd..f80ad6b47d16 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -215,6 +215,7 @@ int main(int argc, char *argv[]) switch (opt) { case 'u': skip_sanity_check = true; + break; case 'l': latency = atoi_paranoid(optarg); break; diff --git a/tools/testing/selftests/kvm/x86/cpuid_test.c b/tools/testing/selftests/kvm/x86/cpuid_test.c index 7b3fda6842bc..f9ed14996977 100644 --- a/tools/testing/selftests/kvm/x86/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86/cpuid_test.c @@ -155,6 +155,7 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct static void set_cpuid_after_run(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *ent; + struct kvm_sregs sregs; int rc; u32 eax, ebx, x; @@ -162,6 +163,20 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu) rc = __vcpu_set_cpuid(vcpu); TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + /* + * Toggle CR4 bits that affect dynamic CPUID feature flags to verify + * setting unmodified CPUID succeeds with runtime CPUID updates. + */ + vcpu_sregs_get(vcpu, &sregs); + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 ^= X86_CR4_OSXSAVE; + if (kvm_cpu_has(X86_FEATURE_PKU)) + sregs.cr4 ^= X86_CR4_PKE; + vcpu_sregs_set(vcpu, &sregs); + + rc = __vcpu_set_cpuid(vcpu); + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + /* Changing CPU features is forbidden */ ent = vcpu_get_cpuid_entry(vcpu, 0x7); ebx = ent->ebx; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5fcd401a5897..5b5b69c97665 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1749,6 +1749,12 @@ static void kvm_commit_memory_region(struct kvm *kvm, kvm_free_memslot(kvm, old); break; case KVM_MR_MOVE: + /* + * Moving a guest_memfd memslot isn't supported, and will never + * be supported. + */ + WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD); + fallthrough; case KVM_MR_FLAGS_ONLY: /* * Free the dirty bitmap as needed; the below check encompasses @@ -1758,6 +1764,15 @@ static void kvm_commit_memory_region(struct kvm *kvm, kvm_destroy_dirty_bitmap(old); /* + * Unbind the guest_memfd instance as needed; the @new slot has + * already created its own binding. TODO: Drop the WARN when + * dirty logging guest_memfd memslots is supported. Until then, + * flags-only changes on guest_memfd slots should be impossible. + */ + if (WARN_ON_ONCE(old->flags & KVM_MEM_GUEST_MEMFD)) + kvm_gmem_unbind(old); + + /* * The final quirk. Free the detached, old slot, but only its * memory, not any metadata. Metadata, including arch specific * data, may be reused by @new. @@ -2086,7 +2101,7 @@ static int kvm_set_memory_region(struct kvm *kvm, return -EINVAL; if ((mem->userspace_addr != old->userspace_addr) || (npages != old->npages) || - ((mem->flags ^ old->flags) & KVM_MEM_READONLY)) + ((mem->flags ^ old->flags) & (KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD))) return -EINVAL; if (base_gfn != old->base_gfn) |
