* PPC bugfixes
* RCU splat fix
* swait races fix
* pointless userspace-triggerable BUG() fix
* misc fixes for KVM_RUN corner cases
* nested virt correctness fixes + one host DoS
* some cleanups
* clang build fix
* fix AMD AVIC with default QEMU command line options
* x86 bugfixes

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)

iQEcBAABAgAGBQJZvANtAAoJEL/70l94x66DtcIH/0i4fenYamxdq2xiWtsZdbcy
yfk7mWKEzWGZhP+2X8SSOeetd5mqnIcf2cc4m68UCXpt0zoPEjY0i0D4xrYJHZ03
R3ifqvtpHByodfT7dOKQPEisO8PdJ5tvecaCMnK3u6SNaNLjAZfhobuLppQHOwQO
eBvpm0jROpA7ENlDgXtsti8MEdsoWtnmGGrRBY77EGW+t24OpNuGB1EMC0nvcs65
eChwZ3u8xeU5Ws3Y/DiC8tK8t628znknd8ay02LTZjA303Ftoe192jPpS33V4v15
kqS6vUFy2lpr9L6wicZtcnnSLtKv+LqecK6o8cxNjzlkOeaZuo9D8UMYsWQfj6w=
=Ma23
-----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 - PPC bugfixes
 - RCU splat fix
 - swait races fix
 - pointless userspace-triggerable BUG() fix
 - misc fixes for KVM_RUN corner cases
 - nested virt correctness fixes + one host DoS
 - some cleanups
 - clang build fix
 - fix AMD AVIC with default QEMU command line options
 - x86 bugfixes

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly
  kvm: vmx: Handle VMLAUNCH/VMRESUME failure properly
  kvm: nVMX: Remove nested_vmx_succeed after successful VM-entry
  kvm,mips: Fix potential swait_active() races
  kvm,powerpc: Serialize wq active checks in ops->vcpu_kick
  kvm: Serialize wq active checks in kvm_vcpu_wake_up()
  kvm,x86: Fix apf_task_wake_one() wq serialization
  kvm,lapic: Justify use of swait_active()
  kvm,async_pf: Use swq_has_sleeper()
  sched/wait: Add swq_has_sleeper()
  KVM: VMX: Do not BUG() on out-of-bounds guest IRQ
  KVM: Don't accept obviously wrong gsi values via KVM_IRQFD
  kvm: nVMX: Don't allow L2 to access the hardware CR8
  KVM: trace events: update list of exit reasons
  KVM: async_pf: Fix #DF due to inject "Page not Present" and "Page Ready" exceptions simultaneously
  KVM: X86: Don't block vCPU if there is pending exception
  KVM: SVM: Add irqchip_split() checks before enabling AVIC
  KVM: Add struct kvm_vcpu pointer parameter to get_enable_apicv()
  KVM: SVM: Refactor AVIC vcpu initialization into avic_init_vcpu()
  KVM: x86: fix clang build
  ...
This commit is contained in:
commit
9db59599ae
18 changed files with 258 additions and 112 deletions
|
@ -514,7 +514,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
|
|||
|
||||
dvcpu->arch.wait = 0;
|
||||
|
||||
if (swait_active(&dvcpu->wq))
|
||||
if (swq_has_sleeper(&dvcpu->wq))
|
||||
swake_up(&dvcpu->wq);
|
||||
|
||||
return 0;
|
||||
|
@ -1179,7 +1179,7 @@ static void kvm_mips_comparecount_func(unsigned long data)
|
|||
kvm_mips_callbacks->queue_timer_int(vcpu);
|
||||
|
||||
vcpu->arch.wait = 0;
|
||||
if (swait_active(&vcpu->wq))
|
||||
if (swq_has_sleeper(&vcpu->wq))
|
||||
swake_up(&vcpu->wq);
|
||||
}
|
||||
|
||||
|
|
|
@ -181,7 +181,7 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
|
|||
struct swait_queue_head *wqp;
|
||||
|
||||
wqp = kvm_arch_vcpu_wq(vcpu);
|
||||
if (swait_active(wqp)) {
|
||||
if (swq_has_sleeper(wqp)) {
|
||||
swake_up(wqp);
|
||||
++vcpu->stat.halt_wakeup;
|
||||
}
|
||||
|
@ -4212,11 +4212,13 @@ static int kvmhv_configure_mmu(struct kvm *kvm, struct kvm_ppc_mmuv3_cfg *cfg)
|
|||
if ((cfg->process_table & PRTS_MASK) > 24)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm->arch.process_table = cfg->process_table;
|
||||
kvmppc_setup_partition_table(kvm);
|
||||
|
||||
lpcr = (cfg->flags & KVM_PPC_MMUV3_GTSE) ? LPCR_GTSE : 0;
|
||||
kvmppc_update_lpcr(kvm, lpcr, LPCR_GTSE);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -38,7 +38,6 @@ static inline void __iomem *get_tima_phys(void)
|
|||
#define __x_tima get_tima_phys()
|
||||
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_page))
|
||||
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_page))
|
||||
#define __x_readb __raw_rm_readb
|
||||
#define __x_writeb __raw_rm_writeb
|
||||
#define __x_readw __raw_rm_readw
|
||||
#define __x_readq __raw_rm_readq
|
||||
|
|
|
@ -771,6 +771,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
|
|||
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
BEGIN_FTR_SECTION
|
||||
/*
|
||||
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
|
||||
*/
|
||||
bl kvmppc_restore_tm
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
||||
#endif
|
||||
|
@ -1630,6 +1633,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
|
|||
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
BEGIN_FTR_SECTION
|
||||
/*
|
||||
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
|
||||
*/
|
||||
bl kvmppc_save_tm
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
||||
#endif
|
||||
|
@ -1749,7 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
|
|||
/*
|
||||
* Are we running hash or radix ?
|
||||
*/
|
||||
beq cr2,3f
|
||||
ld r5, VCPU_KVM(r9)
|
||||
lbz r0, KVM_RADIX(r5)
|
||||
cmpwi cr2, r0, 0
|
||||
beq cr2, 3f
|
||||
|
||||
/* Radix: Handle the case where the guest used an illegal PID */
|
||||
LOAD_REG_ADDR(r4, mmu_base_pid)
|
||||
|
@ -2466,6 +2475,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
|
|||
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
BEGIN_FTR_SECTION
|
||||
/*
|
||||
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
|
||||
*/
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
bl kvmppc_save_tm
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
||||
|
@ -2578,6 +2590,9 @@ kvm_end_cede:
|
|||
|
||||
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
|
||||
BEGIN_FTR_SECTION
|
||||
/*
|
||||
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
|
||||
*/
|
||||
bl kvmppc_restore_tm
|
||||
END_FTR_SECTION_IFSET(CPU_FTR_TM)
|
||||
#endif
|
||||
|
|
|
@ -48,7 +48,6 @@
|
|||
#define __x_tima xive_tima
|
||||
#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
|
||||
#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
|
||||
#define __x_readb __raw_readb
|
||||
#define __x_writeb __raw_writeb
|
||||
#define __x_readw __raw_readw
|
||||
#define __x_readq __raw_readq
|
||||
|
|
|
@ -28,7 +28,8 @@ static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc)
|
|||
* bit.
|
||||
*/
|
||||
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
|
||||
u8 pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
|
||||
__be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
|
||||
u8 pipr = be64_to_cpu(qw1) & 0xff;
|
||||
if (pipr >= xc->hw_cppr)
|
||||
return;
|
||||
}
|
||||
|
@ -336,7 +337,6 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
|
|||
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
||||
u8 pending = xc->pending;
|
||||
u32 hirq;
|
||||
u8 pipr;
|
||||
|
||||
pr_devel("H_IPOLL(server=%ld)\n", server);
|
||||
|
||||
|
@ -353,7 +353,8 @@ X_STATIC unsigned long GLUE(X_PFX,h_ipoll)(struct kvm_vcpu *vcpu, unsigned long
|
|||
pending = 0xff;
|
||||
} else {
|
||||
/* Grab pending interrupt if any */
|
||||
pipr = __x_readb(__x_tima + TM_QW1_OS + TM_PIPR);
|
||||
__be64 qw1 = __x_readq(__x_tima + TM_QW1_OS);
|
||||
u8 pipr = be64_to_cpu(qw1) & 0xff;
|
||||
if (pipr < 8)
|
||||
pending |= 1 << pipr;
|
||||
}
|
||||
|
|
|
@ -951,7 +951,6 @@ struct kvm_x86_ops {
|
|||
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
|
||||
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
|
||||
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
|
||||
u32 (*get_pkru)(struct kvm_vcpu *vcpu);
|
||||
|
||||
void (*tlb_flush)(struct kvm_vcpu *vcpu);
|
||||
|
||||
|
@ -973,7 +972,7 @@ struct kvm_x86_ops {
|
|||
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
|
||||
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
|
||||
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
|
||||
bool (*get_enable_apicv)(void);
|
||||
bool (*get_enable_apicv)(struct kvm_vcpu *vcpu);
|
||||
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
|
||||
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
|
||||
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
|
||||
|
|
|
@ -180,7 +180,7 @@ static void apf_task_wake_one(struct kvm_task_sleep_node *n)
|
|||
hlist_del_init(&n->link);
|
||||
if (n->halted)
|
||||
smp_send_reschedule(n->cpu);
|
||||
else if (swait_active(&n->wq))
|
||||
else if (swq_has_sleeper(&n->wq))
|
||||
swake_up(&n->wq);
|
||||
}
|
||||
|
||||
|
|
|
@ -59,7 +59,6 @@ static __always_inline struct cpuid_reg x86_feature_cpuid(unsigned x86_feature)
|
|||
{
|
||||
unsigned x86_leaf = x86_feature / 32;
|
||||
|
||||
BUILD_BUG_ON(!__builtin_constant_p(x86_leaf));
|
||||
BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
|
||||
BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
|
||||
|
||||
|
|
|
@ -1324,6 +1324,10 @@ static void apic_timer_expired(struct kvm_lapic *apic)
|
|||
atomic_inc(&apic->lapic_timer.pending);
|
||||
kvm_set_pending_timer(vcpu);
|
||||
|
||||
/*
|
||||
* For x86, the atomic_inc() is serialized, thus
|
||||
* using swait_active() is safe.
|
||||
*/
|
||||
if (swait_active(q))
|
||||
swake_up(q);
|
||||
|
||||
|
|
|
@ -1200,7 +1200,6 @@ static void avic_init_vmcb(struct vcpu_svm *svm)
|
|||
vmcb->control.avic_physical_id = ppa & AVIC_HPA_MASK;
|
||||
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID_COUNT;
|
||||
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
|
||||
svm->vcpu.arch.apicv_active = true;
|
||||
}
|
||||
|
||||
static void init_vmcb(struct vcpu_svm *svm)
|
||||
|
@ -1316,7 +1315,7 @@ static void init_vmcb(struct vcpu_svm *svm)
|
|||
set_intercept(svm, INTERCEPT_PAUSE);
|
||||
}
|
||||
|
||||
if (avic)
|
||||
if (kvm_vcpu_apicv_active(&svm->vcpu))
|
||||
avic_init_vmcb(svm);
|
||||
|
||||
/*
|
||||
|
@ -1600,6 +1599,23 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|||
avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
|
||||
}
|
||||
|
||||
static int avic_init_vcpu(struct vcpu_svm *svm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!kvm_vcpu_apicv_active(&svm->vcpu))
|
||||
return 0;
|
||||
|
||||
ret = avic_init_backing_page(&svm->vcpu);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
INIT_LIST_HEAD(&svm->ir_list);
|
||||
spin_lock_init(&svm->ir_list_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
||||
{
|
||||
struct vcpu_svm *svm;
|
||||
|
@ -1636,14 +1652,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
if (!hsave_page)
|
||||
goto free_page3;
|
||||
|
||||
if (avic) {
|
||||
err = avic_init_backing_page(&svm->vcpu);
|
||||
if (err)
|
||||
goto free_page4;
|
||||
|
||||
INIT_LIST_HEAD(&svm->ir_list);
|
||||
spin_lock_init(&svm->ir_list_lock);
|
||||
}
|
||||
err = avic_init_vcpu(svm);
|
||||
if (err)
|
||||
goto free_page4;
|
||||
|
||||
/* We initialize this flag to true to make sure that the is_running
|
||||
* bit would be set the first time the vcpu is loaded.
|
||||
|
@ -4395,9 +4406,9 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
|
|||
return;
|
||||
}
|
||||
|
||||
static bool svm_get_enable_apicv(void)
|
||||
static bool svm_get_enable_apicv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return avic;
|
||||
return avic && irqchip_split(vcpu->kvm);
|
||||
}
|
||||
|
||||
static void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
|
||||
|
@ -4414,7 +4425,7 @@ static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
|
|||
struct vcpu_svm *svm = to_svm(vcpu);
|
||||
struct vmcb *vmcb = svm->vmcb;
|
||||
|
||||
if (!avic)
|
||||
if (!kvm_vcpu_apicv_active(&svm->vcpu))
|
||||
return;
|
||||
|
||||
vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK;
|
||||
|
@ -5302,6 +5313,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu,
|
|||
*/
|
||||
if (info->rep_prefix != REPE_PREFIX)
|
||||
goto out;
|
||||
break;
|
||||
case SVM_EXIT_IOIO: {
|
||||
u64 exit_info;
|
||||
u32 bytes;
|
||||
|
|
|
@ -5012,7 +5012,7 @@ static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_activ
|
|||
}
|
||||
}
|
||||
|
||||
static bool vmx_get_enable_apicv(void)
|
||||
static bool vmx_get_enable_apicv(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return enable_apicv;
|
||||
}
|
||||
|
@ -8344,12 +8344,14 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
|
|||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
|
||||
trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
|
||||
vmcs_readl(EXIT_QUALIFICATION),
|
||||
vmx->idt_vectoring_info,
|
||||
intr_info,
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
|
||||
KVM_ISA_VMX);
|
||||
if (vmx->nested.nested_run_pending)
|
||||
return false;
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
|
||||
vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The host physical addresses of some pages of guest memory
|
||||
|
@ -8363,14 +8365,12 @@ static bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason)
|
|||
*/
|
||||
nested_mark_vmcs12_pages_dirty(vcpu);
|
||||
|
||||
if (vmx->nested.nested_run_pending)
|
||||
return false;
|
||||
|
||||
if (unlikely(vmx->fail)) {
|
||||
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
|
||||
vmcs_read32(VM_INSTRUCTION_ERROR));
|
||||
return true;
|
||||
}
|
||||
trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
|
||||
vmcs_readl(EXIT_QUALIFICATION),
|
||||
vmx->idt_vectoring_info,
|
||||
intr_info,
|
||||
vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
|
||||
KVM_ISA_VMX);
|
||||
|
||||
switch (exit_reason) {
|
||||
case EXIT_REASON_EXCEPTION_NMI:
|
||||
|
@ -9424,12 +9424,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
| (1 << VCPU_EXREG_CR3));
|
||||
vcpu->arch.regs_dirty = 0;
|
||||
|
||||
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
|
||||
|
||||
vmx->loaded_vmcs->launched = 1;
|
||||
|
||||
vmx->exit_reason = vmcs_read32(VM_EXIT_REASON);
|
||||
|
||||
/*
|
||||
* eager fpu is enabled if PKEY is supported and CR4 is switched
|
||||
* back on host, so it is safe to read guest PKRU from current
|
||||
|
@ -9451,6 +9445,14 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|||
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
||||
|
||||
vmx->nested.nested_run_pending = 0;
|
||||
vmx->idt_vectoring_info = 0;
|
||||
|
||||
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
|
||||
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
|
||||
return;
|
||||
|
||||
vmx->loaded_vmcs->launched = 1;
|
||||
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
|
||||
|
||||
vmx_complete_atomic_exit(vmx);
|
||||
vmx_recover_nmi_blocking(vmx);
|
||||
|
@ -10525,6 +10527,11 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|||
if (exec_control & CPU_BASED_TPR_SHADOW) {
|
||||
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
|
||||
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
|
||||
} else {
|
||||
#ifdef CONFIG_X86_64
|
||||
exec_control |= CPU_BASED_CR8_LOAD_EXITING |
|
||||
CPU_BASED_CR8_STORE_EXITING;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -11388,46 +11395,30 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
|
|||
{
|
||||
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
||||
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
||||
u32 vm_inst_error = 0;
|
||||
|
||||
/* trying to cancel vmlaunch/vmresume is a bug */
|
||||
WARN_ON_ONCE(vmx->nested.nested_run_pending);
|
||||
|
||||
leave_guest_mode(vcpu);
|
||||
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
|
||||
exit_qualification);
|
||||
|
||||
if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
|
||||
vmcs12->vm_exit_msr_store_count))
|
||||
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
|
||||
|
||||
if (unlikely(vmx->fail))
|
||||
vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
|
||||
|
||||
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
||||
|
||||
/*
|
||||
* TODO: SDM says that with acknowledge interrupt on exit, bit 31 of
|
||||
* the VM-exit interrupt information (valid interrupt) is always set to
|
||||
* 1 on EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't need
|
||||
* kvm_cpu_has_interrupt(). See the commit message for details.
|
||||
* The only expected VM-instruction error is "VM entry with
|
||||
* invalid control field(s)." Anything else indicates a
|
||||
* problem with L0.
|
||||
*/
|
||||
if (nested_exit_intr_ack_set(vcpu) &&
|
||||
exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
|
||||
kvm_cpu_has_interrupt(vcpu)) {
|
||||
int irq = kvm_cpu_get_interrupt(vcpu);
|
||||
WARN_ON(irq < 0);
|
||||
vmcs12->vm_exit_intr_info = irq |
|
||||
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
|
||||
WARN_ON_ONCE(vmx->fail && (vmcs_read32(VM_INSTRUCTION_ERROR) !=
|
||||
VMXERR_ENTRY_INVALID_CONTROL_FIELD));
|
||||
|
||||
leave_guest_mode(vcpu);
|
||||
|
||||
if (likely(!vmx->fail)) {
|
||||
prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
|
||||
exit_qualification);
|
||||
|
||||
if (nested_vmx_store_msr(vcpu, vmcs12->vm_exit_msr_store_addr,
|
||||
vmcs12->vm_exit_msr_store_count))
|
||||
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
|
||||
}
|
||||
|
||||
trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
|
||||
vmcs12->exit_qualification,
|
||||
vmcs12->idt_vectoring_info_field,
|
||||
vmcs12->vm_exit_intr_info,
|
||||
vmcs12->vm_exit_intr_error_code,
|
||||
KVM_ISA_VMX);
|
||||
|
||||
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
||||
vm_entry_controls_reset_shadow(vmx);
|
||||
vm_exit_controls_reset_shadow(vmx);
|
||||
vmx_segment_cache_clear(vmx);
|
||||
|
@ -11436,8 +11427,6 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
|
|||
if (VMCS02_POOL_SIZE == 0)
|
||||
nested_free_vmcs02(vmx, vmx->nested.current_vmptr);
|
||||
|
||||
load_vmcs12_host_state(vcpu, vmcs12);
|
||||
|
||||
/* Update any VMCS fields that might have changed while L2 ran */
|
||||
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
|
||||
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
|
||||
|
@ -11486,21 +11475,57 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
|
|||
*/
|
||||
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
|
||||
|
||||
/*
|
||||
* Exiting from L2 to L1, we're now back to L1 which thinks it just
|
||||
* finished a VMLAUNCH or VMRESUME instruction, so we need to set the
|
||||
* success or failure flag accordingly.
|
||||
*/
|
||||
if (unlikely(vmx->fail)) {
|
||||
vmx->fail = 0;
|
||||
nested_vmx_failValid(vcpu, vm_inst_error);
|
||||
} else
|
||||
nested_vmx_succeed(vcpu);
|
||||
if (enable_shadow_vmcs)
|
||||
vmx->nested.sync_shadow_vmcs = true;
|
||||
|
||||
/* in case we halted in L2 */
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
|
||||
if (likely(!vmx->fail)) {
|
||||
/*
|
||||
* TODO: SDM says that with acknowledge interrupt on
|
||||
* exit, bit 31 of the VM-exit interrupt information
|
||||
* (valid interrupt) is always set to 1 on
|
||||
* EXIT_REASON_EXTERNAL_INTERRUPT, so we shouldn't
|
||||
* need kvm_cpu_has_interrupt(). See the commit
|
||||
* message for details.
|
||||
*/
|
||||
if (nested_exit_intr_ack_set(vcpu) &&
|
||||
exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
|
||||
kvm_cpu_has_interrupt(vcpu)) {
|
||||
int irq = kvm_cpu_get_interrupt(vcpu);
|
||||
WARN_ON(irq < 0);
|
||||
vmcs12->vm_exit_intr_info = irq |
|
||||
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
|
||||
}
|
||||
|
||||
trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
|
||||
vmcs12->exit_qualification,
|
||||
vmcs12->idt_vectoring_info_field,
|
||||
vmcs12->vm_exit_intr_info,
|
||||
vmcs12->vm_exit_intr_error_code,
|
||||
KVM_ISA_VMX);
|
||||
|
||||
load_vmcs12_host_state(vcpu, vmcs12);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* After an early L2 VM-entry failure, we're now back
|
||||
* in L1 which thinks it just finished a VMLAUNCH or
|
||||
* VMRESUME instruction, so we need to set the failure
|
||||
* flag and the VM-instruction error field of the VMCS
|
||||
* accordingly.
|
||||
*/
|
||||
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
||||
/*
|
||||
* The emulated instruction was already skipped in
|
||||
* nested_vmx_run, but the updated RIP was never
|
||||
* written back to the vmcs01.
|
||||
*/
|
||||
skip_emulated_instruction(vcpu);
|
||||
vmx->fail = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -11829,7 +11854,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
|||
struct kvm_lapic_irq irq;
|
||||
struct kvm_vcpu *vcpu;
|
||||
struct vcpu_data vcpu_info;
|
||||
int idx, ret = -EINVAL;
|
||||
int idx, ret = 0;
|
||||
|
||||
if (!kvm_arch_has_assigned_device(kvm) ||
|
||||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
|
||||
|
@ -11838,7 +11863,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
|||
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||
BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
|
||||
if (guest_irq >= irq_rt->nr_rt_entries ||
|
||||
hlist_empty(&irq_rt->map[guest_irq])) {
|
||||
pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
|
||||
guest_irq, irq_rt->nr_rt_entries);
|
||||
goto out;
|
||||
}
|
||||
|
||||
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
|
||||
if (e->type != KVM_IRQ_ROUTING_MSI)
|
||||
|
|
|
@ -7231,10 +7231,19 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|||
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
|
||||
|
||||
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
|
||||
if (kvm_run->immediate_exit) {
|
||||
r = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
kvm_vcpu_block(vcpu);
|
||||
kvm_apic_accept_events(vcpu);
|
||||
kvm_clear_request(KVM_REQ_UNHALT, vcpu);
|
||||
r = -EAGAIN;
|
||||
if (signal_pending(current)) {
|
||||
r = -EINTR;
|
||||
vcpu->run->exit_reason = KVM_EXIT_INTR;
|
||||
++vcpu->stat.signal_exits;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -7971,7 +7980,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
|
|||
BUG_ON(vcpu->kvm == NULL);
|
||||
kvm = vcpu->kvm;
|
||||
|
||||
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
|
||||
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
|
||||
vcpu->arch.pv.pv_unhalted = false;
|
||||
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
|
||||
if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
|
||||
|
@ -8452,6 +8461,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
|
|||
if (vcpu->arch.pv.pv_unhalted)
|
||||
return true;
|
||||
|
||||
if (vcpu->arch.exception.pending)
|
||||
return true;
|
||||
|
||||
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
|
||||
(vcpu->arch.nmi_pending &&
|
||||
kvm_x86_ops->nmi_allowed(vcpu)))
|
||||
|
@ -8619,6 +8631,13 @@ static int apf_put_user(struct kvm_vcpu *vcpu, u32 val)
|
|||
sizeof(val));
|
||||
}
|
||||
|
||||
static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
|
||||
{
|
||||
|
||||
return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val,
|
||||
sizeof(u32));
|
||||
}
|
||||
|
||||
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
|
||||
struct kvm_async_pf *work)
|
||||
{
|
||||
|
@ -8646,6 +8665,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
|||
struct kvm_async_pf *work)
|
||||
{
|
||||
struct x86_exception fault;
|
||||
u32 val;
|
||||
|
||||
if (work->wakeup_all)
|
||||
work->arch.token = ~0; /* broadcast wakeup */
|
||||
|
@ -8653,15 +8673,26 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
|
|||
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
|
||||
trace_kvm_async_pf_ready(work->arch.token, work->gva);
|
||||
|
||||
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
|
||||
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
|
||||
fault.vector = PF_VECTOR;
|
||||
fault.error_code_valid = true;
|
||||
fault.error_code = 0;
|
||||
fault.nested_page_fault = false;
|
||||
fault.address = work->arch.token;
|
||||
fault.async_page_fault = true;
|
||||
kvm_inject_page_fault(vcpu, &fault);
|
||||
if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED &&
|
||||
!apf_get_user(vcpu, &val)) {
|
||||
if (val == KVM_PV_REASON_PAGE_NOT_PRESENT &&
|
||||
vcpu->arch.exception.pending &&
|
||||
vcpu->arch.exception.nr == PF_VECTOR &&
|
||||
!apf_put_user(vcpu, 0)) {
|
||||
vcpu->arch.exception.injected = false;
|
||||
vcpu->arch.exception.pending = false;
|
||||
vcpu->arch.exception.nr = 0;
|
||||
vcpu->arch.exception.has_error_code = false;
|
||||
vcpu->arch.exception.error_code = 0;
|
||||
} else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
|
||||
fault.vector = PF_VECTOR;
|
||||
fault.error_code_valid = true;
|
||||
fault.error_code = 0;
|
||||
fault.nested_page_fault = false;
|
||||
fault.address = work->arch.token;
|
||||
fault.async_page_fault = true;
|
||||
kvm_inject_page_fault(vcpu, &fault);
|
||||
}
|
||||
}
|
||||
vcpu->arch.apf.halted = false;
|
||||
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
||||
|
|
|
@ -79,9 +79,63 @@ extern void __init_swait_queue_head(struct swait_queue_head *q, const char *name
|
|||
DECLARE_SWAIT_QUEUE_HEAD(name)
|
||||
#endif
|
||||
|
||||
static inline int swait_active(struct swait_queue_head *q)
|
||||
/**
|
||||
* swait_active -- locklessly test for waiters on the queue
|
||||
* @wq: the waitqueue to test for waiters
|
||||
*
|
||||
* returns true if the wait list is not empty
|
||||
*
|
||||
* NOTE: this function is lockless and requires care, incorrect usage _will_
|
||||
* lead to sporadic and non-obvious failure.
|
||||
*
|
||||
* NOTE2: this function has the same above implications as regular waitqueues.
|
||||
*
|
||||
* Use either while holding swait_queue_head::lock or when used for wakeups
|
||||
* with an extra smp_mb() like:
|
||||
*
|
||||
* CPU0 - waker CPU1 - waiter
|
||||
*
|
||||
* for (;;) {
|
||||
* @cond = true; prepare_to_swait(&wq_head, &wait, state);
|
||||
* smp_mb(); // smp_mb() from set_current_state()
|
||||
* if (swait_active(wq_head)) if (@cond)
|
||||
* wake_up(wq_head); break;
|
||||
* schedule();
|
||||
* }
|
||||
* finish_swait(&wq_head, &wait);
|
||||
*
|
||||
* Because without the explicit smp_mb() it's possible for the
|
||||
* swait_active() load to get hoisted over the @cond store such that we'll
|
||||
* observe an empty wait list while the waiter might not observe @cond.
|
||||
* This, in turn, can trigger missing wakeups.
|
||||
*
|
||||
* Also note that this 'optimization' trades a spin_lock() for an smp_mb(),
|
||||
* which (when the lock is uncontended) are of roughly equal cost.
|
||||
*/
|
||||
static inline int swait_active(struct swait_queue_head *wq)
|
||||
{
|
||||
return !list_empty(&q->task_list);
|
||||
return !list_empty(&wq->task_list);
|
||||
}
|
||||
|
||||
/**
|
||||
* swq_has_sleeper - check if there are any waiting processes
|
||||
* @wq: the waitqueue to test for waiters
|
||||
*
|
||||
* Returns true if @wq has waiting processes
|
||||
*
|
||||
* Please refer to the comment for swait_active.
|
||||
*/
|
||||
static inline bool swq_has_sleeper(struct swait_queue_head *wq)
|
||||
{
|
||||
/*
|
||||
* We need to be sure we are in sync with the list_add()
|
||||
* modifications to the wait queue (task_list).
|
||||
*
|
||||
* This memory barrier should be paired with one on the
|
||||
* waiting side.
|
||||
*/
|
||||
smp_mb();
|
||||
return swait_active(wq);
|
||||
}
|
||||
|
||||
extern void swake_up(struct swait_queue_head *q);
|
||||
|
|
|
@ -14,7 +14,9 @@
|
|||
ERSN(SHUTDOWN), ERSN(FAIL_ENTRY), ERSN(INTR), ERSN(SET_TPR), \
|
||||
ERSN(TPR_ACCESS), ERSN(S390_SIEIC), ERSN(S390_RESET), ERSN(DCR),\
|
||||
ERSN(NMI), ERSN(INTERNAL_ERROR), ERSN(OSI), ERSN(PAPR_HCALL), \
|
||||
ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH)
|
||||
ERSN(S390_UCONTROL), ERSN(WATCHDOG), ERSN(S390_TSCH), ERSN(EPR),\
|
||||
ERSN(SYSTEM_EVENT), ERSN(S390_STSI), ERSN(IOAPIC_EOI), \
|
||||
ERSN(HYPERV)
|
||||
|
||||
TRACE_EVENT(kvm_userspace_exit,
|
||||
TP_PROTO(__u32 reason, int errno),
|
||||
|
|
|
@ -106,11 +106,7 @@ static void async_pf_execute(struct work_struct *work)
|
|||
|
||||
trace_kvm_async_pf_completed(addr, gva);
|
||||
|
||||
/*
|
||||
* This memory barrier pairs with prepare_to_wait's set_current_state()
|
||||
*/
|
||||
smp_mb();
|
||||
if (swait_active(&vcpu->wq))
|
||||
if (swq_has_sleeper(&vcpu->wq))
|
||||
swake_up(&vcpu->wq);
|
||||
|
||||
mmput(mm);
|
||||
|
|
|
@ -565,6 +565,8 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
|
|||
{
|
||||
if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
|
||||
return -EINVAL;
|
||||
if (args->gsi >= KVM_MAX_IRQ_ROUTES)
|
||||
return -EINVAL;
|
||||
|
||||
if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
|
||||
return kvm_irqfd_deassign(kvm, args);
|
||||
|
|
|
@ -674,6 +674,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
|||
out_err_no_srcu:
|
||||
hardware_disable_all();
|
||||
out_err_no_disable:
|
||||
refcount_set(&kvm->users_count, 0);
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(kvm_get_bus(kvm, i));
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
|
@ -2186,7 +2187,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
|
|||
struct swait_queue_head *wqp;
|
||||
|
||||
wqp = kvm_arch_vcpu_wq(vcpu);
|
||||
if (swait_active(wqp)) {
|
||||
if (swq_has_sleeper(wqp)) {
|
||||
swake_up(wqp);
|
||||
++vcpu->stat.halt_wakeup;
|
||||
return true;
|
||||
|
|
Loading…
Reference in a new issue