Fixes for KVM/ARM for 4.0-rc5.
Fixes page refcounting issues in our Stage-2 page table management code, fixes a missing unlock in a gicv3 error path, and fixes a race that can cause lost interrupts if signals are pending just prior to entering the guest. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJVBs95AAoJEEtpOizt6ddyAfoH/Rwj2T38ZDikImPpgfeFrmJs ZlWC+Z3akwjVHPv308/MsKdyashtA7OjiMp3DOheMFMYJay/ecgY/92vFCc6uh5S LDoXCbp+Pneth6C6bbU2Gw+aoCD07ZYCn9PeFq40MfpQUhCEGWhx41OFzHppqOZx e+jodHRE+sBVTFUtbz+HubAfcM46f/8bP7682CEKsVZPeTSiHyeojdZEglfB37MG ar/iC1/cyO/097vWaBqv1t1WZoHbWmMrDlzo5X+AtayVXFNdv4Ztw0Rz2kRhnLB8 8GXYawoSQoTN8FX1oyTyr5YWcWD7wDTzhcHsHS1xZHhvrdLCEcFrHeEWkuUlYjU= =YS6j -----END PGP SIGNATURE----- Merge tag 'kvm-arm-fixes-4.0-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into 'kvm-next' Fixes for KVM/ARM for 4.0-rc5. Fixes page refcounting issues in our Stage-2 page table management code, fixes a missing unlock in a gicv3 error path, and fixes a race that can cause lost interrupts if signals are pending just prior to entering the guest.
This commit is contained in:
commit
8999602d08
8 changed files with 106 additions and 76 deletions
|
@ -149,31 +149,30 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
|
|||
(__boundary - 1 < (end) - 1)? __boundary: (end); \
|
||||
})
|
||||
|
||||
#define kvm_pgd_index(addr) pgd_index(addr)
|
||||
|
||||
static inline bool kvm_page_empty(void *ptr)
|
||||
{
|
||||
struct page *ptr_page = virt_to_page(ptr);
|
||||
return page_count(ptr_page) == 1;
|
||||
}
|
||||
|
||||
|
||||
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
|
||||
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
|
||||
#define kvm_pud_table_empty(kvm, pudp) (0)
|
||||
|
||||
#define KVM_PREALLOC_LEVEL 0
|
||||
|
||||
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kvm_free_hwpgd(struct kvm *kvm) { }
|
||||
|
||||
static inline void *kvm_get_hwpgd(struct kvm *kvm)
|
||||
{
|
||||
return kvm->arch.pgd;
|
||||
}
|
||||
|
||||
static inline unsigned int kvm_get_hwpgd_size(void)
|
||||
{
|
||||
return PTRS_PER_S2_PGD * sizeof(pgd_t);
|
||||
}
|
||||
|
||||
struct kvm;
|
||||
|
||||
#define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l))
|
||||
|
|
|
@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
|
|||
phys_addr_t addr = start, end = start + size;
|
||||
phys_addr_t next;
|
||||
|
||||
pgd = pgdp + pgd_index(addr);
|
||||
pgd = pgdp + kvm_pgd_index(addr);
|
||||
do {
|
||||
next = kvm_pgd_addr_end(addr, end);
|
||||
if (!pgd_none(*pgd))
|
||||
|
@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
|
|||
phys_addr_t next;
|
||||
pgd_t *pgd;
|
||||
|
||||
pgd = kvm->arch.pgd + pgd_index(addr);
|
||||
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
|
||||
do {
|
||||
next = kvm_pgd_addr_end(addr, end);
|
||||
stage2_flush_puds(kvm, pgd, addr, next);
|
||||
|
@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
|
|||
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
|
||||
}
|
||||
|
||||
/* Free the HW pgd, one page at a time */
|
||||
static void kvm_free_hwpgd(void *hwpgd)
|
||||
{
|
||||
free_pages_exact(hwpgd, kvm_get_hwpgd_size());
|
||||
}
|
||||
|
||||
/* Allocate the HW PGD, making sure that each page gets its own refcount */
|
||||
static void *kvm_alloc_hwpgd(void)
|
||||
{
|
||||
unsigned int size = kvm_get_hwpgd_size();
|
||||
|
||||
return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
|
||||
}
|
||||
|
||||
/**
|
||||
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
|
||||
* @kvm: The KVM struct pointer for the VM.
|
||||
|
@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
|
|||
*/
|
||||
int kvm_alloc_stage2_pgd(struct kvm *kvm)
|
||||
{
|
||||
int ret;
|
||||
pgd_t *pgd;
|
||||
void *hwpgd;
|
||||
|
||||
if (kvm->arch.pgd != NULL) {
|
||||
kvm_err("kvm_arch already initialized?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
hwpgd = kvm_alloc_hwpgd();
|
||||
if (!hwpgd)
|
||||
return -ENOMEM;
|
||||
|
||||
/* When the kernel uses more levels of page tables than the
|
||||
* guest, we allocate a fake PGD and pre-populate it to point
|
||||
* to the next-level page table, which will be the real
|
||||
* initial page table pointed to by the VTTBR.
|
||||
*
|
||||
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for
|
||||
* the PMD and the kernel will use folded pud.
|
||||
* When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
|
||||
* pages.
|
||||
*/
|
||||
if (KVM_PREALLOC_LEVEL > 0) {
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Allocate fake pgd for the page table manipulation macros to
|
||||
* work. This is not used by the hardware and we have no
|
||||
|
@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
|
|||
*/
|
||||
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
|
||||
GFP_KERNEL | __GFP_ZERO);
|
||||
|
||||
if (!pgd) {
|
||||
kvm_free_hwpgd(hwpgd);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/* Plug the HW PGD into the fake one. */
|
||||
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
|
||||
if (KVM_PREALLOC_LEVEL == 1)
|
||||
pgd_populate(NULL, pgd + i,
|
||||
(pud_t *)hwpgd + i * PTRS_PER_PUD);
|
||||
else if (KVM_PREALLOC_LEVEL == 2)
|
||||
pud_populate(NULL, pud_offset(pgd, 0) + i,
|
||||
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Allocate actual first-level Stage-2 page table used by the
|
||||
* hardware for Stage-2 page table walks.
|
||||
*/
|
||||
pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
|
||||
pgd = (pgd_t *)hwpgd;
|
||||
}
|
||||
|
||||
if (!pgd)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kvm_prealloc_hwpgd(kvm, pgd);
|
||||
if (ret)
|
||||
goto out_err;
|
||||
|
||||
kvm_clean_pgd(pgd);
|
||||
kvm->arch.pgd = pgd;
|
||||
return 0;
|
||||
out_err:
|
||||
if (KVM_PREALLOC_LEVEL > 0)
|
||||
kfree(pgd);
|
||||
else
|
||||
free_pages((unsigned long)pgd, S2_PGD_ORDER);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
|
|||
return;
|
||||
|
||||
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
|
||||
kvm_free_hwpgd(kvm);
|
||||
kvm_free_hwpgd(kvm_get_hwpgd(kvm));
|
||||
if (KVM_PREALLOC_LEVEL > 0)
|
||||
kfree(kvm->arch.pgd);
|
||||
else
|
||||
free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
|
||||
|
||||
kvm->arch.pgd = NULL;
|
||||
}
|
||||
|
||||
|
@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
|
|||
pgd_t *pgd;
|
||||
pud_t *pud;
|
||||
|
||||
pgd = kvm->arch.pgd + pgd_index(addr);
|
||||
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
|
||||
if (WARN_ON(pgd_none(*pgd))) {
|
||||
if (!cache)
|
||||
return NULL;
|
||||
|
@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
|
|||
pgd_t *pgd;
|
||||
phys_addr_t next;
|
||||
|
||||
pgd = kvm->arch.pgd + pgd_index(addr);
|
||||
pgd = kvm->arch.pgd + kvm_pgd_index(addr);
|
||||
do {
|
||||
/*
|
||||
* Release kvm_mmu_lock periodically if the memory region is
|
||||
|
|
|
@ -129,6 +129,9 @@
|
|||
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
|
||||
* not known to exist and will break with this configuration.
|
||||
*
|
||||
* VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
|
||||
* (see hyp-init.S).
|
||||
*
|
||||
* Note that when using 4K pages, we concatenate two first level page tables
|
||||
* together.
|
||||
*
|
||||
|
@ -138,7 +141,6 @@
|
|||
#ifdef CONFIG_ARM64_64K_PAGES
|
||||
/*
|
||||
* Stage2 translation configuration:
|
||||
* 40bits output (PS = 2)
|
||||
* 40bits input (T0SZ = 24)
|
||||
* 64kB pages (TG0 = 1)
|
||||
* 2 level page tables (SL = 1)
|
||||
|
@ -150,7 +152,6 @@
|
|||
#else
|
||||
/*
|
||||
* Stage2 translation configuration:
|
||||
* 40bits output (PS = 2)
|
||||
* 40bits input (T0SZ = 24)
|
||||
* 4kB pages (TG0 = 0)
|
||||
* 3 level page tables (SL = 1)
|
||||
|
|
|
@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
|
|||
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
|
||||
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
|
||||
|
||||
#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
|
||||
|
||||
/*
|
||||
* If we are concatenating first level stage-2 page tables, we would have less
|
||||
* than or equal to 16 pointers in the fake PGD, because that's what the
|
||||
|
@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
|
|||
#define KVM_PREALLOC_LEVEL (0)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* kvm_prealloc_hwpgd - allocate inital table for VTTBR
|
||||
* @kvm: The KVM struct pointer for the VM.
|
||||
* @pgd: The kernel pseudo pgd
|
||||
*
|
||||
* When the kernel uses more levels of page tables than the guest, we allocate
|
||||
* a fake PGD and pre-populate it to point to the next-level page table, which
|
||||
* will be the real initial page table pointed to by the VTTBR.
|
||||
*
|
||||
* When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
|
||||
* the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
|
||||
* allocate 2 consecutive PUD pages.
|
||||
*/
|
||||
static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned long hwpgd;
|
||||
|
||||
if (KVM_PREALLOC_LEVEL == 0)
|
||||
return 0;
|
||||
|
||||
hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
|
||||
if (!hwpgd)
|
||||
return -ENOMEM;
|
||||
|
||||
for (i = 0; i < PTRS_PER_S2_PGD; i++) {
|
||||
if (KVM_PREALLOC_LEVEL == 1)
|
||||
pgd_populate(NULL, pgd + i,
|
||||
(pud_t *)hwpgd + i * PTRS_PER_PUD);
|
||||
else if (KVM_PREALLOC_LEVEL == 2)
|
||||
pud_populate(NULL, pud_offset(pgd, 0) + i,
|
||||
(pmd_t *)hwpgd + i * PTRS_PER_PMD);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void *kvm_get_hwpgd(struct kvm *kvm)
|
||||
{
|
||||
pgd_t *pgd = kvm->arch.pgd;
|
||||
|
@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
|
|||
return pmd_offset(pud, 0);
|
||||
}
|
||||
|
||||
static inline void kvm_free_hwpgd(struct kvm *kvm)
|
||||
static inline unsigned int kvm_get_hwpgd_size(void)
|
||||
{
|
||||
if (KVM_PREALLOC_LEVEL > 0) {
|
||||
unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
|
||||
free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
|
||||
}
|
||||
if (KVM_PREALLOC_LEVEL > 0)
|
||||
return PTRS_PER_S2_PGD * PAGE_SIZE;
|
||||
return PTRS_PER_S2_PGD * sizeof(pgd_t);
|
||||
}
|
||||
|
||||
static inline bool kvm_page_empty(void *ptr)
|
||||
|
|
|
@ -114,6 +114,7 @@ struct vgic_ops {
|
|||
void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
|
||||
u64 (*get_elrsr)(const struct kvm_vcpu *vcpu);
|
||||
u64 (*get_eisr)(const struct kvm_vcpu *vcpu);
|
||||
void (*clear_eisr)(struct kvm_vcpu *vcpu);
|
||||
u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
|
||||
void (*enable_underflow)(struct kvm_vcpu *vcpu);
|
||||
void (*disable_underflow)(struct kvm_vcpu *vcpu);
|
||||
|
|
|
@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
|
|||
{
|
||||
if (!(lr_desc.state & LR_STATE_MASK))
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
|
||||
else
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
|
||||
}
|
||||
|
||||
static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
|
||||
|
@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
|
|||
return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
|
||||
}
|
||||
|
||||
static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
|
||||
}
|
||||
|
||||
static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
|
||||
|
@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
|
|||
.sync_lr_elrsr = vgic_v2_sync_lr_elrsr,
|
||||
.get_elrsr = vgic_v2_get_elrsr,
|
||||
.get_eisr = vgic_v2_get_eisr,
|
||||
.clear_eisr = vgic_v2_clear_eisr,
|
||||
.get_interrupt_status = vgic_v2_get_interrupt_status,
|
||||
.enable_underflow = vgic_v2_enable_underflow,
|
||||
.disable_underflow = vgic_v2_disable_underflow,
|
||||
|
|
|
@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
|
|||
{
|
||||
if (!(lr_desc.state & LR_STATE_MASK))
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
|
||||
else
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
|
||||
}
|
||||
|
||||
static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
|
||||
|
@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
|
|||
return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
|
||||
}
|
||||
|
||||
static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
|
||||
}
|
||||
|
||||
static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
|
||||
|
@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
|
|||
.sync_lr_elrsr = vgic_v3_sync_lr_elrsr,
|
||||
.get_elrsr = vgic_v3_get_elrsr,
|
||||
.get_eisr = vgic_v3_get_eisr,
|
||||
.clear_eisr = vgic_v3_clear_eisr,
|
||||
.get_interrupt_status = vgic_v3_get_interrupt_status,
|
||||
.enable_underflow = vgic_v3_enable_underflow,
|
||||
.disable_underflow = vgic_v3_disable_underflow,
|
||||
|
|
|
@ -883,6 +883,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
|
|||
return vgic_ops->get_eisr(vcpu);
|
||||
}
|
||||
|
||||
static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
vgic_ops->clear_eisr(vcpu);
|
||||
}
|
||||
|
||||
static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
return vgic_ops->get_interrupt_status(vcpu);
|
||||
|
@ -922,6 +927,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
|
|||
vgic_set_lr(vcpu, lr_nr, vlr);
|
||||
clear_bit(lr_nr, vgic_cpu->lr_used);
|
||||
vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
|
||||
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -978,6 +984,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
|
|||
BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
|
||||
vlr.state |= LR_STATE_PENDING;
|
||||
vgic_set_lr(vcpu, lr, vlr);
|
||||
vgic_sync_lr_elrsr(vcpu, lr, vlr);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -999,6 +1006,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
|
|||
vlr.state |= LR_EOI_INT;
|
||||
|
||||
vgic_set_lr(vcpu, lr, vlr);
|
||||
vgic_sync_lr_elrsr(vcpu, lr, vlr);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -1136,6 +1144,14 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
|
|||
if (status & INT_STATUS_UNDERFLOW)
|
||||
vgic_disable_underflow(vcpu);
|
||||
|
||||
/*
|
||||
* In the next iterations of the vcpu loop, if we sync the vgic state
|
||||
* after flushing it, but before entering the guest (this happens for
|
||||
* pending signals and vmid rollovers), then make sure we don't pick
|
||||
* up any old maintenance interrupts here.
|
||||
*/
|
||||
vgic_clear_eisr(vcpu);
|
||||
|
||||
return level_pending;
|
||||
}
|
||||
|
||||
|
@ -1583,8 +1599,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
|
|||
* emulation. So check this here again. KVM_CREATE_DEVICE does
|
||||
* the proper checks already.
|
||||
*/
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
|
||||
return -ENODEV;
|
||||
if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
|
||||
ret = -ENODEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Any time a vcpu is run, vcpu_load is called which tries to grab the
|
||||
|
|
Loading…
Reference in a new issue