Merge branch 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm

* 'kvm-updates/2.6.34' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (145 commits)
  KVM: x86: Add KVM_CAP_X86_ROBUST_SINGLESTEP
  KVM: VMX: Update instruction length on intercepted BP
  KVM: Fix emulate_sys[call, enter, exit]()'s fault handling
  KVM: Fix segment descriptor loading
  KVM: Fix load_guest_segment_descriptor() to inject page fault
  KVM: x86 emulator: Forbid modifying CS segment register by mov instruction
  KVM: Convert kvm->requests_lock to raw_spinlock_t
  KVM: Convert i8254/i8259 locks to raw_spinlocks
  KVM: x86 emulator: disallow opcode 82 in 64-bit mode
  KVM: x86 emulator: code style cleanup
  KVM: Plan obsolescence of kernel allocated slots, paravirt mmu
  KVM: x86 emulator: Add LOCK prefix validity checking
  KVM: x86 emulator: Check CPL level during privilege instruction emulation
  KVM: x86 emulator: Fix popf emulation
  KVM: x86 emulator: Check IOPL level during io instruction emulation
  KVM: x86 emulator: fix memory access during x86 emulation
  KVM: x86 emulator: Add Virtual-8086 mode of emulation
  KVM: x86 emulator: Add group9 instruction decoding
  KVM: x86 emulator: Add group8 instruction decoding
  KVM: do not store wqh in irqfd
  ...

Trivial conflicts in Documentation/feature-removal-schedule.txt
This commit is contained in:
Linus Torvalds 2010-03-05 13:12:34 -08:00
commit c812a51d11
74 changed files with 3772 additions and 1663 deletions

View file

@ -556,3 +556,35 @@ Why: udev fully replaces this special file system that only contains CAPI
NCCI TTY device nodes. User space (pppdcapiplugin) works without
noticing the difference.
Who: Jan Kiszka <jan.kiszka@web.de>
----------------------------
What: KVM memory aliases support
When: July 2010
Why: Memory aliasing support is used for speeding up guest vga access
through the vga windows.
Modern userspace no longer uses this feature, so it's just bitrotted
code and can be removed with no impact.
Who: Avi Kivity <avi@redhat.com>
----------------------------
What: KVM kernel-allocated memory slots
When: July 2010
Why: Since 2.6.25, kvm supports user-allocated memory slots, which are
much more flexible than kernel-allocated slots. All current userspace
supports the newer interface and this code can be removed with no
impact.
Who: Avi Kivity <avi@redhat.com>
----------------------------
What: KVM paravirt mmu host support
When: January 2011
Why: The paravirt mmu host support is slower than non-paravirt mmu, both
on newer and older hardware. It is already not exposed to the guest,
and kept only for live migration purposes.
Who: Avi Kivity <avi@redhat.com>
----------------------------

View file

@ -23,12 +23,12 @@ of a virtual machine. The ioctls belong to three classes
Only run vcpu ioctls from the same thread that was used to create the
vcpu.
2. File descritpors
2. File descriptors
The kvm API is centered around file descriptors. An initial
open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
handle will create a VM file descripror which can be used to issue VM
handle will create a VM file descriptor which can be used to issue VM
ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
and return a file descriptor pointing to it. Finally, ioctls on a vcpu
fd can be used to control the vcpu, including the important task of
@ -643,7 +643,7 @@ Type: vm ioctl
Parameters: struct kvm_clock_data (in)
Returns: 0 on success, -1 on error
Sets the current timestamp of kvmclock to the valued specific in its parameter.
Sets the current timestamp of kvmclock to the value specified in its parameter.
In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
such as migration.
@ -795,11 +795,11 @@ Unused.
__u64 data_offset; /* relative to kvm_run start */
} io;
If exit_reason is KVM_EXIT_IO_IN or KVM_EXIT_IO_OUT, then the vcpu has
If exit_reason is KVM_EXIT_IO, then the vcpu has
executed a port I/O instruction which could not be satisfied by kvm.
data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
where kvm expects application code to place the data for the next
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a patcked array.
KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
struct {
struct kvm_debug_exit_arch arch;
@ -815,7 +815,7 @@ Unused.
__u8 is_write;
} mmio;
If exit_reason is KVM_EXIT_MMIO or KVM_EXIT_IO_OUT, then the vcpu has
If exit_reason is KVM_EXIT_MMIO, then the vcpu has
executed a memory-mapped I/O instruction which could not be satisfied
by kvm. The 'data' member contains the written data if 'is_write' is
true, and should be filled by application code otherwise.

View file

@ -3173,7 +3173,7 @@ F: arch/x86/include/asm/svm.h
F: arch/x86/kvm/svm.c
KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
M: Hollis Blanchard <hollisb@us.ibm.com>
M: Alexander Graf <agraf@suse.de>
L: kvm-ppc@vger.kernel.org
W: http://kvm.qumranet.com
S: Supported

View file

@ -26,6 +26,7 @@ config KVM
select ANON_INODES
select HAVE_KVM_IRQCHIP
select KVM_APIC_ARCHITECTURE
select KVM_MMIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent

View file

@ -241,10 +241,10 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
return 0;
mmio:
if (p->dir)
r = kvm_io_bus_read(&vcpu->kvm->mmio_bus, p->addr,
r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
p->size, &p->data);
else
r = kvm_io_bus_write(&vcpu->kvm->mmio_bus, p->addr,
r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
p->size, &p->data);
if (r)
printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
@ -636,12 +636,9 @@ static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
union context *host_ctx, *guest_ctx;
int r;
int r, idx;
/*
* down_read() may sleep and return with interrupts enabled
*/
down_read(&vcpu->kvm->slots_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
again:
if (signal_pending(current)) {
@ -663,7 +660,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (r < 0)
goto vcpu_run_fail;
up_read(&vcpu->kvm->slots_lock);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
kvm_guest_enter();
/*
@ -687,7 +684,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
kvm_guest_exit();
preempt_enable();
down_read(&vcpu->kvm->slots_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_handle_exit(kvm_run, vcpu);
@ -697,10 +694,10 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
}
out:
up_read(&vcpu->kvm->slots_lock);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (r > 0) {
kvm_resched(vcpu);
down_read(&vcpu->kvm->slots_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
goto again;
}
@ -971,7 +968,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
kfree(kvm->arch.vioapic);
kvm_ioapic_destroy(kvm);
goto out;
}
break;
@ -1377,12 +1374,14 @@ static void free_kvm(struct kvm *kvm)
static void kvm_release_vm_pages(struct kvm *kvm)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
int i, j;
unsigned long base_gfn;
for (i = 0; i < kvm->nmemslots; i++) {
memslot = &kvm->memslots[i];
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; i++) {
memslot = &slots->memslots[i];
base_gfn = memslot->base_gfn;
for (j = 0; j < memslot->npages; j++) {
@ -1405,6 +1404,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
kvm_free_physmem(kvm);
cleanup_srcu_struct(&kvm->srcu);
free_kvm(kvm);
}
@ -1576,15 +1576,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
return r;
}
int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
unsigned long i;
unsigned long pfn;
int npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
int npages = memslot->npages;
unsigned long base_gfn = memslot->base_gfn;
if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
@ -1608,6 +1608,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
{
return;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
{
kvm_flush_remote_tlbs(kvm);
@ -1802,7 +1810,7 @@ static int kvm_ia64_sync_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
memslot = &kvm->memslots[log->slot];
memslot = &kvm->memslots->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@ -1827,6 +1835,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot;
int is_dirty = 0;
mutex_lock(&kvm->slots_lock);
spin_lock(&kvm->arch.dirty_log_lock);
r = kvm_ia64_sync_dirty_log(kvm, log);
@ -1840,12 +1849,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
kvm_flush_remote_tlbs(kvm);
memslot = &kvm->memslots[log->slot];
memslot = &kvm->memslots->memslots[log->slot];
n = ALIGN(memslot->npages, BITS_PER_LONG) / 8;
memset(memslot->dirty_bitmap, 0, n);
}
r = 0;
out:
mutex_unlock(&kvm->slots_lock);
spin_unlock(&kvm->arch.dirty_log_lock);
return r;
}

View file

@ -75,7 +75,7 @@ static void set_pal_result(struct kvm_vcpu *vcpu,
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
if (p && p->exit_reason == EXIT_REASON_PAL_CALL) {
if (p->exit_reason == EXIT_REASON_PAL_CALL) {
p->u.pal_data.ret = result;
return ;
}
@ -87,7 +87,7 @@ static void set_sal_result(struct kvm_vcpu *vcpu,
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
if (p && p->exit_reason == EXIT_REASON_SAL_CALL) {
if (p->exit_reason == EXIT_REASON_SAL_CALL) {
p->u.sal_data.ret = result;
return ;
}
@ -322,7 +322,7 @@ static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
struct exit_ctl_data *p;
p = kvm_get_exit_data(vcpu);
if (p && (p->exit_reason == EXIT_REASON_PAL_CALL))
if (p->exit_reason == EXIT_REASON_PAL_CALL)
index = p->u.pal_data.gr28;
return index;
@ -646,18 +646,16 @@ static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
p = kvm_get_exit_data(vcpu);
if (p) {
if (p->exit_reason == EXIT_REASON_SAL_CALL) {
*in0 = p->u.sal_data.in0;
*in1 = p->u.sal_data.in1;
*in2 = p->u.sal_data.in2;
*in3 = p->u.sal_data.in3;
*in4 = p->u.sal_data.in4;
*in5 = p->u.sal_data.in5;
*in6 = p->u.sal_data.in6;
*in7 = p->u.sal_data.in7;
return ;
}
if (p->exit_reason == EXIT_REASON_SAL_CALL) {
*in0 = p->u.sal_data.in0;
*in1 = p->u.sal_data.in1;
*in2 = p->u.sal_data.in2;
*in3 = p->u.sal_data.in3;
*in4 = p->u.sal_data.in4;
*in5 = p->u.sal_data.in5;
*in6 = p->u.sal_data.in6;
*in7 = p->u.sal_data.in7;
return ;
}
*in0 = 0;
}

View file

@ -316,8 +316,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
return;
} else {
inst_type = -1;
panic_vm(vcpu, "Unsupported MMIO access instruction! \
Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
panic_vm(vcpu, "Unsupported MMIO access instruction! "
"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
bundle.i64[0], bundle.i64[1]);
}

View file

@ -1639,8 +1639,8 @@ void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
* Otherwise panic
*/
if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
panic_vm(vcpu, "Only support guests with vpsr.pk =0 \
& vpsr.is=0\n");
panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
"& vpsr.is=0\n");
/*
* For those IA64_PSR bits: id/da/dd/ss/ed/ia

View file

@ -97,4 +97,10 @@
#define RESUME_HOST RESUME_FLAG_HOST
#define RESUME_HOST_NV (RESUME_FLAG_HOST|RESUME_FLAG_NV)
#define KVM_GUEST_MODE_NONE 0
#define KVM_GUEST_MODE_GUEST 1
#define KVM_GUEST_MODE_SKIP 2
#define KVM_INST_FETCH_FAILED -1
#endif /* __POWERPC_KVM_ASM_H__ */

View file

@ -22,7 +22,7 @@
#include <linux/types.h>
#include <linux/kvm_host.h>
#include <asm/kvm_ppc.h>
#include <asm/kvm_book3s_64_asm.h>
struct kvmppc_slb {
u64 esid;
@ -33,7 +33,8 @@ struct kvmppc_slb {
bool Ks;
bool Kp;
bool nx;
bool large;
bool large; /* PTEs are 16MB */
bool tb; /* 1TB segment */
bool class;
};
@ -69,6 +70,7 @@ struct kvmppc_sid_map {
struct kvmppc_vcpu_book3s {
struct kvm_vcpu vcpu;
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
struct kvmppc_sid_map sid_map[SID_MAP_NUM];
struct kvmppc_slb slb[64];
struct {
@ -89,6 +91,7 @@ struct kvmppc_vcpu_book3s {
u64 vsid_next;
u64 vsid_max;
int context_id;
ulong prog_flags; /* flags to inject when giving a 700 trap */
};
#define CONTEXT_HOST 0
@ -119,6 +122,10 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat,
extern u32 kvmppc_trampoline_lowmem;
extern u32 kvmppc_trampoline_enter;
extern void kvmppc_rmcall(ulong srr0, ulong srr1);
extern void kvmppc_load_up_fpu(void);
extern void kvmppc_load_up_altivec(void);
extern void kvmppc_load_up_vsx(void);
static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
{

View file

@ -20,6 +20,8 @@
#ifndef __ASM_KVM_BOOK3S_ASM_H__
#define __ASM_KVM_BOOK3S_ASM_H__
#ifdef __ASSEMBLY__
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_asm.h>
@ -55,4 +57,20 @@ kvmppc_resume_\intno:
#endif /* CONFIG_KVM_BOOK3S_64_HANDLER */
#else /*__ASSEMBLY__ */
struct kvmppc_book3s_shadow_vcpu {
ulong gpr[14];
u32 cr;
u32 xer;
ulong host_r1;
ulong host_r2;
ulong handler;
ulong scratch0;
ulong scratch1;
ulong vmhandler;
};
#endif /*__ASSEMBLY__ */
#endif /* __ASM_KVM_BOOK3S_ASM_H__ */

View file

@ -52,9 +52,12 @@ struct kvmppc_vcpu_e500 {
u32 mas5;
u32 mas6;
u32 mas7;
u32 l1csr0;
u32 l1csr1;
u32 hid0;
u32 hid1;
u32 tlb0cfg;
u32 tlb1cfg;
struct kvm_vcpu vcpu;
};

View file

@ -167,23 +167,40 @@ struct kvm_vcpu_arch {
ulong trampoline_lowmem;
ulong trampoline_enter;
ulong highmem_handler;
ulong rmcall;
ulong host_paca_phys;
struct kvmppc_mmu mmu;
#endif
u64 fpr[32];
ulong gpr[32];
u64 fpr[32];
u32 fpscr;
#ifdef CONFIG_ALTIVEC
vector128 vr[32];
vector128 vscr;
#endif
#ifdef CONFIG_VSX
u64 vsr[32];
#endif
ulong pc;
u32 cr;
ulong ctr;
ulong lr;
#ifdef CONFIG_BOOKE
ulong xer;
u32 cr;
#endif
ulong msr;
#ifdef CONFIG_PPC64
ulong shadow_msr;
ulong shadow_srr1;
ulong hflags;
ulong guest_owned_ext;
#endif
u32 mmucr;
ulong sprg0;
@ -242,6 +259,8 @@ struct kvm_vcpu_arch {
#endif
ulong fault_dear;
ulong fault_esr;
ulong queued_dear;
ulong queued_esr;
gpa_t paddr_accessed;
u8 io_gpr; /* GPR used as IO source/target */

View file

@ -28,6 +28,9 @@
#include <linux/types.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#ifdef CONFIG_PPC_BOOK3S
#include <asm/kvm_book3s.h>
#endif
enum emulation_result {
EMULATE_DONE, /* no further processing */
@ -80,8 +83,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq);
@ -95,4 +99,81 @@ extern void kvmppc_booke_exit(void);
extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
#ifdef CONFIG_PPC_BOOK3S
/* We assume we're always acting on the current vcpu */
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
if ( num < 14 ) {
get_paca()->shadow_vcpu.gpr[num] = val;
to_book3s(vcpu)->shadow_vcpu.gpr[num] = val;
} else
vcpu->arch.gpr[num] = val;
}
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
if ( num < 14 )
return get_paca()->shadow_vcpu.gpr[num];
else
return vcpu->arch.gpr[num];
}
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
get_paca()->shadow_vcpu.cr = val;
to_book3s(vcpu)->shadow_vcpu.cr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
return get_paca()->shadow_vcpu.cr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
get_paca()->shadow_vcpu.xer = val;
to_book3s(vcpu)->shadow_vcpu.xer = val;
}
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return get_paca()->shadow_vcpu.xer;
}
#else
static inline void kvmppc_set_gpr(struct kvm_vcpu *vcpu, int num, ulong val)
{
vcpu->arch.gpr[num] = val;
}
static inline ulong kvmppc_get_gpr(struct kvm_vcpu *vcpu, int num)
{
return vcpu->arch.gpr[num];
}
static inline void kvmppc_set_cr(struct kvm_vcpu *vcpu, u32 val)
{
vcpu->arch.cr = val;
}
static inline u32 kvmppc_get_cr(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr;
}
static inline void kvmppc_set_xer(struct kvm_vcpu *vcpu, u32 val)
{
vcpu->arch.xer = val;
}
static inline u32 kvmppc_get_xer(struct kvm_vcpu *vcpu)
{
return vcpu->arch.xer;
}
#endif
#endif /* __POWERPC_KVM_PPC_H__ */

View file

@ -19,6 +19,9 @@
#include <asm/mmu.h>
#include <asm/page.h>
#include <asm/exception-64e.h>
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#include <asm/kvm_book3s_64_asm.h>
#endif
register struct paca_struct *local_paca asm("r13");
@ -135,6 +138,8 @@ struct paca_struct {
u64 esid;
u64 vsid;
} kvm_slb[64]; /* guest SLB */
/* We use this to store guest state in */
struct kvmppc_book3s_shadow_vcpu shadow_vcpu;
u8 kvm_slb_max; /* highest used guest slb entry */
u8 kvm_in_guest; /* are we inside the guest? */
#endif

View file

@ -426,6 +426,10 @@
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
#define SPRN_HSRR0 0x13A /* Save/Restore Register 0 */
#define SPRN_HSRR1 0x13B /* Save/Restore Register 1 */

View file

@ -194,6 +194,30 @@ int main(void)
DEFINE(PACA_KVM_IN_GUEST, offsetof(struct paca_struct, kvm_in_guest));
DEFINE(PACA_KVM_SLB, offsetof(struct paca_struct, kvm_slb));
DEFINE(PACA_KVM_SLB_MAX, offsetof(struct paca_struct, kvm_slb_max));
DEFINE(PACA_KVM_CR, offsetof(struct paca_struct, shadow_vcpu.cr));
DEFINE(PACA_KVM_XER, offsetof(struct paca_struct, shadow_vcpu.xer));
DEFINE(PACA_KVM_R0, offsetof(struct paca_struct, shadow_vcpu.gpr[0]));
DEFINE(PACA_KVM_R1, offsetof(struct paca_struct, shadow_vcpu.gpr[1]));
DEFINE(PACA_KVM_R2, offsetof(struct paca_struct, shadow_vcpu.gpr[2]));
DEFINE(PACA_KVM_R3, offsetof(struct paca_struct, shadow_vcpu.gpr[3]));
DEFINE(PACA_KVM_R4, offsetof(struct paca_struct, shadow_vcpu.gpr[4]));
DEFINE(PACA_KVM_R5, offsetof(struct paca_struct, shadow_vcpu.gpr[5]));
DEFINE(PACA_KVM_R6, offsetof(struct paca_struct, shadow_vcpu.gpr[6]));
DEFINE(PACA_KVM_R7, offsetof(struct paca_struct, shadow_vcpu.gpr[7]));
DEFINE(PACA_KVM_R8, offsetof(struct paca_struct, shadow_vcpu.gpr[8]));
DEFINE(PACA_KVM_R9, offsetof(struct paca_struct, shadow_vcpu.gpr[9]));
DEFINE(PACA_KVM_R10, offsetof(struct paca_struct, shadow_vcpu.gpr[10]));
DEFINE(PACA_KVM_R11, offsetof(struct paca_struct, shadow_vcpu.gpr[11]));
DEFINE(PACA_KVM_R12, offsetof(struct paca_struct, shadow_vcpu.gpr[12]));
DEFINE(PACA_KVM_R13, offsetof(struct paca_struct, shadow_vcpu.gpr[13]));
DEFINE(PACA_KVM_HOST_R1, offsetof(struct paca_struct, shadow_vcpu.host_r1));
DEFINE(PACA_KVM_HOST_R2, offsetof(struct paca_struct, shadow_vcpu.host_r2));
DEFINE(PACA_KVM_VMHANDLER, offsetof(struct paca_struct,
shadow_vcpu.vmhandler));
DEFINE(PACA_KVM_SCRATCH0, offsetof(struct paca_struct,
shadow_vcpu.scratch0));
DEFINE(PACA_KVM_SCRATCH1, offsetof(struct paca_struct,
shadow_vcpu.scratch1));
#endif
#endif /* CONFIG_PPC64 */
@ -389,8 +413,6 @@ int main(void)
DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid));
DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr));
DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr));
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr));
DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc));
DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr));
@ -411,11 +433,16 @@ int main(void)
DEFINE(VCPU_HOST_R2, offsetof(struct kvm_vcpu, arch.host_r2));
DEFINE(VCPU_HOST_MSR, offsetof(struct kvm_vcpu, arch.host_msr));
DEFINE(VCPU_SHADOW_MSR, offsetof(struct kvm_vcpu, arch.shadow_msr));
DEFINE(VCPU_SHADOW_SRR1, offsetof(struct kvm_vcpu, arch.shadow_srr1));
DEFINE(VCPU_TRAMPOLINE_LOWMEM, offsetof(struct kvm_vcpu, arch.trampoline_lowmem));
DEFINE(VCPU_TRAMPOLINE_ENTER, offsetof(struct kvm_vcpu, arch.trampoline_enter));
DEFINE(VCPU_HIGHMEM_HANDLER, offsetof(struct kvm_vcpu, arch.highmem_handler));
DEFINE(VCPU_RMCALL, offsetof(struct kvm_vcpu, arch.rmcall));
DEFINE(VCPU_HFLAGS, offsetof(struct kvm_vcpu, arch.hflags));
#endif
#else
DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr));
DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer));
#endif /* CONFIG_PPC64 */
#endif
#ifdef CONFIG_44x
DEFINE(PGD_T_LOG2, PGD_T_LOG2);

View file

@ -107,6 +107,7 @@ EXPORT_SYMBOL(giveup_altivec);
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_VSX
EXPORT_SYMBOL(giveup_vsx);
EXPORT_SYMBOL_GPL(__giveup_vsx);
#endif /* CONFIG_VSX */
#ifdef CONFIG_SPE
EXPORT_SYMBOL(giveup_spe);

View file

@ -65,13 +65,14 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
break;
case DCRN_CPR0_CONFIG_DATA:
local_irq_disable();
mtdcr(DCRN_CPR0_CONFIG_ADDR,
vcpu->arch.cpr0_cfgaddr);
vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA);
kvmppc_set_gpr(vcpu, rt,
mfdcr(DCRN_CPR0_CONFIG_DATA));
local_irq_enable();
break;
default:
@ -93,11 +94,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* emulate some access in kernel */
switch (dcrn) {
case DCRN_CPR0_CONFIG_ADDR:
vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs];
vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
break;
default:
run->dcr.dcrn = dcrn;
run->dcr.data = vcpu->arch.gpr[rs];
run->dcr.data = kvmppc_get_gpr(vcpu, rs);
run->dcr.is_write = 1;
vcpu->arch.dcr_needed = 1;
kvmppc_account_exit(vcpu, DCR_EXITS);
@ -146,13 +147,13 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
switch (sprn) {
case SPRN_PID:
kvmppc_set_pid(vcpu, vcpu->arch.gpr[rs]); break;
kvmppc_set_pid(vcpu, kvmppc_get_gpr(vcpu, rs)); break;
case SPRN_MMUCR:
vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break;
vcpu->arch.mmucr = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_CCR0:
vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break;
vcpu->arch.ccr0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_CCR1:
vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break;
vcpu->arch.ccr1 = kvmppc_get_gpr(vcpu, rs); break;
default:
emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, rs);
}
@ -167,13 +168,13 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
switch (sprn) {
case SPRN_PID:
vcpu->arch.gpr[rt] = vcpu->arch.pid; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.pid); break;
case SPRN_MMUCR:
vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.mmucr); break;
case SPRN_CCR0:
vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr0); break;
case SPRN_CCR1:
vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ccr1); break;
default:
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);
}

View file

@ -439,7 +439,7 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
struct kvmppc_44x_tlbe *tlbe;
unsigned int gtlb_index;
gtlb_index = vcpu->arch.gpr[ra];
gtlb_index = kvmppc_get_gpr(vcpu, ra);
if (gtlb_index > KVM44x_GUEST_TLB_SIZE) {
printk("%s: index %d\n", __func__, gtlb_index);
kvmppc_dump_vcpu(vcpu);
@ -455,15 +455,15 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws)
switch (ws) {
case PPC44x_TLB_PAGEID:
tlbe->tid = get_mmucr_stid(vcpu);
tlbe->word0 = vcpu->arch.gpr[rs];
tlbe->word0 = kvmppc_get_gpr(vcpu, rs);
break;
case PPC44x_TLB_XLAT:
tlbe->word1 = vcpu->arch.gpr[rs];
tlbe->word1 = kvmppc_get_gpr(vcpu, rs);
break;
case PPC44x_TLB_ATTRIB:
tlbe->word2 = vcpu->arch.gpr[rs];
tlbe->word2 = kvmppc_get_gpr(vcpu, rs);
break;
default:
@ -500,18 +500,20 @@ int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc)
unsigned int as = get_mmucr_sts(vcpu);
unsigned int pid = get_mmucr_stid(vcpu);
ea = vcpu->arch.gpr[rb];
ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
ea += vcpu->arch.gpr[ra];
ea += kvmppc_get_gpr(vcpu, ra);
gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as);
if (rc) {
u32 cr = kvmppc_get_cr(vcpu);
if (gtlb_index < 0)
vcpu->arch.cr &= ~0x20000000;
kvmppc_set_cr(vcpu, cr & ~0x20000000);
else
vcpu->arch.cr |= 0x20000000;
kvmppc_set_cr(vcpu, cr | 0x20000000);
}
vcpu->arch.gpr[rt] = gtlb_index;
kvmppc_set_gpr(vcpu, rt, gtlb_index);
kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS);
return EMULATE_DONE;

View file

@ -20,6 +20,7 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
select KVM_MMIO
config KVM_BOOK3S_64_HANDLER
bool

View file

@ -33,12 +33,9 @@
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define DEBUG_EXT */
/* Without AGGRESSIVE_DEC we only fire off a DEC interrupt when DEC turns 0.
* When set, we retrigger a DEC interrupt after that if DEC <= 0.
* PPC32 Linux runs faster without AGGRESSIVE_DEC, PPC64 Linux requires it. */
/* #define AGGRESSIVE_DEC */
static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr);
struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exits", VCPU_STAT(sum_exits) },
@ -72,16 +69,24 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
memcpy(get_paca()->kvm_slb, to_book3s(vcpu)->slb_shadow, sizeof(get_paca()->kvm_slb));
memcpy(&get_paca()->shadow_vcpu, &to_book3s(vcpu)->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
get_paca()->kvm_slb_max = to_book3s(vcpu)->slb_shadow_max;
}
void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
{
memcpy(to_book3s(vcpu)->slb_shadow, get_paca()->kvm_slb, sizeof(get_paca()->kvm_slb));
memcpy(&to_book3s(vcpu)->shadow_vcpu, &get_paca()->shadow_vcpu,
sizeof(get_paca()->shadow_vcpu));
to_book3s(vcpu)->slb_shadow_max = get_paca()->kvm_slb_max;
kvmppc_giveup_ext(vcpu, MSR_FP);
kvmppc_giveup_ext(vcpu, MSR_VEC);
kvmppc_giveup_ext(vcpu, MSR_VSX);
}
#if defined(AGGRESSIVE_DEC) || defined(EXIT_DEBUG)
#if defined(EXIT_DEBUG)
static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
{
u64 jd = mftb() - vcpu->arch.dec_jiffies;
@ -89,6 +94,23 @@ static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
}
#endif
static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
{
vcpu->arch.shadow_msr = vcpu->arch.msr;
/* Guest MSR values */
vcpu->arch.shadow_msr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE |
MSR_BE | MSR_DE;
/* Process MSR values */
vcpu->arch.shadow_msr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR |
MSR_EE;
/* External providers the guest reserved */
vcpu->arch.shadow_msr |= (vcpu->arch.msr & vcpu->arch.guest_owned_ext);
/* 64-bit Process MSR values */
#ifdef CONFIG_PPC_BOOK3S_64
vcpu->arch.shadow_msr |= MSR_ISF | MSR_HV;
#endif
}
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
{
ulong old_msr = vcpu->arch.msr;
@ -96,12 +118,10 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
#ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
#endif
msr &= to_book3s(vcpu)->msr_mask;
vcpu->arch.msr = msr;
vcpu->arch.shadow_msr = msr | MSR_USER32;
vcpu->arch.shadow_msr &= ( MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
MSR_FE1);
kvmppc_recalc_shadow_msr(vcpu);
if (msr & (MSR_WE|MSR_POW)) {
if (!vcpu->arch.pending_exceptions) {
@ -125,11 +145,10 @@ void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
vcpu->arch.mmu.reset_msr(vcpu);
}
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
static int kvmppc_book3s_vec2irqprio(unsigned int vec)
{
unsigned int prio;
vcpu->stat.queue_intr++;
switch (vec) {
case 0x100: prio = BOOK3S_IRQPRIO_SYSTEM_RESET; break;
case 0x200: prio = BOOK3S_IRQPRIO_MACHINE_CHECK; break;
@ -149,15 +168,31 @@ void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
default: prio = BOOK3S_IRQPRIO_MAX; break;
}
set_bit(prio, &vcpu->arch.pending_exceptions);
return prio;
}
static void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu,
unsigned int vec)
{
clear_bit(kvmppc_book3s_vec2irqprio(vec),
&vcpu->arch.pending_exceptions);
}
void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
{
vcpu->stat.queue_intr++;
set_bit(kvmppc_book3s_vec2irqprio(vec),
&vcpu->arch.pending_exceptions);
#ifdef EXIT_DEBUG
printk(KERN_INFO "Queueing interrupt %x\n", vec);
#endif
}
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
{
to_book3s(vcpu)->prog_flags = flags;
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_PROGRAM);
}
@ -171,6 +206,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
return test_bit(BOOK3S_INTERRUPT_DECREMENTER >> 7, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
}
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
@ -181,6 +221,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
{
int deliver = 1;
int vec = 0;
ulong flags = 0ULL;
switch (priority) {
case BOOK3S_IRQPRIO_DECREMENTER:
@ -214,6 +255,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
break;
case BOOK3S_IRQPRIO_PROGRAM:
vec = BOOK3S_INTERRUPT_PROGRAM;
flags = to_book3s(vcpu)->prog_flags;
break;
case BOOK3S_IRQPRIO_VSX:
vec = BOOK3S_INTERRUPT_VSX;
@ -244,7 +286,7 @@ int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
#endif
if (deliver)
kvmppc_inject_interrupt(vcpu, vec, 0ULL);
kvmppc_inject_interrupt(vcpu, vec, flags);
return deliver;
}
@ -254,21 +296,15 @@ void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
unsigned long *pending = &vcpu->arch.pending_exceptions;
unsigned int priority;
/* XXX be more clever here - no need to mftb() on every entry */
/* Issue DEC again if it's still active */
#ifdef AGGRESSIVE_DEC
if (vcpu->arch.msr & MSR_EE)
if (kvmppc_get_dec(vcpu) & 0x80000000)
kvmppc_core_queue_dec(vcpu);
#endif
#ifdef EXIT_DEBUG
if (vcpu->arch.pending_exceptions)
printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
#endif
priority = __ffs(*pending);
while (priority <= (sizeof(unsigned int) * 8)) {
if (kvmppc_book3s_irqprio_deliver(vcpu, priority)) {
if (kvmppc_book3s_irqprio_deliver(vcpu, priority) &&
(priority != BOOK3S_IRQPRIO_DECREMENTER)) {
/* DEC interrupts get cleared by mtdec */
clear_bit(priority, &vcpu->arch.pending_exceptions);
break;
}
@ -503,14 +539,14 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Page not found in guest PTE entries */
vcpu->arch.dear = vcpu->arch.fault_dear;
to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr;
vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EPERM) {
/* Storage protection */
vcpu->arch.dear = vcpu->arch.fault_dear;
to_book3s(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
to_book3s(vcpu)->dsisr |= DSISR_PROTFAULT;
vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x00000000f8000000ULL);
vcpu->arch.msr |= (vcpu->arch.shadow_srr1 & 0x00000000f8000000ULL);
kvmppc_book3s_queue_irqprio(vcpu, vec);
} else if (page_found == -EINVAL) {
/* Page not found in guest SLB */
@ -532,13 +568,122 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = kvmppc_emulate_mmio(run, vcpu);
if ( r == RESUME_HOST_NV )
r = RESUME_HOST;
if ( r == RESUME_GUEST_NV )
r = RESUME_GUEST;
}
return r;
}
static inline int get_fpr_index(int i)
{
#ifdef CONFIG_VSX
i *= 2;
#endif
return i;
}
/* Give up external provider (FPU, Altivec, VSX) */
static void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
struct thread_struct *t = &current->thread;
u64 *vcpu_fpr = vcpu->arch.fpr;
u64 *vcpu_vsx = vcpu->arch.vsr;
u64 *thread_fpr = (u64*)t->fpr;
int i;
if (!(vcpu->arch.guest_owned_ext & msr))
return;
#ifdef DEBUG_EXT
printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif
switch (msr) {
case MSR_FP:
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
vcpu->arch.fpscr = t->fpscr.val;
break;
case MSR_VEC:
#ifdef CONFIG_ALTIVEC
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
#endif
break;
case MSR_VSX:
#ifdef CONFIG_VSX
__giveup_vsx(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
#endif
break;
default:
BUG();
}
vcpu->arch.guest_owned_ext &= ~msr;
current->thread.regs->msr &= ~msr;
kvmppc_recalc_shadow_msr(vcpu);
}
/* Handle external providers (FPU, Altivec, VSX) */
static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
ulong msr)
{
struct thread_struct *t = &current->thread;
u64 *vcpu_fpr = vcpu->arch.fpr;
u64 *vcpu_vsx = vcpu->arch.vsr;
u64 *thread_fpr = (u64*)t->fpr;
int i;
if (!(vcpu->arch.msr & msr)) {
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
return RESUME_GUEST;
}
#ifdef DEBUG_EXT
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif
current->thread.regs->msr |= msr;
switch (msr) {
case MSR_FP:
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
t->fpscr.val = vcpu->arch.fpscr;
t->fpexc_mode = 0;
kvmppc_load_up_fpu();
break;
case MSR_VEC:
#ifdef CONFIG_ALTIVEC
memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
t->vscr = vcpu->arch.vscr;
t->vrsave = -1;
kvmppc_load_up_altivec();
#endif
break;
case MSR_VSX:
#ifdef CONFIG_VSX
for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
kvmppc_load_up_vsx();
#endif
break;
default:
BUG();
}
vcpu->arch.guest_owned_ext |= msr;
kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST;
}
int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
@ -563,7 +708,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_INST_STORAGE:
vcpu->stat.pf_instruc++;
/* only care about PTEG not found errors, but leave NX alone */
if (vcpu->arch.shadow_msr & 0x40000000) {
if (vcpu->arch.shadow_srr1 & 0x40000000) {
r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc, exit_nr);
vcpu->stat.sp_instruc++;
} else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
@ -575,7 +720,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
} else {
vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
vcpu->arch.msr |= vcpu->arch.shadow_srr1 & 0x58000000;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
r = RESUME_GUEST;
@ -621,6 +766,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_PROGRAM:
{
enum emulation_result er;
ulong flags;
flags = vcpu->arch.shadow_srr1 & 0x1f0000ull;
if (vcpu->arch.msr & MSR_PR) {
#ifdef EXIT_DEBUG
@ -628,7 +776,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
#endif
if ((vcpu->arch.last_inst & 0xff0007ff) !=
(INS_DCBZ & 0xfffffff7)) {
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
}
@ -638,12 +786,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
er = kvmppc_emulate_instruction(run, vcpu);
switch (er) {
case EMULATE_DONE:
r = RESUME_GUEST;
r = RESUME_GUEST_NV;
break;
case EMULATE_FAIL:
printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
__func__, vcpu->arch.pc, vcpu->arch.last_inst);
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
kvmppc_core_queue_program(vcpu, flags);
r = RESUME_GUEST;
break;
default:
@ -653,23 +801,30 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
case BOOK3S_INTERRUPT_SYSCALL:
#ifdef EXIT_DEBUG
printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
printk(KERN_INFO "Syscall Nr %d\n", (int)kvmppc_get_gpr(vcpu, 0));
#endif
vcpu->stat.syscall_exits++;
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_FP_UNAVAIL:
case BOOK3S_INTERRUPT_TRACE:
r = kvmppc_handle_ext(vcpu, exit_nr, MSR_FP);
break;
case BOOK3S_INTERRUPT_ALTIVEC:
r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VEC);
break;
case BOOK3S_INTERRUPT_VSX:
r = kvmppc_handle_ext(vcpu, exit_nr, MSR_VSX);
break;
case BOOK3S_INTERRUPT_MACHINE_CHECK:
case BOOK3S_INTERRUPT_TRACE:
kvmppc_book3s_queue_irqprio(vcpu, exit_nr);
r = RESUME_GUEST;
break;
default:
/* Ugh - bork here! What did we get? */
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n",
exit_nr, vcpu->arch.pc, vcpu->arch.shadow_srr1);
r = RESUME_HOST;
BUG();
break;
@ -712,10 +867,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int i;
regs->pc = vcpu->arch.pc;
regs->cr = vcpu->arch.cr;
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
regs->lr = vcpu->arch.lr;
regs->xer = vcpu->arch.xer;
regs->xer = kvmppc_get_xer(vcpu);
regs->msr = vcpu->arch.msr;
regs->srr0 = vcpu->arch.srr0;
regs->srr1 = vcpu->arch.srr1;
@ -729,7 +884,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->sprg7 = vcpu->arch.sprg6;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = vcpu->arch.gpr[i];
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
return 0;
}
@ -739,10 +894,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int i;
vcpu->arch.pc = regs->pc;
vcpu->arch.cr = regs->cr;
kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
@ -754,8 +909,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
vcpu->arch.gpr[i] = regs->gpr[i];
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
return 0;
}
@ -850,7 +1005,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
int is_dirty = 0;
int r, n;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
r = kvm_get_dirty_log(kvm, log, &is_dirty);
if (r)
@ -858,7 +1013,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
/* If nothing is dirty, don't bother messing with page tables. */
if (is_dirty) {
memslot = &kvm->memslots[log->slot];
memslot = &kvm->memslots->memslots[log->slot];
ga = memslot->base_gfn << PAGE_SHIFT;
ga_end = ga + (memslot->npages << PAGE_SHIFT);
@ -872,7 +1027,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
r = 0;
out:
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return r;
}
@ -910,6 +1065,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
vcpu->arch.rmcall = *(ulong*)kvmppc_rmcall;
vcpu->arch.shadow_msr = MSR_USER64;
@ -943,6 +1099,10 @@ extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
{
int ret;
struct thread_struct ext_bkp;
bool save_vec = current->thread.used_vr;
bool save_vsx = current->thread.used_vsr;
ulong ext_msr;
/* No need to go into the guest when all we do is going out */
if (signal_pending(current)) {
@ -950,6 +1110,35 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
return -EINTR;
}
/* Save FPU state in stack */
if (current->thread.regs->msr & MSR_FP)
giveup_fpu(current);
memcpy(ext_bkp.fpr, current->thread.fpr, sizeof(current->thread.fpr));
ext_bkp.fpscr = current->thread.fpscr;
ext_bkp.fpexc_mode = current->thread.fpexc_mode;
#ifdef CONFIG_ALTIVEC
/* Save Altivec state in stack */
if (save_vec) {
if (current->thread.regs->msr & MSR_VEC)
giveup_altivec(current);
memcpy(ext_bkp.vr, current->thread.vr, sizeof(ext_bkp.vr));
ext_bkp.vscr = current->thread.vscr;
ext_bkp.vrsave = current->thread.vrsave;
}
ext_bkp.used_vr = current->thread.used_vr;
#endif
#ifdef CONFIG_VSX
/* Save VSX state in stack */
if (save_vsx && (current->thread.regs->msr & MSR_VSX))
__giveup_vsx(current);
ext_bkp.used_vsr = current->thread.used_vsr;
#endif
/* Remember the MSR with disabled extensions */
ext_msr = current->thread.regs->msr;
/* XXX we get called with irq disabled - change that! */
local_irq_enable();
@ -957,6 +1146,32 @@ int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
local_irq_disable();
current->thread.regs->msr = ext_msr;
/* Make sure we save the guest FPU/Altivec/VSX state */
kvmppc_giveup_ext(vcpu, MSR_FP);
kvmppc_giveup_ext(vcpu, MSR_VEC);
kvmppc_giveup_ext(vcpu, MSR_VSX);
/* Restore FPU state from stack */
memcpy(current->thread.fpr, ext_bkp.fpr, sizeof(ext_bkp.fpr));
current->thread.fpscr = ext_bkp.fpscr;
current->thread.fpexc_mode = ext_bkp.fpexc_mode;
#ifdef CONFIG_ALTIVEC
/* Restore Altivec state from stack */
if (save_vec && current->thread.used_vr) {
memcpy(current->thread.vr, ext_bkp.vr, sizeof(ext_bkp.vr));
current->thread.vscr = ext_bkp.vscr;
current->thread.vrsave= ext_bkp.vrsave;
}
current->thread.used_vr = ext_bkp.used_vr;
#endif
#ifdef CONFIG_VSX
current->thread.used_vsr = ext_bkp.used_vsr;
#endif
return ret;
}

View file

@ -65,11 +65,11 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case 31:
switch (get_xop(inst)) {
case OP_31_XOP_MFMSR:
vcpu->arch.gpr[get_rt(inst)] = vcpu->arch.msr;
kvmppc_set_gpr(vcpu, get_rt(inst), vcpu->arch.msr);
break;
case OP_31_XOP_MTMSRD:
{
ulong rs = vcpu->arch.gpr[get_rs(inst)];
ulong rs = kvmppc_get_gpr(vcpu, get_rs(inst));
if (inst & 0x10000) {
vcpu->arch.msr &= ~(MSR_RI | MSR_EE);
vcpu->arch.msr |= rs & (MSR_RI | MSR_EE);
@ -78,30 +78,30 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
}
case OP_31_XOP_MTMSR:
kvmppc_set_msr(vcpu, vcpu->arch.gpr[get_rs(inst)]);
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, get_rs(inst)));
break;
case OP_31_XOP_MFSRIN:
{
int srnum;
srnum = (vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf;
srnum = (kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf;
if (vcpu->arch.mmu.mfsrin) {
u32 sr;
sr = vcpu->arch.mmu.mfsrin(vcpu, srnum);
vcpu->arch.gpr[get_rt(inst)] = sr;
kvmppc_set_gpr(vcpu, get_rt(inst), sr);
}
break;
}
case OP_31_XOP_MTSRIN:
vcpu->arch.mmu.mtsrin(vcpu,
(vcpu->arch.gpr[get_rb(inst)] >> 28) & 0xf,
vcpu->arch.gpr[get_rs(inst)]);
(kvmppc_get_gpr(vcpu, get_rb(inst)) >> 28) & 0xf,
kvmppc_get_gpr(vcpu, get_rs(inst)));
break;
case OP_31_XOP_TLBIE:
case OP_31_XOP_TLBIEL:
{
bool large = (inst & 0x00200000) ? true : false;
ulong addr = vcpu->arch.gpr[get_rb(inst)];
ulong addr = kvmppc_get_gpr(vcpu, get_rb(inst));
vcpu->arch.mmu.tlbie(vcpu, addr, large);
break;
}
@ -111,14 +111,16 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (!vcpu->arch.mmu.slbmte)
return EMULATE_FAIL;
vcpu->arch.mmu.slbmte(vcpu, vcpu->arch.gpr[get_rs(inst)],
vcpu->arch.gpr[get_rb(inst)]);
vcpu->arch.mmu.slbmte(vcpu,
kvmppc_get_gpr(vcpu, get_rs(inst)),
kvmppc_get_gpr(vcpu, get_rb(inst)));
break;
case OP_31_XOP_SLBIE:
if (!vcpu->arch.mmu.slbie)
return EMULATE_FAIL;
vcpu->arch.mmu.slbie(vcpu, vcpu->arch.gpr[get_rb(inst)]);
vcpu->arch.mmu.slbie(vcpu,
kvmppc_get_gpr(vcpu, get_rb(inst)));
break;
case OP_31_XOP_SLBIA:
if (!vcpu->arch.mmu.slbia)
@ -132,9 +134,9 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
} else {
ulong t, rb;
rb = vcpu->arch.gpr[get_rb(inst)];
rb = kvmppc_get_gpr(vcpu, get_rb(inst));
t = vcpu->arch.mmu.slbmfee(vcpu, rb);
vcpu->arch.gpr[get_rt(inst)] = t;
kvmppc_set_gpr(vcpu, get_rt(inst), t);
}
break;
case OP_31_XOP_SLBMFEV:
@ -143,20 +145,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
} else {
ulong t, rb;
rb = vcpu->arch.gpr[get_rb(inst)];
rb = kvmppc_get_gpr(vcpu, get_rb(inst));
t = vcpu->arch.mmu.slbmfev(vcpu, rb);
vcpu->arch.gpr[get_rt(inst)] = t;
kvmppc_set_gpr(vcpu, get_rt(inst), t);
}
break;
case OP_31_XOP_DCBZ:
{
ulong rb = vcpu->arch.gpr[get_rb(inst)];
ulong rb = kvmppc_get_gpr(vcpu, get_rb(inst));
ulong ra = 0;
ulong addr;
u32 zeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
if (get_ra(inst))
ra = vcpu->arch.gpr[get_ra(inst)];
ra = kvmppc_get_gpr(vcpu, get_ra(inst));
addr = (ra + rb) & ~31ULL;
if (!(vcpu->arch.msr & MSR_SF))
@ -233,43 +235,44 @@ static void kvmppc_write_bat(struct kvm_vcpu *vcpu, int sprn, u32 val)
int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
int emulated = EMULATE_DONE;
ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_SDR1:
to_book3s(vcpu)->sdr1 = vcpu->arch.gpr[rs];
to_book3s(vcpu)->sdr1 = spr_val;
break;
case SPRN_DSISR:
to_book3s(vcpu)->dsisr = vcpu->arch.gpr[rs];
to_book3s(vcpu)->dsisr = spr_val;
break;
case SPRN_DAR:
vcpu->arch.dear = vcpu->arch.gpr[rs];
vcpu->arch.dear = spr_val;
break;
case SPRN_HIOR:
to_book3s(vcpu)->hior = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hior = spr_val;
break;
case SPRN_IBAT0U ... SPRN_IBAT3L:
case SPRN_IBAT4U ... SPRN_IBAT7L:
case SPRN_DBAT0U ... SPRN_DBAT3L:
case SPRN_DBAT4U ... SPRN_DBAT7L:
kvmppc_write_bat(vcpu, sprn, (u32)vcpu->arch.gpr[rs]);
kvmppc_write_bat(vcpu, sprn, (u32)spr_val);
/* BAT writes happen so rarely that we're ok to flush
* everything here */
kvmppc_mmu_pte_flush(vcpu, 0, 0);
break;
case SPRN_HID0:
to_book3s(vcpu)->hid[0] = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hid[0] = spr_val;
break;
case SPRN_HID1:
to_book3s(vcpu)->hid[1] = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hid[1] = spr_val;
break;
case SPRN_HID2:
to_book3s(vcpu)->hid[2] = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hid[2] = spr_val;
break;
case SPRN_HID4:
to_book3s(vcpu)->hid[4] = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hid[4] = spr_val;
break;
case SPRN_HID5:
to_book3s(vcpu)->hid[5] = vcpu->arch.gpr[rs];
to_book3s(vcpu)->hid[5] = spr_val;
/* guest HID5 set can change is_dcbz32 */
if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
(mfmsr() & MSR_HV))
@ -299,38 +302,38 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
switch (sprn) {
case SPRN_SDR1:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->sdr1;
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->sdr1);
break;
case SPRN_DSISR:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->dsisr;
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->dsisr);
break;
case SPRN_DAR:
vcpu->arch.gpr[rt] = vcpu->arch.dear;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear);
break;
case SPRN_HIOR:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hior;
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hior);
break;
case SPRN_HID0:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[0];
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[0]);
break;
case SPRN_HID1:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[1];
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[1]);
break;
case SPRN_HID2:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[2];
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[2]);
break;
case SPRN_HID4:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[4];
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[4]);
break;
case SPRN_HID5:
vcpu->arch.gpr[rt] = to_book3s(vcpu)->hid[5];
kvmppc_set_gpr(vcpu, rt, to_book3s(vcpu)->hid[5]);
break;
case SPRN_THRM1:
case SPRN_THRM2:
case SPRN_THRM3:
case SPRN_CTRLF:
case SPRN_CTRLT:
vcpu->arch.gpr[rt] = 0;
kvmppc_set_gpr(vcpu, rt, 0);
break;
default:
printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);

View file

@ -22,3 +22,11 @@
EXPORT_SYMBOL_GPL(kvmppc_trampoline_enter);
EXPORT_SYMBOL_GPL(kvmppc_trampoline_lowmem);
EXPORT_SYMBOL_GPL(kvmppc_rmcall);
EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#endif
#ifdef CONFIG_VSX
EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
#endif

View file

@ -28,11 +28,6 @@
#define ULONG_SIZE 8
#define VCPU_GPR(n) (VCPU_GPRS + (n * ULONG_SIZE))
.macro mfpaca tmp_reg, src_reg, offset, vcpu_reg
ld \tmp_reg, (PACA_EXMC+\offset)(r13)
std \tmp_reg, VCPU_GPR(\src_reg)(\vcpu_reg)
.endm
.macro DISABLE_INTERRUPTS
mfmsr r0
rldicl r0,r0,48,1
@ -40,6 +35,26 @@
mtmsrd r0,1
.endm
#define VCPU_LOAD_NVGPRS(vcpu) \
ld r14, VCPU_GPR(r14)(vcpu); \
ld r15, VCPU_GPR(r15)(vcpu); \
ld r16, VCPU_GPR(r16)(vcpu); \
ld r17, VCPU_GPR(r17)(vcpu); \
ld r18, VCPU_GPR(r18)(vcpu); \
ld r19, VCPU_GPR(r19)(vcpu); \
ld r20, VCPU_GPR(r20)(vcpu); \
ld r21, VCPU_GPR(r21)(vcpu); \
ld r22, VCPU_GPR(r22)(vcpu); \
ld r23, VCPU_GPR(r23)(vcpu); \
ld r24, VCPU_GPR(r24)(vcpu); \
ld r25, VCPU_GPR(r25)(vcpu); \
ld r26, VCPU_GPR(r26)(vcpu); \
ld r27, VCPU_GPR(r27)(vcpu); \
ld r28, VCPU_GPR(r28)(vcpu); \
ld r29, VCPU_GPR(r29)(vcpu); \
ld r30, VCPU_GPR(r30)(vcpu); \
ld r31, VCPU_GPR(r31)(vcpu); \
/*****************************************************************************
* *
* Guest entry / exit code that is in kernel module memory (highmem) *
@ -67,61 +82,32 @@ kvm_start_entry:
SAVE_NVGPRS(r1)
/* Save LR */
mflr r14
std r14, _LINK(r1)
/* XXX optimize non-volatile loading away */
kvm_start_lightweight:
DISABLE_INTERRUPTS
/* Save R1/R2 in the PACA */
std r1, PACAR1(r13)
std r2, (PACA_EXMC+EX_SRR0)(r13)
ld r3, VCPU_HIGHMEM_HANDLER(r4)
std r3, PACASAVEDMSR(r13)
std r0, _LINK(r1)
/* Load non-volatile guest state from the vcpu */
ld r14, VCPU_GPR(r14)(r4)
ld r15, VCPU_GPR(r15)(r4)
ld r16, VCPU_GPR(r16)(r4)
ld r17, VCPU_GPR(r17)(r4)
ld r18, VCPU_GPR(r18)(r4)
ld r19, VCPU_GPR(r19)(r4)
ld r20, VCPU_GPR(r20)(r4)
ld r21, VCPU_GPR(r21)(r4)
ld r22, VCPU_GPR(r22)(r4)
ld r23, VCPU_GPR(r23)(r4)
ld r24, VCPU_GPR(r24)(r4)
ld r25, VCPU_GPR(r25)(r4)
ld r26, VCPU_GPR(r26)(r4)
ld r27, VCPU_GPR(r27)(r4)
ld r28, VCPU_GPR(r28)(r4)
ld r29, VCPU_GPR(r29)(r4)
ld r30, VCPU_GPR(r30)(r4)
ld r31, VCPU_GPR(r31)(r4)
VCPU_LOAD_NVGPRS(r4)
/* Save R1/R2 in the PACA */
std r1, PACA_KVM_HOST_R1(r13)
std r2, PACA_KVM_HOST_R2(r13)
/* XXX swap in/out on load? */
ld r3, VCPU_HIGHMEM_HANDLER(r4)
std r3, PACA_KVM_VMHANDLER(r13)
kvm_start_lightweight:
ld r9, VCPU_PC(r4) /* r9 = vcpu->arch.pc */
ld r10, VCPU_SHADOW_MSR(r4) /* r10 = vcpu->arch.shadow_msr */
ld r3, VCPU_TRAMPOLINE_ENTER(r4)
mtsrr0 r3
LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
mtsrr1 r3
/* Load guest state in the respective registers */
lwz r3, VCPU_CR(r4) /* r3 = vcpu->arch.cr */
stw r3, (PACA_EXMC + EX_CCR)(r13)
ld r3, VCPU_CTR(r4) /* r3 = vcpu->arch.ctr */
mtctr r3 /* CTR = r3 */
/* Load some guest state in the respective registers */
ld r5, VCPU_CTR(r4) /* r5 = vcpu->arch.ctr */
/* will be swapped in by rmcall */
ld r3, VCPU_LR(r4) /* r3 = vcpu->arch.lr */
mtlr r3 /* LR = r3 */
ld r3, VCPU_XER(r4) /* r3 = vcpu->arch.xer */
std r3, (PACA_EXMC + EX_R3)(r13)
DISABLE_INTERRUPTS
/* Some guests may need to have dcbz set to 32 byte length.
*
@ -141,36 +127,15 @@ kvm_start_lightweight:
mtspr SPRN_HID5,r3
no_dcbz32_on:
/* Load guest GPRs */
ld r3, VCPU_GPR(r9)(r4)
std r3, (PACA_EXMC + EX_R9)(r13)
ld r3, VCPU_GPR(r10)(r4)
std r3, (PACA_EXMC + EX_R10)(r13)
ld r3, VCPU_GPR(r11)(r4)
std r3, (PACA_EXMC + EX_R11)(r13)
ld r3, VCPU_GPR(r12)(r4)
std r3, (PACA_EXMC + EX_R12)(r13)
ld r3, VCPU_GPR(r13)(r4)
std r3, (PACA_EXMC + EX_R13)(r13)
ld r6, VCPU_RMCALL(r4)
mtctr r6
ld r0, VCPU_GPR(r0)(r4)
ld r1, VCPU_GPR(r1)(r4)
ld r2, VCPU_GPR(r2)(r4)
ld r3, VCPU_GPR(r3)(r4)
ld r5, VCPU_GPR(r5)(r4)
ld r6, VCPU_GPR(r6)(r4)
ld r7, VCPU_GPR(r7)(r4)
ld r8, VCPU_GPR(r8)(r4)
ld r4, VCPU_GPR(r4)(r4)
/* This sets the Magic value for the trampoline */
li r11, 1
stb r11, PACA_KVM_IN_GUEST(r13)
ld r3, VCPU_TRAMPOLINE_ENTER(r4)
LOAD_REG_IMMEDIATE(r4, MSR_KERNEL & ~(MSR_IR | MSR_DR))
/* Jump to SLB patching handlder and into our guest */
RFI
bctr
/*
* This is the handler in module memory. It gets jumped at from the
@ -184,125 +149,70 @@ kvmppc_handler_highmem:
/*
* Register usage at this point:
*
* R00 = guest R13
* R01 = host R1
* R02 = host R2
* R10 = guest PC
* R11 = guest MSR
* R12 = exit handler id
* R13 = PACA
* PACA.exmc.R9 = guest R1
* PACA.exmc.R10 = guest R10
* PACA.exmc.R11 = guest R11
* PACA.exmc.R12 = guest R12
* PACA.exmc.R13 = guest R2
* PACA.exmc.DAR = guest DAR
* PACA.exmc.DSISR = guest DSISR
* PACA.exmc.LR = guest instruction
* PACA.exmc.CCR = guest CR
* PACA.exmc.SRR0 = guest R0
* R0 = guest last inst
* R1 = host R1
* R2 = host R2
* R3 = guest PC
* R4 = guest MSR
* R5 = guest DAR
* R6 = guest DSISR
* R13 = PACA
* PACA.KVM.* = guest *
*
*/
std r3, (PACA_EXMC+EX_R3)(r13)
/* save the exit id in R3 */
mr r3, r12
/* R12 = vcpu */
ld r12, GPR4(r1)
/* R7 = vcpu */
ld r7, GPR4(r1)
/* Now save the guest state */
std r0, VCPU_GPR(r13)(r12)
std r4, VCPU_GPR(r4)(r12)
std r5, VCPU_GPR(r5)(r12)
std r6, VCPU_GPR(r6)(r12)
std r7, VCPU_GPR(r7)(r12)
std r8, VCPU_GPR(r8)(r12)
std r9, VCPU_GPR(r9)(r12)
stw r0, VCPU_LAST_INST(r7)
/* get registers from PACA */
mfpaca r5, r0, EX_SRR0, r12
mfpaca r5, r3, EX_R3, r12
mfpaca r5, r1, EX_R9, r12
mfpaca r5, r10, EX_R10, r12
mfpaca r5, r11, EX_R11, r12
mfpaca r5, r12, EX_R12, r12
mfpaca r5, r2, EX_R13, r12
std r3, VCPU_PC(r7)
std r4, VCPU_SHADOW_SRR1(r7)
std r5, VCPU_FAULT_DEAR(r7)
std r6, VCPU_FAULT_DSISR(r7)
lwz r5, (PACA_EXMC+EX_LR)(r13)
stw r5, VCPU_LAST_INST(r12)
lwz r5, (PACA_EXMC+EX_CCR)(r13)
stw r5, VCPU_CR(r12)
ld r5, VCPU_HFLAGS(r12)
ld r5, VCPU_HFLAGS(r7)
rldicl. r5, r5, 0, 63 /* CR = ((r5 & 1) == 0) */
beq no_dcbz32_off
li r4, 0
mfspr r5,SPRN_HID5
rldimi r5,r5,6,56
rldimi r5,r4,6,56
mtspr SPRN_HID5,r5
no_dcbz32_off:
/* XXX maybe skip on lightweight? */
std r14, VCPU_GPR(r14)(r12)
std r15, VCPU_GPR(r15)(r12)
std r16, VCPU_GPR(r16)(r12)
std r17, VCPU_GPR(r17)(r12)
std r18, VCPU_GPR(r18)(r12)
std r19, VCPU_GPR(r19)(r12)
std r20, VCPU_GPR(r20)(r12)
std r21, VCPU_GPR(r21)(r12)
std r22, VCPU_GPR(r22)(r12)
std r23, VCPU_GPR(r23)(r12)
std r24, VCPU_GPR(r24)(r12)
std r25, VCPU_GPR(r25)(r12)
std r26, VCPU_GPR(r26)(r12)
std r27, VCPU_GPR(r27)(r12)
std r28, VCPU_GPR(r28)(r12)
std r29, VCPU_GPR(r29)(r12)
std r30, VCPU_GPR(r30)(r12)
std r31, VCPU_GPR(r31)(r12)
std r14, VCPU_GPR(r14)(r7)
std r15, VCPU_GPR(r15)(r7)
std r16, VCPU_GPR(r16)(r7)
std r17, VCPU_GPR(r17)(r7)
std r18, VCPU_GPR(r18)(r7)
std r19, VCPU_GPR(r19)(r7)
std r20, VCPU_GPR(r20)(r7)
std r21, VCPU_GPR(r21)(r7)
std r22, VCPU_GPR(r22)(r7)
std r23, VCPU_GPR(r23)(r7)
std r24, VCPU_GPR(r24)(r7)
std r25, VCPU_GPR(r25)(r7)
std r26, VCPU_GPR(r26)(r7)
std r27, VCPU_GPR(r27)(r7)
std r28, VCPU_GPR(r28)(r7)
std r29, VCPU_GPR(r29)(r7)
std r30, VCPU_GPR(r30)(r7)
std r31, VCPU_GPR(r31)(r7)
/* Restore non-volatile host registers (r14 - r31) */
REST_NVGPRS(r1)
/* Save guest PC (R10) */
std r10, VCPU_PC(r12)
/* Save guest msr (R11) */
std r11, VCPU_SHADOW_MSR(r12)
/* Save guest CTR (in R12) */
/* Save guest CTR */
mfctr r5
std r5, VCPU_CTR(r12)
std r5, VCPU_CTR(r7)
/* Save guest LR */
mflr r5
std r5, VCPU_LR(r12)
/* Save guest XER */
mfxer r5
std r5, VCPU_XER(r12)
/* Save guest DAR */
ld r5, (PACA_EXMC+EX_DAR)(r13)
std r5, VCPU_FAULT_DEAR(r12)
/* Save guest DSISR */
lwz r5, (PACA_EXMC+EX_DSISR)(r13)
std r5, VCPU_FAULT_DSISR(r12)
std r5, VCPU_LR(r7)
/* Restore host msr -> SRR1 */
ld r7, VCPU_HOST_MSR(r12)
mtsrr1 r7
/* Restore host IP -> SRR0 */
ld r6, VCPU_HOST_RETIP(r12)
mtsrr0 r6
ld r6, VCPU_HOST_MSR(r7)
/*
* For some interrupts, we need to call the real Linux
@ -314,13 +224,14 @@ no_dcbz32_off:
* r3 = address of interrupt handler (exit reason)
*/
cmpwi r3, BOOK3S_INTERRUPT_EXTERNAL
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
beq call_linux_handler
cmpwi r3, BOOK3S_INTERRUPT_DECREMENTER
cmpwi r12, BOOK3S_INTERRUPT_DECREMENTER
beq call_linux_handler
/* Back to Interruptable Mode! (goto kvm_return_point) */
RFI
/* Back to EE=1 */
mtmsr r6
b kvm_return_point
call_linux_handler:
@ -333,16 +244,22 @@ call_linux_handler:
* interrupt handler!
*
* R3 still contains the exit code,
* R6 VCPU_HOST_RETIP and
* R7 VCPU_HOST_MSR
* R5 VCPU_HOST_RETIP and
* R6 VCPU_HOST_MSR
*/
mtlr r3
/* Restore host IP -> SRR0 */
ld r5, VCPU_HOST_RETIP(r7)
ld r5, VCPU_TRAMPOLINE_LOWMEM(r12)
mtsrr0 r5
LOAD_REG_IMMEDIATE(r5, MSR_KERNEL & ~(MSR_IR | MSR_DR))
mtsrr1 r5
/* XXX Better move to a safe function?
* What if we get an HTAB flush in between mtsrr0 and mtsrr1? */
mtlr r12
ld r4, VCPU_TRAMPOLINE_LOWMEM(r7)
mtsrr0 r4
LOAD_REG_IMMEDIATE(r3, MSR_KERNEL & ~(MSR_IR | MSR_DR))
mtsrr1 r3
RFI
@ -351,42 +268,51 @@ kvm_return_point:
/* Jump back to lightweight entry if we're supposed to */
/* go back into the guest */
mr r5, r3
/* Pass the exit number as 3rd argument to kvmppc_handle_exit */
mr r5, r12
/* Restore r3 (kvm_run) and r4 (vcpu) */
REST_2GPRS(3, r1)
bl KVMPPC_HANDLE_EXIT
#if 0 /* XXX get lightweight exits back */
/* If RESUME_GUEST, get back in the loop */
cmpwi r3, RESUME_GUEST
bne kvm_exit_heavyweight
beq kvm_loop_lightweight
/* put VCPU and KVM_RUN back into place and roll again! */
REST_2GPRS(3, r1)
b kvm_start_lightweight
cmpwi r3, RESUME_GUEST_NV
beq kvm_loop_heavyweight
kvm_exit_heavyweight:
/* Restore non-volatile host registers */
ld r14, _LINK(r1)
mtlr r14
REST_NVGPRS(r1)
kvm_exit_loop:
addi r1, r1, SWITCH_FRAME_SIZE
#else
ld r4, _LINK(r1)
mtlr r4
cmpwi r3, RESUME_GUEST
bne kvm_exit_heavyweight
/* Restore non-volatile host registers (r14 - r31) */
REST_NVGPRS(r1)
addi r1, r1, SWITCH_FRAME_SIZE
blr
kvm_loop_heavyweight:
ld r4, _LINK(r1)
std r4, (16 + SWITCH_FRAME_SIZE)(r1)
/* Load vcpu and cpu_run */
REST_2GPRS(3, r1)
addi r1, r1, SWITCH_FRAME_SIZE
/* Load non-volatile guest state from the vcpu */
VCPU_LOAD_NVGPRS(r4)
b kvm_start_entry
/* Jump back into the beginning of this function */
b kvm_start_lightweight
kvm_exit_heavyweight:
kvm_loop_lightweight:
addi r1, r1, SWITCH_FRAME_SIZE
#endif
/* We'll need the vcpu pointer */
REST_GPR(4, r1)
/* Jump back into the beginning of this function */
b kvm_start_lightweight
blr

View file

@ -54,7 +54,7 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
if (!vcpu_book3s->slb[i].valid)
continue;
if (vcpu_book3s->slb[i].large)
if (vcpu_book3s->slb[i].tb)
cmp_esid = esid_1t;
if (vcpu_book3s->slb[i].esid == cmp_esid)
@ -65,9 +65,10 @@ static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
eaddr, esid, esid_1t);
for (i = 0; i < vcpu_book3s->slb_nr; i++) {
if (vcpu_book3s->slb[i].vsid)
dprintk(" %d: %c%c %llx %llx\n", i,
dprintk(" %d: %c%c%c %llx %llx\n", i,
vcpu_book3s->slb[i].valid ? 'v' : ' ',
vcpu_book3s->slb[i].large ? 'l' : ' ',
vcpu_book3s->slb[i].tb ? 't' : ' ',
vcpu_book3s->slb[i].esid,
vcpu_book3s->slb[i].vsid);
}
@ -84,7 +85,7 @@ static u64 kvmppc_mmu_book3s_64_ea_to_vp(struct kvm_vcpu *vcpu, gva_t eaddr,
if (!slb)
return 0;
if (slb->large)
if (slb->tb)
return (((u64)eaddr >> 12) & 0xfffffff) |
(((u64)slb->vsid) << 28);
@ -309,7 +310,8 @@ static void kvmppc_mmu_book3s_64_slbmte(struct kvm_vcpu *vcpu, u64 rs, u64 rb)
slbe = &vcpu_book3s->slb[slb_nr];
slbe->large = (rs & SLB_VSID_L) ? 1 : 0;
slbe->esid = slbe->large ? esid_1t : esid;
slbe->tb = (rs & SLB_VSID_B_1T) ? 1 : 0;
slbe->esid = slbe->tb ? esid_1t : esid;
slbe->vsid = rs >> 12;
slbe->valid = (rb & SLB_ESID_V) ? 1 : 0;
slbe->Ks = (rs & SLB_VSID_KS) ? 1 : 0;

View file

@ -45,36 +45,25 @@ kvmppc_trampoline_\intno:
* To distinguish, we check a magic byte in the PACA
*/
mfspr r13, SPRN_SPRG_PACA /* r13 = PACA */
std r12, (PACA_EXMC + EX_R12)(r13)
std r12, PACA_KVM_SCRATCH0(r13)
mfcr r12
stw r12, (PACA_EXMC + EX_CCR)(r13)
stw r12, PACA_KVM_SCRATCH1(r13)
lbz r12, PACA_KVM_IN_GUEST(r13)
cmpwi r12, 0
cmpwi r12, KVM_GUEST_MODE_NONE
bne ..kvmppc_handler_hasmagic_\intno
/* No KVM guest? Then jump back to the Linux handler! */
lwz r12, (PACA_EXMC + EX_CCR)(r13)
lwz r12, PACA_KVM_SCRATCH1(r13)
mtcr r12
ld r12, (PACA_EXMC + EX_R12)(r13)
ld r12, PACA_KVM_SCRATCH0(r13)
mfspr r13, SPRN_SPRG_SCRATCH0 /* r13 = original r13 */
b kvmppc_resume_\intno /* Get back original handler */
/* Now we know we're handling a KVM guest */
..kvmppc_handler_hasmagic_\intno:
/* Unset guest state */
li r12, 0
stb r12, PACA_KVM_IN_GUEST(r13)
std r1, (PACA_EXMC+EX_R9)(r13)
std r10, (PACA_EXMC+EX_R10)(r13)
std r11, (PACA_EXMC+EX_R11)(r13)
std r2, (PACA_EXMC+EX_R13)(r13)
mfsrr0 r10
mfsrr1 r11
/* Restore R1/R2 so we can handle faults */
ld r1, PACAR1(r13)
ld r2, (PACA_EXMC+EX_SRR0)(r13)
/* Should we just skip the faulting instruction? */
cmpwi r12, KVM_GUEST_MODE_SKIP
beq kvmppc_handler_skip_ins
/* Let's store which interrupt we're handling */
li r12, \intno
@ -101,24 +90,108 @@ INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_PERFMON
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_ALTIVEC
INTERRUPT_TRAMPOLINE BOOK3S_INTERRUPT_VSX
/*
* Bring us back to the faulting code, but skip the
* faulting instruction.
*
* This is a generic exit path from the interrupt
* trampolines above.
*
* Input Registers:
*
* R12 = free
* R13 = PACA
* PACA.KVM.SCRATCH0 = guest R12
* PACA.KVM.SCRATCH1 = guest CR
* SPRG_SCRATCH0 = guest R13
*
*/
kvmppc_handler_skip_ins:
/* Patch the IP to the next instruction */
mfsrr0 r12
addi r12, r12, 4
mtsrr0 r12
/* Clean up all state */
lwz r12, PACA_KVM_SCRATCH1(r13)
mtcr r12
ld r12, PACA_KVM_SCRATCH0(r13)
mfspr r13, SPRN_SPRG_SCRATCH0
/* And get back into the code */
RFI
/*
* This trampoline brings us back to a real mode handler
*
* Input Registers:
*
* R6 = SRR0
* R7 = SRR1
* R5 = SRR0
* R6 = SRR1
* LR = real-mode IP
*
*/
.global kvmppc_handler_lowmem_trampoline
kvmppc_handler_lowmem_trampoline:
mtsrr0 r6
mtsrr1 r7
mtsrr0 r5
mtsrr1 r6
blr
kvmppc_handler_lowmem_trampoline_end:
/*
* Call a function in real mode
*
* Input Registers:
*
* R3 = function
* R4 = MSR
* R5 = CTR
*
*/
_GLOBAL(kvmppc_rmcall)
mtmsr r4 /* Disable relocation, so mtsrr
doesn't get interrupted */
mtctr r5
mtsrr0 r3
mtsrr1 r4
RFI
/*
* Activate current's external feature (FPU/Altivec/VSX)
*/
#define define_load_up(what) \
\
_GLOBAL(kvmppc_load_up_ ## what); \
subi r1, r1, INT_FRAME_SIZE; \
mflr r3; \
std r3, _LINK(r1); \
mfmsr r4; \
std r31, GPR3(r1); \
mr r31, r4; \
li r5, MSR_DR; \
oris r5, r5, MSR_EE@h; \
andc r4, r4, r5; \
mtmsr r4; \
\
bl .load_up_ ## what; \
\
mtmsr r31; \
ld r3, _LINK(r1); \
ld r31, GPR3(r1); \
addi r1, r1, INT_FRAME_SIZE; \
mtlr r3; \
blr
define_load_up(fpu)
#ifdef CONFIG_ALTIVEC
define_load_up(altivec)
#endif
#ifdef CONFIG_VSX
define_load_up(vsx)
#endif
.global kvmppc_trampoline_lowmem
kvmppc_trampoline_lowmem:
.long kvmppc_handler_lowmem_trampoline - _stext

View file

@ -31,7 +31,7 @@
#define REBOLT_SLB_ENTRY(num) \
ld r10, SHADOW_SLB_ESID(num)(r11); \
cmpdi r10, 0; \
beq slb_exit_skip_1; \
beq slb_exit_skip_ ## num; \
oris r10, r10, SLB_ESID_V@h; \
ld r9, SHADOW_SLB_VSID(num)(r11); \
slbmte r9, r10; \
@ -51,23 +51,21 @@ kvmppc_handler_trampoline_enter:
*
* MSR = ~IR|DR
* R13 = PACA
* R1 = host R1
* R2 = host R2
* R9 = guest IP
* R10 = guest MSR
* R11 = free
* R12 = free
* PACA[PACA_EXMC + EX_R9] = guest R9
* PACA[PACA_EXMC + EX_R10] = guest R10
* PACA[PACA_EXMC + EX_R11] = guest R11
* PACA[PACA_EXMC + EX_R12] = guest R12
* PACA[PACA_EXMC + EX_R13] = guest R13
* PACA[PACA_EXMC + EX_CCR] = guest CR
* PACA[PACA_EXMC + EX_R3] = guest XER
* all other GPRS = free
* PACA[KVM_CR] = guest CR
* PACA[KVM_XER] = guest XER
*/
mtsrr0 r9
mtsrr1 r10
mtspr SPRN_SPRG_SCRATCH0, r0
/* Activate guest mode, so faults get handled by KVM */
li r11, KVM_GUEST_MODE_GUEST
stb r11, PACA_KVM_IN_GUEST(r13)
/* Remove LPAR shadow entries */
@ -131,20 +129,27 @@ slb_do_enter:
/* Enter guest */
mfspr r0, SPRN_SPRG_SCRATCH0
ld r0, (PACA_KVM_R0)(r13)
ld r1, (PACA_KVM_R1)(r13)
ld r2, (PACA_KVM_R2)(r13)
ld r3, (PACA_KVM_R3)(r13)
ld r4, (PACA_KVM_R4)(r13)
ld r5, (PACA_KVM_R5)(r13)
ld r6, (PACA_KVM_R6)(r13)
ld r7, (PACA_KVM_R7)(r13)
ld r8, (PACA_KVM_R8)(r13)
ld r9, (PACA_KVM_R9)(r13)
ld r10, (PACA_KVM_R10)(r13)
ld r12, (PACA_KVM_R12)(r13)
ld r9, (PACA_EXMC+EX_R9)(r13)
ld r10, (PACA_EXMC+EX_R10)(r13)
ld r12, (PACA_EXMC+EX_R12)(r13)
lwz r11, (PACA_EXMC+EX_CCR)(r13)
lwz r11, (PACA_KVM_CR)(r13)
mtcr r11
ld r11, (PACA_EXMC+EX_R3)(r13)
ld r11, (PACA_KVM_XER)(r13)
mtxer r11
ld r11, (PACA_EXMC+EX_R11)(r13)
ld r13, (PACA_EXMC+EX_R13)(r13)
ld r11, (PACA_KVM_R11)(r13)
ld r13, (PACA_KVM_R13)(r13)
RFI
kvmppc_handler_trampoline_enter_end:
@ -162,28 +167,54 @@ kvmppc_handler_trampoline_exit:
/* Register usage at this point:
*
* SPRG_SCRATCH0 = guest R13
* R01 = host R1
* R02 = host R2
* R10 = guest PC
* R11 = guest MSR
* R12 = exit handler id
* R13 = PACA
* PACA.exmc.CCR = guest CR
* PACA.exmc.R9 = guest R1
* PACA.exmc.R10 = guest R10
* PACA.exmc.R11 = guest R11
* PACA.exmc.R12 = guest R12
* PACA.exmc.R13 = guest R2
* SPRG_SCRATCH0 = guest R13
* R12 = exit handler id
* R13 = PACA
* PACA.KVM.SCRATCH0 = guest R12
* PACA.KVM.SCRATCH1 = guest CR
*
*/
/* Save registers */
std r0, (PACA_EXMC+EX_SRR0)(r13)
std r9, (PACA_EXMC+EX_R3)(r13)
std r10, (PACA_EXMC+EX_LR)(r13)
std r11, (PACA_EXMC+EX_DAR)(r13)
std r0, PACA_KVM_R0(r13)
std r1, PACA_KVM_R1(r13)
std r2, PACA_KVM_R2(r13)
std r3, PACA_KVM_R3(r13)
std r4, PACA_KVM_R4(r13)
std r5, PACA_KVM_R5(r13)
std r6, PACA_KVM_R6(r13)
std r7, PACA_KVM_R7(r13)
std r8, PACA_KVM_R8(r13)
std r9, PACA_KVM_R9(r13)
std r10, PACA_KVM_R10(r13)
std r11, PACA_KVM_R11(r13)
/* Restore R1/R2 so we can handle faults */
ld r1, PACA_KVM_HOST_R1(r13)
ld r2, PACA_KVM_HOST_R2(r13)
/* Save guest PC and MSR in GPRs */
mfsrr0 r3
mfsrr1 r4
/* Get scratch'ed off registers */
mfspr r9, SPRN_SPRG_SCRATCH0
std r9, PACA_KVM_R13(r13)
ld r8, PACA_KVM_SCRATCH0(r13)
std r8, PACA_KVM_R12(r13)
lwz r7, PACA_KVM_SCRATCH1(r13)
stw r7, PACA_KVM_CR(r13)
/* Save more register state */
mfxer r6
stw r6, PACA_KVM_XER(r13)
mfdar r5
mfdsisr r6
/*
* In order for us to easily get the last instruction,
@ -202,17 +233,28 @@ kvmppc_handler_trampoline_exit:
ld_last_inst:
/* Save off the guest instruction we're at */
/* Set guest mode to 'jump over instruction' so if lwz faults
* we'll just continue at the next IP. */
li r9, KVM_GUEST_MODE_SKIP
stb r9, PACA_KVM_IN_GUEST(r13)
/* 1) enable paging for data */
mfmsr r9
ori r11, r9, MSR_DR /* Enable paging for data */
mtmsr r11
/* 2) fetch the instruction */
lwz r0, 0(r10)
li r0, KVM_INST_FETCH_FAILED /* In case lwz faults */
lwz r0, 0(r3)
/* 3) disable paging again */
mtmsr r9
no_ld_last_inst:
/* Unset guest mode */
li r9, KVM_GUEST_MODE_NONE
stb r9, PACA_KVM_IN_GUEST(r13)
/* Restore bolted entries from the shadow and fix it along the way */
/* We don't store anything in entry 0, so we don't need to take care of it */
@ -233,29 +275,27 @@ no_ld_last_inst:
slb_do_exit:
/* Restore registers */
ld r11, (PACA_EXMC+EX_DAR)(r13)
ld r10, (PACA_EXMC+EX_LR)(r13)
ld r9, (PACA_EXMC+EX_R3)(r13)
/* Save last inst */
stw r0, (PACA_EXMC+EX_LR)(r13)
/* Save DAR and DSISR before going to paged mode */
mfdar r0
std r0, (PACA_EXMC+EX_DAR)(r13)
mfdsisr r0
stw r0, (PACA_EXMC+EX_DSISR)(r13)
/* Register usage at this point:
*
* R0 = guest last inst
* R1 = host R1
* R2 = host R2
* R3 = guest PC
* R4 = guest MSR
* R5 = guest DAR
* R6 = guest DSISR
* R12 = exit handler id
* R13 = PACA
* PACA.KVM.* = guest *
*
*/
/* RFI into the highmem handler */
mfmsr r0
ori r0, r0, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
mtsrr1 r0
ld r0, PACASAVEDMSR(r13) /* Highmem handler address */
mtsrr0 r0
mfspr r0, SPRN_SPRG_SCRATCH0
mfmsr r7
ori r7, r7, MSR_IR|MSR_DR|MSR_RI /* Enable paging */
mtsrr1 r7
ld r8, PACA_KVM_VMHANDLER(r13) /* Highmem handler address */
mtsrr0 r8
RFI
kvmppc_handler_trampoline_exit_end:

View file

@ -69,10 +69,10 @@ void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu)
for (i = 0; i < 32; i += 4) {
printk("gpr%02d: %08lx %08lx %08lx %08lx\n", i,
vcpu->arch.gpr[i],
vcpu->arch.gpr[i+1],
vcpu->arch.gpr[i+2],
vcpu->arch.gpr[i+3]);
kvmppc_get_gpr(vcpu, i),
kvmppc_get_gpr(vcpu, i+1),
kvmppc_get_gpr(vcpu, i+2),
kvmppc_get_gpr(vcpu, i+3));
}
}
@ -82,8 +82,32 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
set_bit(priority, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu,
ulong dear_flags, ulong esr_flags)
{
vcpu->arch.queued_dear = dear_flags;
vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
}
static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu,
ulong dear_flags, ulong esr_flags)
{
vcpu->arch.queued_dear = dear_flags;
vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
}
static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu,
ulong esr_flags)
{
vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
}
void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
{
vcpu->arch.queued_esr = esr_flags;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}
@ -97,6 +121,11 @@ int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
return test_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu)
{
clear_bit(BOOKE_IRQPRIO_DECREMENTER, &vcpu->arch.pending_exceptions);
}
void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
{
@ -109,14 +138,19 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
{
int allowed = 0;
ulong msr_mask;
bool update_esr = false, update_dear = false;
switch (priority) {
case BOOKE_IRQPRIO_PROGRAM:
case BOOKE_IRQPRIO_DTLB_MISS:
case BOOKE_IRQPRIO_DATA_STORAGE:
update_dear = true;
/* fall through */
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_PROGRAM:
update_esr = true;
/* fall through */
case BOOKE_IRQPRIO_ITLB_MISS:
case BOOKE_IRQPRIO_SYSCALL:
case BOOKE_IRQPRIO_DATA_STORAGE:
case BOOKE_IRQPRIO_INST_STORAGE:
case BOOKE_IRQPRIO_FP_UNAVAIL:
case BOOKE_IRQPRIO_SPE_UNAVAIL:
case BOOKE_IRQPRIO_SPE_FP_DATA:
@ -151,6 +185,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
vcpu->arch.srr0 = vcpu->arch.pc;
vcpu->arch.srr1 = vcpu->arch.msr;
vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority];
if (update_esr == true)
vcpu->arch.esr = vcpu->arch.queued_esr;
if (update_dear == true)
vcpu->arch.dear = vcpu->arch.queued_dear;
kvmppc_set_msr(vcpu, vcpu->arch.msr & msr_mask);
clear_bit(priority, &vcpu->arch.pending_exceptions);
@ -223,8 +261,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
if (vcpu->arch.msr & MSR_PR) {
/* Program traps generated by user-level software must be handled
* by the guest kernel. */
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
kvmppc_core_queue_program(vcpu, vcpu->arch.fault_esr);
r = RESUME_GUEST;
kvmppc_account_exit(vcpu, USR_PR_INST);
break;
@ -280,16 +317,14 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOKE_INTERRUPT_DATA_STORAGE:
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE);
kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dear,
vcpu->arch.fault_esr);
kvmppc_account_exit(vcpu, DSI_EXITS);
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_INST_STORAGE:
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE);
kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_esr);
kvmppc_account_exit(vcpu, ISI_EXITS);
r = RESUME_GUEST;
break;
@ -310,9 +345,9 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr);
if (gtlb_index < 0) {
/* The guest didn't have a mapping for it. */
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS);
vcpu->arch.dear = vcpu->arch.fault_dear;
vcpu->arch.esr = vcpu->arch.fault_esr;
kvmppc_core_queue_dtlb_miss(vcpu,
vcpu->arch.fault_dear,
vcpu->arch.fault_esr);
kvmppc_mmu_dtlb_miss(vcpu);
kvmppc_account_exit(vcpu, DTLB_REAL_MISS_EXITS);
r = RESUME_GUEST;
@ -426,7 +461,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = 0;
vcpu->arch.msr = 0;
vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */
kvmppc_set_gpr(vcpu, 1, (16<<20) - 8); /* -8 for the callee-save LR slot */
vcpu->arch.shadow_pid = 1;
@ -444,10 +479,10 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int i;
regs->pc = vcpu->arch.pc;
regs->cr = vcpu->arch.cr;
regs->cr = kvmppc_get_cr(vcpu);
regs->ctr = vcpu->arch.ctr;
regs->lr = vcpu->arch.lr;
regs->xer = vcpu->arch.xer;
regs->xer = kvmppc_get_xer(vcpu);
regs->msr = vcpu->arch.msr;
regs->srr0 = vcpu->arch.srr0;
regs->srr1 = vcpu->arch.srr1;
@ -461,7 +496,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
regs->sprg7 = vcpu->arch.sprg6;
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
regs->gpr[i] = vcpu->arch.gpr[i];
regs->gpr[i] = kvmppc_get_gpr(vcpu, i);
return 0;
}
@ -471,10 +506,10 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
int i;
vcpu->arch.pc = regs->pc;
vcpu->arch.cr = regs->cr;
kvmppc_set_cr(vcpu, regs->cr);
vcpu->arch.ctr = regs->ctr;
vcpu->arch.lr = regs->lr;
vcpu->arch.xer = regs->xer;
kvmppc_set_xer(vcpu, regs->xer);
kvmppc_set_msr(vcpu, regs->msr);
vcpu->arch.srr0 = regs->srr0;
vcpu->arch.srr1 = regs->srr1;
@ -486,8 +521,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
vcpu->arch.sprg6 = regs->sprg5;
vcpu->arch.sprg7 = regs->sprg6;
for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
vcpu->arch.gpr[i] = regs->gpr[i];
for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
return 0;
}

View file

@ -62,20 +62,20 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
case OP_31_XOP_MFMSR:
rt = get_rt(inst);
vcpu->arch.gpr[rt] = vcpu->arch.msr;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.msr);
kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS);
break;
case OP_31_XOP_MTMSR:
rs = get_rs(inst);
kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]);
kvmppc_set_msr(vcpu, kvmppc_get_gpr(vcpu, rs));
break;
case OP_31_XOP_WRTEE:
rs = get_rs(inst);
vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE)
| (vcpu->arch.gpr[rs] & MSR_EE);
| (kvmppc_get_gpr(vcpu, rs) & MSR_EE);
kvmppc_set_exit_type(vcpu, EMULATED_WRTEE_EXITS);
break;
@ -101,22 +101,23 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
int emulated = EMULATE_DONE;
ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_DEAR:
vcpu->arch.dear = vcpu->arch.gpr[rs]; break;
vcpu->arch.dear = spr_val; break;
case SPRN_ESR:
vcpu->arch.esr = vcpu->arch.gpr[rs]; break;
vcpu->arch.esr = spr_val; break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break;
vcpu->arch.dbcr0 = spr_val; break;
case SPRN_DBCR1:
vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break;
vcpu->arch.dbcr1 = spr_val; break;
case SPRN_DBSR:
vcpu->arch.dbsr &= ~vcpu->arch.gpr[rs]; break;
vcpu->arch.dbsr &= ~spr_val; break;
case SPRN_TSR:
vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break;
vcpu->arch.tsr &= ~spr_val; break;
case SPRN_TCR:
vcpu->arch.tcr = vcpu->arch.gpr[rs];
vcpu->arch.tcr = spr_val;
kvmppc_emulate_dec(vcpu);
break;
@ -124,64 +125,64 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
* loaded into the real SPRGs when resuming the
* guest. */
case SPRN_SPRG4:
vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg4 = spr_val; break;
case SPRN_SPRG5:
vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg5 = spr_val; break;
case SPRN_SPRG6:
vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg6 = spr_val; break;
case SPRN_SPRG7:
vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg7 = spr_val; break;
case SPRN_IVPR:
vcpu->arch.ivpr = vcpu->arch.gpr[rs];
vcpu->arch.ivpr = spr_val;
break;
case SPRN_IVOR0:
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL] = spr_val;
break;
case SPRN_IVOR1:
vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK] = spr_val;
break;
case SPRN_IVOR2:
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE] = spr_val;
break;
case SPRN_IVOR3:
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE] = spr_val;
break;
case SPRN_IVOR4:
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL] = spr_val;
break;
case SPRN_IVOR5:
vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT] = spr_val;
break;
case SPRN_IVOR6:
vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM] = spr_val;
break;
case SPRN_IVOR7:
vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL] = spr_val;
break;
case SPRN_IVOR8:
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL] = spr_val;
break;
case SPRN_IVOR9:
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL] = spr_val;
break;
case SPRN_IVOR10:
vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER] = spr_val;
break;
case SPRN_IVOR11:
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_FIT] = spr_val;
break;
case SPRN_IVOR12:
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG] = spr_val;
break;
case SPRN_IVOR13:
vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS] = spr_val;
break;
case SPRN_IVOR14:
vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS] = spr_val;
break;
case SPRN_IVOR15:
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
break;
default:
@ -197,65 +198,65 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
switch (sprn) {
case SPRN_IVPR:
vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivpr); break;
case SPRN_DEAR:
vcpu->arch.gpr[rt] = vcpu->arch.dear; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dear); break;
case SPRN_ESR:
vcpu->arch.gpr[rt] = vcpu->arch.esr; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.esr); break;
case SPRN_DBCR0:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr0); break;
case SPRN_DBCR1:
vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbcr1); break;
case SPRN_DBSR:
vcpu->arch.gpr[rt] = vcpu->arch.dbsr; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dbsr); break;
case SPRN_IVOR0:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_CRITICAL]);
break;
case SPRN_IVOR1:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_MACHINE_CHECK]);
break;
case SPRN_IVOR2:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DATA_STORAGE]);
break;
case SPRN_IVOR3:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_INST_STORAGE]);
break;
case SPRN_IVOR4:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_EXTERNAL]);
break;
case SPRN_IVOR5:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ALIGNMENT]);
break;
case SPRN_IVOR6:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PROGRAM]);
break;
case SPRN_IVOR7:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FP_UNAVAIL]);
break;
case SPRN_IVOR8:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SYSCALL]);
break;
case SPRN_IVOR9:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_AP_UNAVAIL]);
break;
case SPRN_IVOR10:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DECREMENTER]);
break;
case SPRN_IVOR11:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_FIT];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_FIT]);
break;
case SPRN_IVOR12:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_WATCHDOG]);
break;
case SPRN_IVOR13:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DTLB_MISS]);
break;
case SPRN_IVOR14:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_ITLB_MISS]);
break;
case SPRN_IVOR15:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG]);
break;
default:

View file

@ -60,6 +60,12 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
kvmppc_e500_tlb_setup(vcpu_e500);
/* Registers init */
vcpu->arch.pvr = mfspr(SPRN_PVR);
/* Since booke kvm only support one core, update all vcpus' PIR to 0 */
vcpu->vcpu_id = 0;
return 0;
}

View file

@ -74,54 +74,59 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int emulated = EMULATE_DONE;
ulong spr_val = kvmppc_get_gpr(vcpu, rs);
switch (sprn) {
case SPRN_PID:
vcpu_e500->pid[0] = vcpu->arch.shadow_pid =
vcpu->arch.pid = vcpu->arch.gpr[rs];
vcpu->arch.pid = spr_val;
break;
case SPRN_PID1:
vcpu_e500->pid[1] = vcpu->arch.gpr[rs]; break;
vcpu_e500->pid[1] = spr_val; break;
case SPRN_PID2:
vcpu_e500->pid[2] = vcpu->arch.gpr[rs]; break;
vcpu_e500->pid[2] = spr_val; break;
case SPRN_MAS0:
vcpu_e500->mas0 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas0 = spr_val; break;
case SPRN_MAS1:
vcpu_e500->mas1 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas1 = spr_val; break;
case SPRN_MAS2:
vcpu_e500->mas2 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas2 = spr_val; break;
case SPRN_MAS3:
vcpu_e500->mas3 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas3 = spr_val; break;
case SPRN_MAS4:
vcpu_e500->mas4 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas4 = spr_val; break;
case SPRN_MAS6:
vcpu_e500->mas6 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas6 = spr_val; break;
case SPRN_MAS7:
vcpu_e500->mas7 = vcpu->arch.gpr[rs]; break;
vcpu_e500->mas7 = spr_val; break;
case SPRN_L1CSR0:
vcpu_e500->l1csr0 = spr_val;
vcpu_e500->l1csr0 &= ~(L1CSR0_DCFI | L1CSR0_CLFC);
break;
case SPRN_L1CSR1:
vcpu_e500->l1csr1 = vcpu->arch.gpr[rs]; break;
vcpu_e500->l1csr1 = spr_val; break;
case SPRN_HID0:
vcpu_e500->hid0 = vcpu->arch.gpr[rs]; break;
vcpu_e500->hid0 = spr_val; break;
case SPRN_HID1:
vcpu_e500->hid1 = vcpu->arch.gpr[rs]; break;
vcpu_e500->hid1 = spr_val; break;
case SPRN_MMUCSR0:
emulated = kvmppc_e500_emul_mt_mmucsr0(vcpu_e500,
vcpu->arch.gpr[rs]);
spr_val);
break;
/* extra exceptions */
case SPRN_IVOR32:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val;
break;
case SPRN_IVOR33:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA] = spr_val;
break;
case SPRN_IVOR34:
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND] = spr_val;
break;
case SPRN_IVOR35:
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = vcpu->arch.gpr[rs];
vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR] = spr_val;
break;
default:
@ -138,63 +143,57 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
switch (sprn) {
case SPRN_PID:
vcpu->arch.gpr[rt] = vcpu_e500->pid[0]; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[0]); break;
case SPRN_PID1:
vcpu->arch.gpr[rt] = vcpu_e500->pid[1]; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[1]); break;
case SPRN_PID2:
vcpu->arch.gpr[rt] = vcpu_e500->pid[2]; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->pid[2]); break;
case SPRN_MAS0:
vcpu->arch.gpr[rt] = vcpu_e500->mas0; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas0); break;
case SPRN_MAS1:
vcpu->arch.gpr[rt] = vcpu_e500->mas1; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas1); break;
case SPRN_MAS2:
vcpu->arch.gpr[rt] = vcpu_e500->mas2; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas2); break;
case SPRN_MAS3:
vcpu->arch.gpr[rt] = vcpu_e500->mas3; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas3); break;
case SPRN_MAS4:
vcpu->arch.gpr[rt] = vcpu_e500->mas4; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas4); break;
case SPRN_MAS6:
vcpu->arch.gpr[rt] = vcpu_e500->mas6; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas6); break;
case SPRN_MAS7:
vcpu->arch.gpr[rt] = vcpu_e500->mas7; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->mas7); break;
case SPRN_TLB0CFG:
vcpu->arch.gpr[rt] = mfspr(SPRN_TLB0CFG);
vcpu->arch.gpr[rt] &= ~0xfffUL;
vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[0];
break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb0cfg); break;
case SPRN_TLB1CFG:
vcpu->arch.gpr[rt] = mfspr(SPRN_TLB1CFG);
vcpu->arch.gpr[rt] &= ~0xfffUL;
vcpu->arch.gpr[rt] |= vcpu_e500->guest_tlb_size[1];
break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->tlb1cfg); break;
case SPRN_L1CSR0:
kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr0); break;
case SPRN_L1CSR1:
vcpu->arch.gpr[rt] = vcpu_e500->l1csr1; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->l1csr1); break;
case SPRN_HID0:
vcpu->arch.gpr[rt] = vcpu_e500->hid0; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid0); break;
case SPRN_HID1:
vcpu->arch.gpr[rt] = vcpu_e500->hid1; break;
kvmppc_set_gpr(vcpu, rt, vcpu_e500->hid1); break;
case SPRN_MMUCSR0:
vcpu->arch.gpr[rt] = 0; break;
kvmppc_set_gpr(vcpu, rt, 0); break;
case SPRN_MMUCFG:
vcpu->arch.gpr[rt] = mfspr(SPRN_MMUCFG); break;
kvmppc_set_gpr(vcpu, rt, mfspr(SPRN_MMUCFG)); break;
/* extra exceptions */
case SPRN_IVOR32:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]);
break;
case SPRN_IVOR33:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_DATA]);
break;
case SPRN_IVOR34:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_FP_ROUND]);
break;
case SPRN_IVOR35:
vcpu->arch.gpr[rt] = vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR];
kvmppc_set_gpr(vcpu, rt, vcpu->arch.ivor[BOOKE_IRQPRIO_PERFORMANCE_MONITOR]);
break;
default:
emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, rt);

View file

@ -417,7 +417,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
int esel, tlbsel;
gva_t ea;
ea = ((ra) ? vcpu->arch.gpr[ra] : 0) + vcpu->arch.gpr[rb];
ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
ia = (ea >> 2) & 0x1;
@ -470,7 +470,7 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
struct tlbe *gtlbe = NULL;
gva_t ea;
ea = vcpu->arch.gpr[rb];
ea = kvmppc_get_gpr(vcpu, rb);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
@ -728,6 +728,12 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
if (vcpu_e500->shadow_pages[1] == NULL)
goto err_out_page0;
/* Init TLB configuration register */
vcpu_e500->tlb0cfg = mfspr(SPRN_TLB0CFG) & ~0xfffUL;
vcpu_e500->tlb0cfg |= vcpu_e500->guest_tlb_size[0];
vcpu_e500->tlb1cfg = mfspr(SPRN_TLB1CFG) & ~0xfffUL;
vcpu_e500->tlb1cfg |= vcpu_e500->guest_tlb_size[1];
return 0;
err_out_page0:

View file

@ -83,6 +83,9 @@ void kvmppc_emulate_dec(struct kvm_vcpu *vcpu)
pr_debug("mtDEC: %x\n", vcpu->arch.dec);
#ifdef CONFIG_PPC64
/* mtdec lowers the interrupt line when positive. */
kvmppc_core_dequeue_dec(vcpu);
/* POWER4+ triggers a dec interrupt if the value is < 0 */
if (vcpu->arch.dec & 0x80000000) {
hrtimer_try_to_cancel(&vcpu->arch.dec_timer);
@ -140,14 +143,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
pr_debug(KERN_INFO "Emulating opcode %d / %d\n", get_op(inst), get_xop(inst));
/* Try again next time */
if (inst == KVM_INST_FETCH_FAILED)
return EMULATE_DONE;
switch (get_op(inst)) {
case OP_TRAP:
#ifdef CONFIG_PPC64
case OP_TRAP_64:
kvmppc_core_queue_program(vcpu, SRR1_PROGTRAP);
#else
vcpu->arch.esr |= ESR_PTR;
kvmppc_core_queue_program(vcpu, vcpu->arch.esr | ESR_PTR);
#endif
kvmppc_core_queue_program(vcpu);
advance = 0;
break;
@ -167,14 +174,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_31_XOP_STWX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_31_XOP_STBX:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
@ -183,14 +190,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rb = get_rb(inst);
ea = vcpu->arch.gpr[rb];
ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
ea += vcpu->arch.gpr[ra];
ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
1, 1);
vcpu->arch.gpr[rs] = ea;
kvmppc_set_gpr(vcpu, rs, ea);
break;
case OP_31_XOP_LHZX:
@ -203,12 +210,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rb = get_rb(inst);
ea = vcpu->arch.gpr[rb];
ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
ea += vcpu->arch.gpr[ra];
ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
vcpu->arch.gpr[ra] = ea;
kvmppc_set_gpr(vcpu, ra, ea);
break;
case OP_31_XOP_MFSPR:
@ -217,47 +224,49 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
switch (sprn) {
case SPRN_SRR0:
vcpu->arch.gpr[rt] = vcpu->arch.srr0; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr0); break;
case SPRN_SRR1:
vcpu->arch.gpr[rt] = vcpu->arch.srr1; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.srr1); break;
case SPRN_PVR:
vcpu->arch.gpr[rt] = vcpu->arch.pvr; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.pvr); break;
case SPRN_PIR:
vcpu->arch.gpr[rt] = vcpu->vcpu_id; break;
kvmppc_set_gpr(vcpu, rt, vcpu->vcpu_id); break;
case SPRN_MSSSR0:
vcpu->arch.gpr[rt] = 0; break;
kvmppc_set_gpr(vcpu, rt, 0); break;
/* Note: mftb and TBRL/TBWL are user-accessible, so
* the guest can always access the real TB anyways.
* In fact, we probably will never see these traps. */
case SPRN_TBWL:
vcpu->arch.gpr[rt] = get_tb() >> 32; break;
kvmppc_set_gpr(vcpu, rt, get_tb() >> 32); break;
case SPRN_TBWU:
vcpu->arch.gpr[rt] = get_tb(); break;
kvmppc_set_gpr(vcpu, rt, get_tb()); break;
case SPRN_SPRG0:
vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg0); break;
case SPRN_SPRG1:
vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg1); break;
case SPRN_SPRG2:
vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg2); break;
case SPRN_SPRG3:
vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break;
kvmppc_set_gpr(vcpu, rt, vcpu->arch.sprg3); break;
/* Note: SPRG4-7 are user-readable, so we don't get
* a trap. */
case SPRN_DEC:
{
u64 jd = get_tb() - vcpu->arch.dec_jiffies;
vcpu->arch.gpr[rt] = vcpu->arch.dec - jd;
pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n", vcpu->arch.dec, jd, vcpu->arch.gpr[rt]);
kvmppc_set_gpr(vcpu, rt, vcpu->arch.dec - jd);
pr_debug(KERN_INFO "mfDEC: %x - %llx = %lx\n",
vcpu->arch.dec, jd,
kvmppc_get_gpr(vcpu, rt));
break;
}
default:
emulated = kvmppc_core_emulate_mfspr(vcpu, sprn, rt);
if (emulated == EMULATE_FAIL) {
printk("mfspr: unknown spr %x\n", sprn);
vcpu->arch.gpr[rt] = 0;
kvmppc_set_gpr(vcpu, rt, 0);
}
break;
}
@ -269,7 +278,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
@ -278,14 +287,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rb = get_rb(inst);
ea = vcpu->arch.gpr[rb];
ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
ea += vcpu->arch.gpr[ra];
ea += kvmppc_get_gpr(vcpu, ra);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
2, 1);
vcpu->arch.gpr[ra] = ea;
kvmppc_set_gpr(vcpu, ra, ea);
break;
case OP_31_XOP_MTSPR:
@ -293,9 +302,9 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
rs = get_rs(inst);
switch (sprn) {
case SPRN_SRR0:
vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break;
vcpu->arch.srr0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SRR1:
vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break;
vcpu->arch.srr1 = kvmppc_get_gpr(vcpu, rs); break;
/* XXX We need to context-switch the timebase for
* watchdog and FIT. */
@ -305,18 +314,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case SPRN_MSSSR0: break;
case SPRN_DEC:
vcpu->arch.dec = vcpu->arch.gpr[rs];
vcpu->arch.dec = kvmppc_get_gpr(vcpu, rs);
kvmppc_emulate_dec(vcpu);
break;
case SPRN_SPRG0:
vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg0 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG1:
vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg1 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG2:
vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg2 = kvmppc_get_gpr(vcpu, rs); break;
case SPRN_SPRG3:
vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break;
vcpu->arch.sprg3 = kvmppc_get_gpr(vcpu, rs); break;
default:
emulated = kvmppc_core_emulate_mtspr(vcpu, sprn, rs);
@ -348,7 +357,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
4, 0);
break;
@ -363,7 +372,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
rb = get_rb(inst);
emulated = kvmppc_handle_store(run, vcpu,
vcpu->arch.gpr[rs],
kvmppc_get_gpr(vcpu, rs),
2, 0);
break;
@ -382,7 +391,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_LBZ:
@ -394,35 +403,39 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STW:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
break;
case OP_STWU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
4, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STB:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
break;
case OP_STBU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
1, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_LHZ:
@ -434,21 +447,23 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
ra = get_ra(inst);
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
case OP_STH:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
break;
case OP_STHU:
ra = get_ra(inst);
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs],
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
2, 1);
vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed;
kvmppc_set_gpr(vcpu, ra, vcpu->arch.paddr_accessed);
break;
default:
@ -461,6 +476,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
advance = 0;
printk(KERN_ERR "Couldn't emulate instruction 0x%08x "
"(op %d xop %d)\n", inst, get_op(inst), get_xop(inst));
kvmppc_core_queue_program(vcpu, 0);
}
}

View file

@ -137,6 +137,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvmppc_free_vcpus(kvm);
kvm_free_physmem(kvm);
cleanup_srcu_struct(&kvm->srcu);
kfree(kvm);
}
@ -165,14 +166,24 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
{
return;
}
void kvm_arch_flush_shadow(struct kvm *kvm)
{
}
@ -260,34 +271,35 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
*gpr = run->dcr.data;
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data);
}
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
ulong *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr];
ulong gpr;
if (run->mmio.len > sizeof(*gpr)) {
if (run->mmio.len > sizeof(gpr)) {
printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len);
return;
}
if (vcpu->arch.mmio_is_bigendian) {
switch (run->mmio.len) {
case 4: *gpr = *(u32 *)run->mmio.data; break;
case 2: *gpr = *(u16 *)run->mmio.data; break;
case 1: *gpr = *(u8 *)run->mmio.data; break;
case 4: gpr = *(u32 *)run->mmio.data; break;
case 2: gpr = *(u16 *)run->mmio.data; break;
case 1: gpr = *(u8 *)run->mmio.data; break;
}
} else {
/* Convert BE data from userland back to LE. */
switch (run->mmio.len) {
case 4: *gpr = ld_le32((u32 *)run->mmio.data); break;
case 2: *gpr = ld_le16((u16 *)run->mmio.data); break;
case 1: *gpr = *(u8 *)run->mmio.data; break;
case 4: gpr = ld_le32((u32 *)run->mmio.data); break;
case 2: gpr = ld_le16((u16 *)run->mmio.data); break;
case 1: gpr = *(u8 *)run->mmio.data; break;
}
}
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
}
int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,

View file

@ -242,6 +242,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_free_physmem(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
cleanup_srcu_struct(&kvm->srcu);
kfree(kvm);
}
@ -690,14 +691,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
/* Section: memory related */
int kvm_arch_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
int i;
struct kvm_vcpu *vcpu;
/* A few sanity checks. We can have exactly one memory slot which has
to start at guest virtual zero and which has to be located at a
page boundary in userland and which has to end at a page boundary.
@ -720,14 +719,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
if (!user_alloc)
return -EINVAL;
return 0;
}
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc)
{
int i;
struct kvm_vcpu *vcpu;
/* request update of sie control block for all available vcpus */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
continue;
kvm_s390_inject_sigp_stop(vcpu, ACTION_RELOADVCPU_ON_STOP);
}
return 0;
}
void kvm_arch_flush_shadow(struct kvm *kvm)

View file

@ -67,10 +67,14 @@ static inline long kvm_s390_vcpu_get_memsize(struct kvm_vcpu *vcpu)
static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
{
int idx;
struct kvm_memory_slot *mem;
struct kvm_memslots *memslots;
down_read(&vcpu->kvm->slots_lock);
mem = &vcpu->kvm->memslots[0];
idx = srcu_read_lock(&vcpu->kvm->srcu);
memslots = rcu_dereference(vcpu->kvm->memslots);
mem = &memslots->memslots[0];
vcpu->arch.sie_block->gmsor = mem->userspace_addr;
vcpu->arch.sie_block->gmslm =
@ -78,7 +82,7 @@ static inline void kvm_s390_vcpu_set_mem(struct kvm_vcpu *vcpu)
(mem->npages << PAGE_SHIFT) +
VIRTIODESCSPACE - 1ul;
up_read(&vcpu->kvm->slots_lock);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
}
/* implemented in priv.c */

View file

@ -11,6 +11,7 @@ header-y += sigcontext32.h
header-y += ucontext.h
header-y += processor-flags.h
header-y += hw_breakpoint.h
header-y += hyperv.h
unifdef-y += e820.h
unifdef-y += ist.h

View file

@ -0,0 +1,186 @@
#ifndef _ASM_X86_KVM_HYPERV_H
#define _ASM_X86_KVM_HYPERV_H
#include <linux/types.h>
/*
* The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
* is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
*/
#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
#define HYPERV_CPUID_INTERFACE 0x40000001
#define HYPERV_CPUID_VERSION 0x40000002
#define HYPERV_CPUID_FEATURES 0x40000003
#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
/*
* Feature identification. EAX indicates which features are available
* to the partition based upon the current partition privileges.
*/
/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */
#define HV_X64_MSR_VP_RUNTIME_AVAILABLE (1 << 0)
/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
#define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1)
/*
* Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM
* and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available
*/
#define HV_X64_MSR_SYNIC_AVAILABLE (1 << 2)
/*
* Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through
* HV_X64_MSR_STIMER3_COUNT) available
*/
#define HV_X64_MSR_SYNTIMER_AVAILABLE (1 << 3)
/*
* APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR)
* are available
*/
#define HV_X64_MSR_APIC_ACCESS_AVAILABLE (1 << 4)
/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/
#define HV_X64_MSR_HYPERCALL_AVAILABLE (1 << 5)
/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/
#define HV_X64_MSR_VP_INDEX_AVAILABLE (1 << 6)
/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/
#define HV_X64_MSR_RESET_AVAILABLE (1 << 7)
/*
* Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE,
* HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE,
* HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available
*/
#define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8)
/*
* Feature identification: EBX indicates which flags were specified at
* partition creation. The format is the same as the partition creation
* flag structure defined in section Partition Creation Flags.
*/
#define HV_X64_CREATE_PARTITIONS (1 << 0)
#define HV_X64_ACCESS_PARTITION_ID (1 << 1)
#define HV_X64_ACCESS_MEMORY_POOL (1 << 2)
#define HV_X64_ADJUST_MESSAGE_BUFFERS (1 << 3)
#define HV_X64_POST_MESSAGES (1 << 4)
#define HV_X64_SIGNAL_EVENTS (1 << 5)
#define HV_X64_CREATE_PORT (1 << 6)
#define HV_X64_CONNECT_PORT (1 << 7)
#define HV_X64_ACCESS_STATS (1 << 8)
#define HV_X64_DEBUGGING (1 << 11)
#define HV_X64_CPU_POWER_MANAGEMENT (1 << 12)
#define HV_X64_CONFIGURE_PROFILER (1 << 13)
/*
* Feature identification. EDX indicates which miscellaneous features
* are available to the partition.
*/
/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
#define HV_X64_MWAIT_AVAILABLE (1 << 0)
/* Guest debugging support is available */
#define HV_X64_GUEST_DEBUGGING_AVAILABLE (1 << 1)
/* Performance Monitor support is available*/
#define HV_X64_PERF_MONITOR_AVAILABLE (1 << 2)
/* Support for physical CPU dynamic partitioning events is available*/
#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1 << 3)
/*
* Support for passing hypercall input parameter block via XMM
* registers is available
*/
#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4)
/* Support for a virtual guest idle state is available */
#define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5)
/*
* Implementation recommendations. Indicates which behaviors the hypervisor
* recommends the OS implement for optimal performance.
*/
/*
* Recommend using hypercall for address space switches rather
* than MOV to CR3 instruction
*/
#define HV_X64_MWAIT_RECOMMENDED (1 << 0)
/* Recommend using hypercall for local TLB flushes rather
* than INVLPG or MOV to CR3 instructions */
#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED (1 << 1)
/*
* Recommend using hypercall for remote TLB flushes rather
* than inter-processor interrupts
*/
#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED (1 << 2)
/*
* Recommend using MSRs for accessing APIC registers
* EOI, ICR and TPR rather than their memory-mapped counterparts
*/
#define HV_X64_APIC_ACCESS_RECOMMENDED (1 << 3)
/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
#define HV_X64_SYSTEM_RESET_RECOMMENDED (1 << 4)
/*
* Recommend using relaxed timing for this partition. If used,
* the VM should disable any watchdog timeouts that rely on the
* timely delivery of external interrupts
*/
#define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5)
/* MSR used to identify the guest OS. */
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
/* MSR used to setup pages used to communicate with the hypervisor. */
#define HV_X64_MSR_HYPERCALL 0x40000001
/* MSR used to provide vcpu index */
#define HV_X64_MSR_VP_INDEX 0x40000002
/* Define the virtual APIC registers */
#define HV_X64_MSR_EOI 0x40000070
#define HV_X64_MSR_ICR 0x40000071
#define HV_X64_MSR_TPR 0x40000072
#define HV_X64_MSR_APIC_ASSIST_PAGE 0x40000073
/* Define synthetic interrupt controller model specific registers. */
#define HV_X64_MSR_SCONTROL 0x40000080
#define HV_X64_MSR_SVERSION 0x40000081
#define HV_X64_MSR_SIEFP 0x40000082
#define HV_X64_MSR_SIMP 0x40000083
#define HV_X64_MSR_EOM 0x40000084
#define HV_X64_MSR_SINT0 0x40000090
#define HV_X64_MSR_SINT1 0x40000091
#define HV_X64_MSR_SINT2 0x40000092
#define HV_X64_MSR_SINT3 0x40000093
#define HV_X64_MSR_SINT4 0x40000094
#define HV_X64_MSR_SINT5 0x40000095
#define HV_X64_MSR_SINT6 0x40000096
#define HV_X64_MSR_SINT7 0x40000097
#define HV_X64_MSR_SINT8 0x40000098
#define HV_X64_MSR_SINT9 0x40000099
#define HV_X64_MSR_SINT10 0x4000009A
#define HV_X64_MSR_SINT11 0x4000009B
#define HV_X64_MSR_SINT12 0x4000009C
#define HV_X64_MSR_SINT13 0x4000009D
#define HV_X64_MSR_SINT14 0x4000009E
#define HV_X64_MSR_SINT15 0x4000009F
#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
/* Declare the various hypercall operations. */
#define HV_X64_HV_NOTIFY_LONG_SPIN_WAIT 0x0008
#define HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE 0x00000001
#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT 12
#define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
#define HV_PROCESSOR_POWER_STATE_C0 0
#define HV_PROCESSOR_POWER_STATE_C1 1
#define HV_PROCESSOR_POWER_STATE_C2 2
#define HV_PROCESSOR_POWER_STATE_C3 3
/* hypercall status code */
#define HV_STATUS_SUCCESS 0
#define HV_STATUS_INVALID_HYPERCALL_CODE 2
#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
#define HV_STATUS_INVALID_ALIGNMENT 4
#endif

View file

@ -54,13 +54,23 @@ struct x86_emulate_ctxt;
struct x86_emulate_ops {
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
* Used for instruction fetch, stack operations, and others.
* Used for descriptor reading.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu);
unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
/*
* fetch: Read bytes of standard (non-emulated/special) memory.
* Used for instruction fetch.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*fetch)(unsigned long addr, void *val,
unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
/*
* read_emulated: Read bytes from emulated/special memory area.
@ -74,7 +84,7 @@ struct x86_emulate_ops {
struct kvm_vcpu *vcpu);
/*
* write_emulated: Read bytes from emulated/special memory area.
* write_emulated: Write bytes to emulated/special memory area.
* @addr: [IN ] Linear address to which to write.
* @val: [IN ] Value to write to memory (low-order bytes used as
* required).
@ -168,6 +178,7 @@ struct x86_emulate_ctxt {
/* Execution mode, passed to the emulator. */
#define X86EMUL_MODE_REAL 0 /* Real mode. */
#define X86EMUL_MODE_VM86 1 /* Virtual 8086 mode. */
#define X86EMUL_MODE_PROT16 2 /* 16-bit protected mode. */
#define X86EMUL_MODE_PROT32 4 /* 32-bit protected mode. */
#define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */

View file

@ -25,7 +25,7 @@
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#define KVM_MAX_VCPUS 16
#define KVM_MAX_VCPUS 64
#define KVM_MEMORY_SLOTS 32
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 4
@ -38,19 +38,6 @@
#define CR3_L_MODE_RESERVED_BITS (CR3_NONPAE_RESERVED_BITS | \
0xFFFFFF0000000000ULL)
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \
(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_TS | X86_CR0_MP)
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_GUEST_CR4_MASK \
(X86_CR4_VME | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_VMXE)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
#define INVALID_PAGE (~(hpa_t)0)
#define UNMAPPED_GVA (~(gpa_t)0)
@ -256,7 +243,8 @@ struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
u32 *error);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
@ -282,13 +270,15 @@ struct kvm_vcpu_arch {
u32 regs_dirty;
unsigned long cr0;
unsigned long cr0_guest_owned_bits;
unsigned long cr2;
unsigned long cr3;
unsigned long cr4;
unsigned long cr4_guest_owned_bits;
unsigned long cr8;
u32 hflags;
u64 pdptrs[4]; /* pae */
u64 shadow_efer;
u64 efer;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
int32_t apic_arb_prio;
@ -374,17 +364,27 @@ struct kvm_vcpu_arch {
/* used for guest single stepping over the given code position */
u16 singlestep_cs;
unsigned long singlestep_rip;
/* fields used by HYPER-V emulation */
u64 hv_vapic;
};
struct kvm_mem_alias {
gfn_t base_gfn;
unsigned long npages;
gfn_t target_gfn;
#define KVM_ALIAS_INVALID 1UL
unsigned long flags;
};
struct kvm_arch{
int naliases;
#define KVM_ARCH_HAS_UNALIAS_INSTANTIATION
struct kvm_mem_aliases {
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int naliases;
};
struct kvm_arch {
struct kvm_mem_aliases *aliases;
unsigned int n_free_mmu_pages;
unsigned int n_requested_mmu_pages;
@ -416,6 +416,10 @@ struct kvm_arch{
s64 kvmclock_offset;
struct kvm_xen_hvm_config xen_hvm_config;
/* fields used by HYPER-V emulation */
u64 hv_guest_os_id;
u64 hv_hypercall;
};
struct kvm_vm_stat {
@ -471,6 +475,7 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
@ -492,6 +497,7 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu);
void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
@ -501,12 +507,13 @@ struct kvm_x86_ops {
void (*set_idt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*get_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct descriptor_table *dt);
unsigned long (*get_dr)(struct kvm_vcpu *vcpu, int dr);
void (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value,
int *exception);
int (*get_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long *dest);
int (*set_dr)(struct kvm_vcpu *vcpu, int dr, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
void (*fpu_activate)(struct kvm_vcpu *vcpu);
void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
void (*tlb_flush)(struct kvm_vcpu *vcpu);
@ -531,7 +538,8 @@ struct kvm_x86_ops {
int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
int (*get_tdp_level)(void);
u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
bool (*gb_page_enable)(void);
int (*get_lpage_level)(void);
bool (*rdtscp_supported)(void);
const struct trace_print_flags *exit_reasons_str;
};
@ -606,8 +614,7 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
int type_bits, int seg);
int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason);
@ -653,6 +660,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
@ -666,6 +677,7 @@ void kvm_disable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
bool kvm_check_iopl(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);

View file

@ -2,6 +2,7 @@
#define _ASM_X86_KVM_PARA_H
#include <linux/types.h>
#include <asm/hyperv.h>
/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx. It
* should be used to determine that a VM is running under KVM.

View file

@ -313,7 +313,7 @@ struct __attribute__ ((__packed__)) vmcb {
#define SVM_EXIT_ERR -1
#define SVM_CR0_SELECTIVE_MASK (1 << 3 | 1) /* TS and MP */
#define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
#define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"

View file

@ -53,6 +53,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
@ -251,6 +252,7 @@ enum vmcs_field {
#define EXIT_REASON_MSR_READ 31
#define EXIT_REASON_MSR_WRITE 32
#define EXIT_REASON_MWAIT_INSTRUCTION 36
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
@ -362,6 +364,7 @@ enum vmcs_field {
#define VMX_EPTP_UC_BIT (1ull << 8)
#define VMX_EPTP_WB_BIT (1ull << 14)
#define VMX_EPT_2MB_PAGE_BIT (1ull << 16)
#define VMX_EPT_1GB_PAGE_BIT (1ull << 17)
#define VMX_EPT_EXTENT_INDIVIDUAL_BIT (1ull << 24)
#define VMX_EPT_EXTENT_CONTEXT_BIT (1ull << 25)
#define VMX_EPT_EXTENT_GLOBAL_BIT (1ull << 26)
@ -374,7 +377,7 @@ enum vmcs_field {
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IGMT_BIT (1ull << 6)
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul

View file

@ -301,7 +301,8 @@ static int __init vsyscall_init(void)
register_sysctl_table(kernel_root_table2);
#endif
on_each_cpu(cpu_vsyscall_init, NULL, 1);
hotcpu_notifier(cpu_vsyscall_notifier, 0);
/* notifier priority > KVM */
hotcpu_notifier(cpu_vsyscall_notifier, 30);
return 0;
}

View file

@ -29,6 +29,7 @@ config KVM
select HAVE_KVM_EVENTFD
select KVM_APIC_ARCHITECTURE
select USER_RETURN_NOTIFIER
select KVM_MMIO
---help---
Support hosting fully virtualized guest machines using hardware
virtualization extensions. You will need a fairly recent

View file

@ -32,7 +32,7 @@
#include <linux/module.h>
#include <asm/kvm_emulate.h>
#include "mmu.h" /* for is_long_mode() */
#include "x86.h"
/*
* Opcode effective-address decode tables.
@ -76,6 +76,8 @@
#define GroupDual (1<<15) /* Alternate decoding of mod == 3 */
#define GroupMask 0xff /* Group number stored in bits 0:7 */
/* Misc flags */
#define Lock (1<<26) /* lock prefix is allowed for the instruction */
#define Priv (1<<27) /* instruction generates #GP if current CPL != 0 */
#define No64 (1<<28)
/* Source 2 operand type */
#define Src2None (0<<29)
@ -88,39 +90,40 @@
enum {
Group1_80, Group1_81, Group1_82, Group1_83,
Group1A, Group3_Byte, Group3, Group4, Group5, Group7,
Group8, Group9,
};
static u32 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x08 - 0x0F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, 0,
/* 0x10 - 0x17 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x18 - 0x1F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
ImplicitOps | Stack | No64, ImplicitOps | Stack | No64,
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
/* 0x28 - 0x2F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
/* 0x30 - 0x37 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
/* 0x38 - 0x3F */
@ -156,7 +159,7 @@ static u32 opcode_table[256] = {
Group | Group1_80, Group | Group1_81,
Group | Group1_82, Group | Group1_83,
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
/* 0x88 - 0x8F */
ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov,
ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@ -210,7 +213,7 @@ static u32 opcode_table[256] = {
SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
ImplicitOps | Priv, ImplicitOps, Group | Group3_Byte, Group | Group3,
/* 0xF8 - 0xFF */
ImplicitOps, 0, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
@ -218,16 +221,20 @@ static u32 opcode_table[256] = {
static u32 twobyte_table[256] = {
/* 0x00 - 0x0F */
0, Group | GroupDual | Group7, 0, 0, 0, ImplicitOps, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0,
0, Group | GroupDual | Group7, 0, 0,
0, ImplicitOps, ImplicitOps | Priv, 0,
ImplicitOps | Priv, ImplicitOps | Priv, 0, 0,
0, ImplicitOps | ModRM, 0, 0,
/* 0x10 - 0x1F */
0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps | ModRM, 0, 0, 0, 0, 0, 0, 0,
/* 0x20 - 0x2F */
ModRM | ImplicitOps, ModRM, ModRM | ImplicitOps, ModRM, 0, 0, 0, 0,
ModRM | ImplicitOps | Priv, ModRM | Priv,
ModRM | ImplicitOps | Priv, ModRM | Priv,
0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x30 - 0x3F */
ImplicitOps, 0, ImplicitOps, 0,
ImplicitOps, ImplicitOps, 0, 0,
ImplicitOps | Priv, 0, ImplicitOps | Priv, 0,
ImplicitOps, ImplicitOps | Priv, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0x40 - 0x47 */
DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov,
@ -257,21 +264,23 @@ static u32 twobyte_table[256] = {
DstMem | SrcReg | Src2CL | ModRM, 0, 0,
/* 0xA8 - 0xAF */
ImplicitOps | Stack, ImplicitOps | Stack,
0, DstMem | SrcReg | ModRM | BitOp,
0, DstMem | SrcReg | ModRM | BitOp | Lock,
DstMem | SrcReg | Src2ImmByte | ModRM,
DstMem | SrcReg | Src2CL | ModRM,
ModRM, 0,
/* 0xB0 - 0xB7 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0,
DstMem | SrcReg | ModRM | BitOp,
ByteOp | DstMem | SrcReg | ModRM | Lock, DstMem | SrcReg | ModRM | Lock,
0, DstMem | SrcReg | ModRM | BitOp | Lock,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xB8 - 0xBF */
0, 0, DstMem | SrcImmByte | ModRM, DstMem | SrcReg | ModRM | BitOp,
0, 0,
Group | Group8, DstMem | SrcReg | ModRM | BitOp | Lock,
0, 0, ByteOp | DstReg | SrcMem | ModRM | Mov,
DstReg | SrcMem16 | ModRM | Mov,
/* 0xC0 - 0xCF */
0, 0, 0, DstMem | SrcReg | ModRM | Mov, 0, 0, 0, ImplicitOps | ModRM,
0, 0, 0, DstMem | SrcReg | ModRM | Mov,
0, 0, 0, Group | GroupDual | Group9,
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xD0 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -283,25 +292,41 @@ static u32 twobyte_table[256] = {
static u32 group_table[] = {
[Group1_80*8] =
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM | Lock,
ByteOp | DstMem | SrcImm | ModRM,
[Group1_81*8] =
DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
DstMem | SrcImm | ModRM, DstMem | SrcImm | ModRM,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM | Lock,
DstMem | SrcImm | ModRM,
[Group1_82*8] =
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64 | Lock,
ByteOp | DstMem | SrcImm | ModRM | No64,
[Group1_83*8] =
DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM,
[Group1A*8] =
DstMem | SrcNone | ModRM | Mov | Stack, 0, 0, 0, 0, 0, 0, 0,
[Group3_Byte*8] =
@ -320,24 +345,39 @@ static u32 group_table[] = {
SrcMem | ModRM | Stack, 0,
SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
[Group7*8] =
0, 0, ModRM | SrcMem, ModRM | SrcMem,
0, 0, ModRM | SrcMem | Priv, ModRM | SrcMem | Priv,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp,
SrcMem16 | ModRM | Mov | Priv, SrcMem | ModRM | ByteOp | Priv,
[Group8*8] =
0, 0, 0, 0,
DstMem | SrcImmByte | ModRM, DstMem | SrcImmByte | ModRM | Lock,
DstMem | SrcImmByte | ModRM | Lock, DstMem | SrcImmByte | ModRM | Lock,
[Group9*8] =
0, ImplicitOps | ModRM | Lock, 0, 0, 0, 0, 0, 0,
};
static u32 group2_table[] = {
[Group7*8] =
SrcNone | ModRM, 0, 0, SrcNone | ModRM,
SrcNone | ModRM | Priv, 0, 0, SrcNone | ModRM,
SrcNone | ModRM | DstMem | Mov, 0,
SrcMem16 | ModRM | Mov, 0,
[Group9*8] =
0, 0, 0, 0, 0, 0, 0, 0,
};
/* EFLAGS bit definitions. */
#define EFLG_ID (1<<21)
#define EFLG_VIP (1<<20)
#define EFLG_VIF (1<<19)
#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
#define EFLG_IOPL (3<<12)
#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
@ -606,7 +646,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
if (linear < fc->start || linear >= fc->end) {
size = min(15UL, PAGE_SIZE - offset_in_page(linear));
rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
if (rc)
return rc;
fc->start = linear;
@ -661,11 +701,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
op_bytes = 3;
*address = 0;
rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
ctxt->vcpu);
ctxt->vcpu, NULL);
if (rc)
return rc;
rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
ctxt->vcpu);
ctxt->vcpu, NULL);
return rc;
}
@ -889,6 +929,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
switch (mode) {
case X86EMUL_MODE_REAL:
case X86EMUL_MODE_VM86:
case X86EMUL_MODE_PROT16:
def_op_bytes = def_ad_bytes = 2;
break;
@ -975,7 +1016,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
}
if (mode == X86EMUL_MODE_PROT64 && (c->d & No64)) {
kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");;
kvm_report_emulation_failure(ctxt->vcpu, "invalid x86/64 instruction");
return -1;
}
@ -1196,13 +1237,56 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
rc = ops->read_emulated(register_address(c, ss_base(ctxt),
c->regs[VCPU_REGS_RSP]),
dest, len, ctxt->vcpu);
if (rc != 0)
if (rc != X86EMUL_CONTINUE)
return rc;
register_address_increment(c, &c->regs[VCPU_REGS_RSP], len);
return rc;
}
static int emulate_popf(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
void *dest, int len)
{
int rc;
unsigned long val, change_mask;
int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
rc = emulate_pop(ctxt, ops, &val, len);
if (rc != X86EMUL_CONTINUE)
return rc;
change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
| EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
switch(ctxt->mode) {
case X86EMUL_MODE_PROT64:
case X86EMUL_MODE_PROT32:
case X86EMUL_MODE_PROT16:
if (cpl == 0)
change_mask |= EFLG_IOPL;
if (cpl <= iopl)
change_mask |= EFLG_IF;
break;
case X86EMUL_MODE_VM86:
if (iopl < 3) {
kvm_inject_gp(ctxt->vcpu, 0);
return X86EMUL_PROPAGATE_FAULT;
}
change_mask |= EFLG_IF;
break;
default: /* real mode */
change_mask |= (EFLG_IOPL | EFLG_IF);
break;
}
*(unsigned long *)dest =
(ctxt->eflags & ~change_mask) | (val & change_mask);
return rc;
}
static void emulate_push_sreg(struct x86_emulate_ctxt *ctxt, int seg)
{
struct decode_cache *c = &ctxt->decode;
@ -1225,7 +1309,7 @@ static int emulate_pop_sreg(struct x86_emulate_ctxt *ctxt,
if (rc != 0)
return rc;
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, 1, seg);
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)selector, seg);
return rc;
}
@ -1370,7 +1454,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
int rc;
rc = ops->read_emulated(memop, &old, 8, ctxt->vcpu);
if (rc != 0)
if (rc != X86EMUL_CONTINUE)
return rc;
if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) ||
@ -1385,7 +1469,7 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
(u32) c->regs[VCPU_REGS_RBX];
rc = ops->cmpxchg_emulated(memop, &old, &new, 8, ctxt->vcpu);
if (rc != 0)
if (rc != X86EMUL_CONTINUE)
return rc;
ctxt->eflags |= EFLG_ZF;
}
@ -1407,7 +1491,7 @@ static int emulate_ret_far(struct x86_emulate_ctxt *ctxt,
rc = emulate_pop(ctxt, ops, &cs, c->op_bytes);
if (rc)
return rc;
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, 1, VCPU_SREG_CS);
rc = kvm_load_segment_descriptor(ctxt->vcpu, (u16)cs, VCPU_SREG_CS);
return rc;
}
@ -1451,7 +1535,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt,
&c->dst.val,
c->dst.bytes,
ctxt->vcpu);
if (rc != 0)
if (rc != X86EMUL_CONTINUE)
return rc;
break;
case OP_NONE:
@ -1514,9 +1598,8 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
u64 msr_data;
/* syscall is not available in real mode */
if (c->lock_prefix || ctxt->mode == X86EMUL_MODE_REAL
|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
return -1;
if (ctxt->mode == X86EMUL_MODE_REAL || ctxt->mode == X86EMUL_MODE_VM86)
return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
@ -1553,7 +1636,7 @@ emulate_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
}
return 0;
return X86EMUL_CONTINUE;
}
static int
@ -1563,22 +1646,17 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
struct kvm_segment cs, ss;
u64 msr_data;
/* inject #UD if LOCK prefix is used */
if (c->lock_prefix)
return -1;
/* inject #GP if in real mode or paging is disabled */
if (ctxt->mode == X86EMUL_MODE_REAL ||
!(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
/* inject #GP if in real mode */
if (ctxt->mode == X86EMUL_MODE_REAL) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_UNHANDLEABLE;
}
/* XXX sysenter/sysexit have not been tested in 64bit mode.
* Therefore, we inject an #UD.
*/
if (ctxt->mode == X86EMUL_MODE_PROT64)
return -1;
return X86EMUL_UNHANDLEABLE;
setup_syscalls_segments(ctxt, &cs, &ss);
@ -1587,13 +1665,13 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
case X86EMUL_MODE_PROT32:
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_PROPAGATE_FAULT;
}
break;
case X86EMUL_MODE_PROT64:
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_PROPAGATE_FAULT;
}
break;
}
@ -1618,7 +1696,7 @@ emulate_sysenter(struct x86_emulate_ctxt *ctxt)
kvm_x86_ops->get_msr(ctxt->vcpu, MSR_IA32_SYSENTER_ESP, &msr_data);
c->regs[VCPU_REGS_RSP] = msr_data;
return 0;
return X86EMUL_CONTINUE;
}
static int
@ -1629,21 +1707,11 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
u64 msr_data;
int usermode;
/* inject #UD if LOCK prefix is used */
if (c->lock_prefix)
return -1;
/* inject #GP if in real mode or paging is disabled */
if (ctxt->mode == X86EMUL_MODE_REAL
|| !(ctxt->vcpu->arch.cr0 & X86_CR0_PE)) {
/* inject #GP if in real mode or Virtual 8086 mode */
if (ctxt->mode == X86EMUL_MODE_REAL ||
ctxt->mode == X86EMUL_MODE_VM86) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
}
/* sysexit must be called from CPL 0 */
if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_UNHANDLEABLE;
}
setup_syscalls_segments(ctxt, &cs, &ss);
@ -1661,7 +1729,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
cs.selector = (u16)(msr_data + 16);
if ((msr_data & 0xfffc) == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = (u16)(msr_data + 24);
break;
@ -1669,7 +1737,7 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
cs.selector = (u16)(msr_data + 32);
if (msr_data == 0x0) {
kvm_inject_gp(ctxt->vcpu, 0);
return -1;
return X86EMUL_PROPAGATE_FAULT;
}
ss.selector = cs.selector + 8;
cs.db = 0;
@ -1685,7 +1753,58 @@ emulate_sysexit(struct x86_emulate_ctxt *ctxt)
c->eip = ctxt->vcpu->arch.regs[VCPU_REGS_RDX];
c->regs[VCPU_REGS_RSP] = ctxt->vcpu->arch.regs[VCPU_REGS_RCX];
return 0;
return X86EMUL_CONTINUE;
}
static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
{
int iopl;
if (ctxt->mode == X86EMUL_MODE_REAL)
return false;
if (ctxt->mode == X86EMUL_MODE_VM86)
return true;
iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
return kvm_x86_ops->get_cpl(ctxt->vcpu) > iopl;
}
static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
u16 port, u16 len)
{
struct kvm_segment tr_seg;
int r;
u16 io_bitmap_ptr;
u8 perm, bit_idx = port & 0x7;
unsigned mask = (1 << len) - 1;
kvm_get_segment(ctxt->vcpu, &tr_seg, VCPU_SREG_TR);
if (tr_seg.unusable)
return false;
if (tr_seg.limit < 103)
return false;
r = ops->read_std(tr_seg.base + 102, &io_bitmap_ptr, 2, ctxt->vcpu,
NULL);
if (r != X86EMUL_CONTINUE)
return false;
if (io_bitmap_ptr + port/8 > tr_seg.limit)
return false;
r = ops->read_std(tr_seg.base + io_bitmap_ptr + port/8, &perm, 1,
ctxt->vcpu, NULL);
if (r != X86EMUL_CONTINUE)
return false;
if ((perm >> bit_idx) & mask)
return false;
return true;
}
static bool emulator_io_permited(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops,
u16 port, u16 len)
{
if (emulator_bad_iopl(ctxt))
if (!emulator_io_port_access_allowed(ctxt, ops, port, len))
return false;
return true;
}
int
@ -1709,6 +1828,18 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs);
saved_eip = c->eip;
/* LOCK prefix is allowed only with some instructions */
if (c->lock_prefix && !(c->d & Lock)) {
kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
goto done;
}
/* Privileged instruction can be executed only in CPL=0 */
if ((c->d & Priv) && kvm_x86_ops->get_cpl(ctxt->vcpu)) {
kvm_inject_gp(ctxt->vcpu, 0);
goto done;
}
if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
memop = c->modrm_ea;
@ -1749,7 +1880,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
&c->src.val,
c->src.bytes,
ctxt->vcpu);
if (rc != 0)
if (rc != X86EMUL_CONTINUE)
goto done;
c->src.orig_val = c->src.val;
}
@ -1768,12 +1899,15 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.ptr = (void *)c->dst.ptr +
(c->src.val & mask) / 8;
}
if (!(c->d & Mov) &&
/* optimisation - avoid slow emulated read */
((rc = ops->read_emulated((unsigned long)c->dst.ptr,
&c->dst.val,
c->dst.bytes, ctxt->vcpu)) != 0))
goto done;
if (!(c->d & Mov)) {
/* optimisation - avoid slow emulated read */
rc = ops->read_emulated((unsigned long)c->dst.ptr,
&c->dst.val,
c->dst.bytes,
ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
}
}
c->dst.orig_val = c->dst.val;
@ -1876,7 +2010,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
break;
case 0x6c: /* insb */
case 0x6d: /* insw/insd */
if (kvm_emulate_pio_string(ctxt->vcpu,
if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
(c->d & ByteOp) ? 1 : c->op_bytes)) {
kvm_inject_gp(ctxt->vcpu, 0);
goto done;
}
if (kvm_emulate_pio_string(ctxt->vcpu,
1,
(c->d & ByteOp) ? 1 : c->op_bytes,
c->rep_prefix ?
@ -1892,6 +2031,11 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
return 0;
case 0x6e: /* outsb */
case 0x6f: /* outsw/outsd */
if (!emulator_io_permited(ctxt, ops, c->regs[VCPU_REGS_RDX],
(c->d & ByteOp) ? 1 : c->op_bytes)) {
kvm_inject_gp(ctxt->vcpu, 0);
goto done;
}
if (kvm_emulate_pio_string(ctxt->vcpu,
0,
(c->d & ByteOp) ? 1 : c->op_bytes,
@ -1978,25 +2122,19 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
break;
case 0x8e: { /* mov seg, r/m16 */
uint16_t sel;
int type_bits;
int err;
sel = c->src.val;
if (c->modrm_reg == VCPU_SREG_CS ||
c->modrm_reg > VCPU_SREG_GS) {
kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
goto done;
}
if (c->modrm_reg == VCPU_SREG_SS)
toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS);
if (c->modrm_reg <= 5) {
type_bits = (c->modrm_reg == 1) ? 9 : 1;
err = kvm_load_segment_descriptor(ctxt->vcpu, sel,
type_bits, c->modrm_reg);
} else {
printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n",
c->modrm);
goto cannot_emulate;
}
if (err < 0)
goto cannot_emulate;
rc = kvm_load_segment_descriptor(ctxt->vcpu, sel, c->modrm_reg);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
@ -2025,7 +2163,10 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
c->dst.bytes = c->op_bytes;
goto pop_instruction;
rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
if (rc != X86EMUL_CONTINUE)
goto done;
break;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
c->dst.val = c->src.val;
@ -2039,11 +2180,12 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.ptr = (unsigned long *)register_address(c,
es_base(ctxt),
c->regs[VCPU_REGS_RDI]);
if ((rc = ops->read_emulated(register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]),
rc = ops->read_emulated(register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]),
&c->dst.val,
c->dst.bytes, ctxt->vcpu)) != 0)
c->dst.bytes, ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@ -2058,10 +2200,11 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->src.ptr = (unsigned long *)register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]);
if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
&c->src.val,
c->src.bytes,
ctxt->vcpu)) != 0)
rc = ops->read_emulated((unsigned long)c->src.ptr,
&c->src.val,
c->src.bytes,
ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
c->dst.type = OP_NONE; /* Disable writeback. */
@ -2069,10 +2212,11 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.ptr = (unsigned long *)register_address(c,
es_base(ctxt),
c->regs[VCPU_REGS_RDI]);
if ((rc = ops->read_emulated((unsigned long)c->dst.ptr,
&c->dst.val,
c->dst.bytes,
ctxt->vcpu)) != 0)
rc = ops->read_emulated((unsigned long)c->dst.ptr,
&c->dst.val,
c->dst.bytes,
ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
DPRINTF("cmps: mem1=0x%p mem2=0x%p\n", c->src.ptr, c->dst.ptr);
@ -2102,12 +2246,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.type = OP_REG;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
if ((rc = ops->read_emulated(register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]),
&c->dst.val,
c->dst.bytes,
ctxt->vcpu)) != 0)
rc = ops->read_emulated(register_address(c,
seg_override_base(ctxt, c),
c->regs[VCPU_REGS_RSI]),
&c->dst.val,
c->dst.bytes,
ctxt->vcpu);
if (rc != X86EMUL_CONTINUE)
goto done;
register_address_increment(c, &c->regs[VCPU_REGS_RSI],
(ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@ -2163,11 +2308,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xe9: /* jmp rel */
goto jmp;
case 0xea: /* jmp far */
if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9,
VCPU_SREG_CS) < 0) {
DPRINTF("jmp far: Failed to load CS descriptor\n");
goto cannot_emulate;
}
if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val,
VCPU_SREG_CS))
goto done;
c->eip = c->src.val;
break;
@ -2185,7 +2328,13 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
case 0xef: /* out (e/r)ax,dx */
port = c->regs[VCPU_REGS_RDX];
io_dir_in = 0;
do_io: if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
do_io:
if (!emulator_io_permited(ctxt, ops, port,
(c->d & ByteOp) ? 1 : c->op_bytes)) {
kvm_inject_gp(ctxt->vcpu, 0);
goto done;
}
if (kvm_emulate_pio(ctxt->vcpu, io_dir_in,
(c->d & ByteOp) ? 1 : c->op_bytes,
port) != 0) {
c->eip = saved_eip;
@ -2210,13 +2359,21 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfa: /* cli */
ctxt->eflags &= ~X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
if (emulator_bad_iopl(ctxt))
kvm_inject_gp(ctxt->vcpu, 0);
else {
ctxt->eflags &= ~X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
}
break;
case 0xfb: /* sti */
toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
if (emulator_bad_iopl(ctxt))
kvm_inject_gp(ctxt->vcpu, 0);
else {
toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE; /* Disable writeback. */
}
break;
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
@ -2319,8 +2476,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
}
break;
case 0x05: /* syscall */
if (emulate_syscall(ctxt) == -1)
goto cannot_emulate;
rc = emulate_syscall(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
else
goto writeback;
break;
@ -2391,14 +2549,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
c->dst.type = OP_NONE;
break;
case 0x34: /* sysenter */
if (emulate_sysenter(ctxt) == -1)
goto cannot_emulate;
rc = emulate_sysenter(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
else
goto writeback;
break;
case 0x35: /* sysexit */
if (emulate_sysexit(ctxt) == -1)
goto cannot_emulate;
rc = emulate_sysexit(ctxt);
if (rc != X86EMUL_CONTINUE)
goto done;
else
goto writeback;
break;

View file

@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
{
struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
irq_ack_notifier);
spin_lock(&ps->inject_lock);
raw_spin_lock(&ps->inject_lock);
if (atomic_dec_return(&ps->pit_timer.pending) < 0)
atomic_inc(&ps->pit_timer.pending);
ps->irq_ack = 1;
spin_unlock(&ps->inject_lock);
raw_spin_unlock(&ps->inject_lock);
}
void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@ -605,7 +605,7 @@ static const struct kvm_io_device_ops speaker_dev_ops = {
.write = speaker_ioport_write,
};
/* Caller must have writers lock on slots_lock */
/* Caller must hold slots_lock */
struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
{
struct kvm_pit *pit;
@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
mutex_init(&pit->pit_state.lock);
mutex_lock(&pit->pit_state.lock);
spin_lock_init(&pit->pit_state.inject_lock);
raw_spin_lock_init(&pit->pit_state.inject_lock);
kvm->arch.vpit = pit;
pit->kvm = kvm;
@ -645,13 +645,13 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_iodevice_init(&pit->dev, &pit_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
if (ret < 0)
goto fail;
if (flags & KVM_PIT_SPEAKER_DUMMY) {
kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
ret = __kvm_io_bus_register_dev(&kvm->pio_bus,
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
&pit->speaker_dev);
if (ret < 0)
goto fail_unregister;
@ -660,11 +660,12 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
return pit;
fail_unregister:
__kvm_io_bus_unregister_dev(&kvm->pio_bus, &pit->dev);
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
fail:
if (pit->irq_source_id >= 0)
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kvm_unregister_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
kvm_unregister_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
kvm_free_irq_source_id(kvm, pit->irq_source_id);
kfree(pit);
return NULL;
@ -723,12 +724,12 @@ void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu)
/* Try to inject pending interrupts when
* last one has been acked.
*/
spin_lock(&ps->inject_lock);
raw_spin_lock(&ps->inject_lock);
if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
ps->irq_ack = 0;
inject = 1;
}
spin_unlock(&ps->inject_lock);
raw_spin_unlock(&ps->inject_lock);
if (inject)
__inject_pit_timer_intr(kvm);
}

View file

@ -27,7 +27,7 @@ struct kvm_kpit_state {
u32 speaker_data_on;
struct mutex lock;
struct kvm_pit *pit;
spinlock_t inject_lock;
raw_spinlock_t inject_lock;
unsigned long irq_ack;
struct kvm_irq_ack_notifier irq_ack_notifier;
};

View file

@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
* Other interrupt may be delivered to PIC while lock is dropped but
* it should be safe since PIC state is already updated at this stage.
*/
spin_unlock(&s->pics_state->lock);
raw_spin_unlock(&s->pics_state->lock);
kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
spin_lock(&s->pics_state->lock);
raw_spin_lock(&s->pics_state->lock);
}
void kvm_pic_clear_isr_ack(struct kvm *kvm)
{
struct kvm_pic *s = pic_irqchip(kvm);
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
s->pics[0].isr_ack = 0xff;
s->pics[1].isr_ack = 0xff;
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
}
/*
@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pic *s)
void kvm_pic_update_irq(struct kvm_pic *s)
{
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
pic_update_irq(s);
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
}
int kvm_pic_set_irq(void *opaque, int irq, int level)
@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
struct kvm_pic *s = opaque;
int ret = -1;
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
if (irq >= 0 && irq < PIC_NUM_PINS) {
ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
pic_update_irq(s);
trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
s->pics[irq >> 3].imr, ret == 0);
}
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
return ret;
}
@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
int irq, irq2, intno;
struct kvm_pic *s = pic_irqchip(kvm);
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
irq = pic_get_irq(&s->pics[0]);
if (irq >= 0) {
pic_intack(&s->pics[0], irq);
@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
intno = s->pics[0].irq_base + irq;
}
pic_update_irq(s);
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
return intno;
}
@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte write\n");
return 0;
}
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_device *this,
elcr_ioport_write(&s->pics[addr & 1], addr, data);
break;
}
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
return 0;
}
@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_device *this,
printk(KERN_ERR "PIC: non byte read\n");
return 0;
}
spin_lock(&s->lock);
raw_spin_lock(&s->lock);
switch (addr) {
case 0x20:
case 0x21:
@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_device *this,
break;
}
*(unsigned char *)val = data;
spin_unlock(&s->lock);
raw_spin_unlock(&s->lock);
return 0;
}
@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
if (!s)
return NULL;
spin_lock_init(&s->lock);
raw_spin_lock_init(&s->lock);
s->kvm = kvm;
s->pics[0].elcr_mask = 0xf8;
s->pics[1].elcr_mask = 0xde;
@ -533,7 +534,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
* Initialize PIO device
*/
kvm_iodevice_init(&s->dev, &picdev_ops);
ret = kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev);
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kfree(s);
return NULL;
@ -541,3 +544,14 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
return s;
}
void kvm_destroy_pic(struct kvm *kvm)
{
struct kvm_pic *vpic = kvm->arch.vpic;
if (vpic) {
kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
kvm->arch.vpic = NULL;
kfree(vpic);
}
}

View file

@ -62,7 +62,7 @@ struct kvm_kpic_state {
};
struct kvm_pic {
spinlock_t lock;
raw_spinlock_t lock;
unsigned pending_acks;
struct kvm *kvm;
struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
@ -75,6 +75,7 @@ struct kvm_pic {
};
struct kvm_pic *kvm_create_pic(struct kvm *kvm);
void kvm_destroy_pic(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
void kvm_pic_clear_isr_ack(struct kvm *kvm);

View file

@ -1,6 +1,11 @@
#ifndef ASM_KVM_CACHE_REGS_H
#define ASM_KVM_CACHE_REGS_H
#define KVM_POSSIBLE_CR0_GUEST_BITS X86_CR0_TS
#define KVM_POSSIBLE_CR4_GUEST_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE)
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
enum kvm_reg reg)
{
@ -38,4 +43,30 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index)
return vcpu->arch.pdptrs[index];
}
static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS;
if (tmask & vcpu->arch.cr0_guest_owned_bits)
kvm_x86_ops->decache_cr0_guest_bits(vcpu);
return vcpu->arch.cr0 & mask;
}
static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu)
{
return kvm_read_cr0_bits(vcpu, ~0UL);
}
static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask)
{
ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS;
if (tmask & vcpu->arch.cr4_guest_owned_bits)
kvm_x86_ops->decache_cr4_guest_bits(vcpu);
return vcpu->arch.cr4 & mask;
}
static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, ~0UL);
}
#endif

View file

@ -1246,3 +1246,34 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
return 0;
}
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
if (!irqchip_in_kernel(vcpu->kvm))
return 1;
/* if this is ICR write vector before command */
if (reg == APIC_ICR)
apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
return apic_reg_write(apic, reg, (u32)data);
}
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u32 low, high = 0;
if (!irqchip_in_kernel(vcpu->kvm))
return 1;
if (apic_reg_read(apic, reg, 4, &low))
return 1;
if (reg == APIC_ICR)
apic_reg_read(apic, APIC_ICR2, 4, &high);
*data = (((u64)high) << 32) | low;
return 0;
}

View file

@ -48,4 +48,12 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu);
int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
{
return vcpu->arch.hv_vapic & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE;
}
#endif

View file

@ -18,6 +18,7 @@
*/
#include "mmu.h"
#include "x86.h"
#include "kvm_cache_regs.h"
#include <linux/kvm_host.h>
@ -29,6 +30,7 @@
#include <linux/swap.h>
#include <linux/hugetlb.h>
#include <linux/compiler.h>
#include <linux/srcu.h>
#include <asm/page.h>
#include <asm/cmpxchg.h>
@ -136,16 +138,6 @@ module_param(oos_shadow, bool, 0644);
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RSVD_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PT_PDPE_LEVEL 3
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
#define RMAP_EXT 4
#define ACC_EXEC_MASK 1
@ -153,6 +145,9 @@ module_param(oos_shadow, bool, 0644);
#define ACC_USER_MASK PT_USER_MASK
#define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK)
#include <trace/events/kvm.h>
#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
@ -229,7 +224,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static int is_write_protection(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr0 & X86_CR0_WP;
return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
}
static int is_cpuid_PSE36(void)
@ -239,7 +234,7 @@ static int is_cpuid_PSE36(void)
static int is_nx(struct kvm_vcpu *vcpu)
{
return vcpu->arch.shadow_efer & EFER_NX;
return vcpu->arch.efer & EFER_NX;
}
static int is_shadow_present_pte(u64 pte)
@ -253,7 +248,7 @@ static int is_large_pte(u64 pte)
return pte & PT_PAGE_SIZE_MASK;
}
static int is_writeble_pte(unsigned long pte)
static int is_writable_pte(unsigned long pte)
{
return pte & PT_WRITABLE_MASK;
}
@ -470,24 +465,10 @@ static int has_wrprotected_page(struct kvm *kvm,
static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
{
unsigned long page_size = PAGE_SIZE;
struct vm_area_struct *vma;
unsigned long addr;
unsigned long page_size;
int i, ret = 0;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return PT_PAGE_TABLE_LEVEL;
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (!vma)
goto out;
page_size = vma_kernel_pagesize(vma);
out:
up_read(&current->mm->mmap_sem);
page_size = kvm_host_page_size(kvm, gfn);
for (i = PT_PAGE_TABLE_LEVEL;
i < (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES); ++i) {
@ -503,8 +484,7 @@ static int host_mapping_level(struct kvm *kvm, gfn_t gfn)
static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
{
struct kvm_memory_slot *slot;
int host_level;
int level = PT_PAGE_TABLE_LEVEL;
int host_level, level, max_level;
slot = gfn_to_memslot(vcpu->kvm, large_gfn);
if (slot && slot->dirty_bitmap)
@ -515,7 +495,10 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn)
if (host_level == PT_PAGE_TABLE_LEVEL)
return host_level;
for (level = PT_DIRECTORY_LEVEL; level <= host_level; ++level)
max_level = kvm_x86_ops->get_lpage_level() < host_level ?
kvm_x86_ops->get_lpage_level() : host_level;
for (level = PT_DIRECTORY_LEVEL; level <= max_level; ++level)
if (has_wrprotected_page(vcpu->kvm, large_gfn, level))
break;
@ -633,7 +616,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
pfn = spte_to_pfn(*spte);
if (*spte & shadow_accessed_mask)
kvm_set_pfn_accessed(pfn);
if (is_writeble_pte(*spte))
if (is_writable_pte(*spte))
kvm_set_pfn_dirty(pfn);
rmapp = gfn_to_rmap(kvm, sp->gfns[spte - sp->spt], sp->role.level);
if (!*rmapp) {
@ -662,6 +645,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
prev_desc = desc;
desc = desc->more;
}
pr_err("rmap_remove: %p %llx many->many\n", spte, *spte);
BUG();
}
}
@ -708,7 +692,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
BUG_ON(!spte);
BUG_ON(!(*spte & PT_PRESENT_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
if (is_writeble_pte(*spte)) {
if (is_writable_pte(*spte)) {
__set_spte(spte, *spte & ~PT_WRITABLE_MASK);
write_protected = 1;
}
@ -732,7 +716,7 @@ static int rmap_write_protect(struct kvm *kvm, u64 gfn)
BUG_ON(!(*spte & PT_PRESENT_MASK));
BUG_ON((*spte & (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK)) != (PT_PAGE_SIZE_MASK|PT_PRESENT_MASK));
pgprintk("rmap_write_protect(large): spte %p %llx %lld\n", spte, *spte, gfn);
if (is_writeble_pte(*spte)) {
if (is_writable_pte(*spte)) {
rmap_remove(kvm, spte);
--kvm->stat.lpages;
__set_spte(spte, shadow_trap_nonpresent_pte);
@ -787,7 +771,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
new_spte &= ~PT_WRITABLE_MASK;
new_spte &= ~SPTE_HOST_WRITEABLE;
if (is_writeble_pte(*spte))
if (is_writable_pte(*spte))
kvm_set_pfn_dirty(spte_to_pfn(*spte));
__set_spte(spte, new_spte);
spte = rmap_next(kvm, rmapp, spte);
@ -805,35 +789,32 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
unsigned long data))
{
int i, j;
int ret;
int retval = 0;
struct kvm_memslots *slots;
/*
* If mmap_sem isn't taken, we can look the memslots with only
* the mmu_lock by skipping over the slots with userspace_addr == 0.
*/
for (i = 0; i < kvm->nmemslots; i++) {
struct kvm_memory_slot *memslot = &kvm->memslots[i];
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; i++) {
struct kvm_memory_slot *memslot = &slots->memslots[i];
unsigned long start = memslot->userspace_addr;
unsigned long end;
/* mmu_lock protects userspace_addr */
if (!start)
continue;
end = start + (memslot->npages << PAGE_SHIFT);
if (hva >= start && hva < end) {
gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
retval |= handler(kvm, &memslot->rmap[gfn_offset],
data);
ret = handler(kvm, &memslot->rmap[gfn_offset], data);
for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
int idx = gfn_offset;
idx /= KVM_PAGES_PER_HPAGE(PT_DIRECTORY_LEVEL + j);
retval |= handler(kvm,
ret |= handler(kvm,
&memslot->lpage_info[j][idx].rmap_pde,
data);
}
trace_kvm_age_page(hva, memslot, ret);
retval |= ret;
}
}
@ -856,9 +837,15 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
u64 *spte;
int young = 0;
/* always return old for EPT */
/*
* Emulate the accessed bit for EPT, by checking if this page has
* an EPT mapping, and clearing it if it does. On the next access,
* a new EPT mapping will be established.
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
if (!shadow_accessed_mask)
return 0;
return kvm_unmap_rmapp(kvm, rmapp, data);
spte = rmap_next(kvm, rmapp, NULL);
while (spte) {
@ -1615,7 +1602,7 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn)
{
int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn));
int slot = memslot_id(kvm, gfn);
struct kvm_mmu_page *sp = page_header(__pa(pte));
__set_bit(slot, sp->slot_bitmap);
@ -1639,7 +1626,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
struct page *page;
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
if (gpa == UNMAPPED_GVA)
return NULL;
@ -1852,7 +1839,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
* is responsibility of mmu_get_page / kvm_sync_page.
* Same reasoning can be applied to dirty page accounting.
*/
if (!can_unsync && is_writeble_pte(*sptep))
if (!can_unsync && is_writable_pte(*sptep))
goto set_pte;
if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
@ -1860,7 +1847,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
__func__, gfn);
ret = 1;
pte_access &= ~ACC_WRITE_MASK;
if (is_writeble_pte(spte))
if (is_writable_pte(spte))
spte &= ~PT_WRITABLE_MASK;
}
}
@ -1881,7 +1868,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
bool reset_host_protection)
{
int was_rmapped = 0;
int was_writeble = is_writeble_pte(*sptep);
int was_writable = is_writable_pte(*sptep);
int rmap_count;
pgprintk("%s: spte %llx access %x write_fault %d"
@ -1932,7 +1919,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
if (rmap_count > RMAP_RECYCLE_THRESHOLD)
rmap_recycle(vcpu, sptep, gfn);
} else {
if (was_writeble)
if (was_writable)
kvm_release_pfn_dirty(pfn);
else
kvm_release_pfn_clean(pfn);
@ -2162,8 +2149,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
u32 access, u32 *error)
{
if (error)
*error = 0;
return vaddr;
}
@ -2747,7 +2737,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
if (tdp_enabled)
return 0;
gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@ -2847,16 +2837,13 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
*/
page = alloc_page(GFP_KERNEL | __GFP_DMA32);
if (!page)
goto error_1;
return -ENOMEM;
vcpu->arch.mmu.pae_root = page_address(page);
for (i = 0; i < 4; ++i)
vcpu->arch.mmu.pae_root[i] = INVALID_PAGE;
return 0;
error_1:
free_mmu_pages(vcpu);
return -ENOMEM;
}
int kvm_mmu_create(struct kvm_vcpu *vcpu)
@ -2936,10 +2923,9 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
spin_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int npages;
int npages, idx;
if (!down_read_trylock(&kvm->slots_lock))
continue;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
npages = kvm->arch.n_alloc_mmu_pages -
kvm->arch.n_free_mmu_pages;
@ -2952,7 +2938,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
nr_to_scan--;
spin_unlock(&kvm->mmu_lock);
up_read(&kvm->slots_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
if (kvm_freed)
list_move_tail(&kvm_freed->vm_list, &vm_list);
@ -3019,9 +3005,11 @@ unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm)
int i;
unsigned int nr_mmu_pages;
unsigned int nr_pages = 0;
struct kvm_memslots *slots;
for (i = 0; i < kvm->nmemslots; i++)
nr_pages += kvm->memslots[i].npages;
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; i++)
nr_pages += slots->memslots[i].npages;
nr_mmu_pages = nr_pages * KVM_PERMILLE_MMU_PAGES / 1000;
nr_mmu_pages = max(nr_mmu_pages,
@ -3246,7 +3234,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
audit_mappings_page(vcpu, ent, va, level - 1);
else {
gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, va, NULL);
gfn_t gfn = gpa >> PAGE_SHIFT;
pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
@ -3291,10 +3279,12 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
static int count_rmaps(struct kvm_vcpu *vcpu)
{
int nmaps = 0;
int i, j, k;
int i, j, k, idx;
idx = srcu_read_lock(&kvm->srcu);
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
struct kvm_memory_slot *m = &slots->memslots[i];
struct kvm_rmap_desc *d;
for (j = 0; j < m->npages; ++j) {
@ -3317,6 +3307,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
}
}
}
srcu_read_unlock(&kvm->srcu, idx);
return nmaps;
}

View file

@ -2,6 +2,7 @@
#define __KVM_X86_MMU_H
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
#define PT64_PT_BITS 9
#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
@ -37,6 +38,16 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
#define PT_PDPE_LEVEL 3
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RSVD_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
@ -53,30 +64,6 @@ static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
return kvm_mmu_load(vcpu);
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->arch.shadow_efer & EFER_LMA;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PAE;
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr4 & X86_CR4_PSE;
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return vcpu->arch.cr0 & X86_CR0_PG;
}
static inline int is_present_gpte(unsigned long pte)
{
return pte & PT_PRESENT_MASK;

View file

@ -162,7 +162,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (rsvd_fault)
goto access_error;
if (write_fault && !is_writeble_pte(pte))
if (write_fault && !is_writable_pte(pte))
if (user_fault || is_write_protection(vcpu))
goto access_error;
@ -490,18 +490,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
spin_unlock(&vcpu->kvm->mmu_lock);
}
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
u32 *error)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
r = FNAME(walk_addr)(&walker, vcpu, vaddr,
!!(access & PFERR_WRITE_MASK),
!!(access & PFERR_USER_MASK),
!!(access & PFERR_FETCH_MASK));
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
}
} else if (error)
*error = walker.error_code;
return gpa;
}

View file

@ -231,7 +231,7 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
efer &= ~EFER_LME;
to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
vcpu->arch.shadow_efer = efer;
vcpu->arch.efer = efer;
}
static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
@ -540,6 +540,8 @@ static void init_vmcb(struct vcpu_svm *svm)
struct vmcb_control_area *control = &svm->vmcb->control;
struct vmcb_save_area *save = &svm->vmcb->save;
svm->vcpu.fpu_active = 1;
control->intercept_cr_read = INTERCEPT_CR0_MASK |
INTERCEPT_CR3_MASK |
INTERCEPT_CR4_MASK;
@ -552,13 +554,19 @@ static void init_vmcb(struct vcpu_svm *svm)
control->intercept_dr_read = INTERCEPT_DR0_MASK |
INTERCEPT_DR1_MASK |
INTERCEPT_DR2_MASK |
INTERCEPT_DR3_MASK;
INTERCEPT_DR3_MASK |
INTERCEPT_DR4_MASK |
INTERCEPT_DR5_MASK |
INTERCEPT_DR6_MASK |
INTERCEPT_DR7_MASK;
control->intercept_dr_write = INTERCEPT_DR0_MASK |
INTERCEPT_DR1_MASK |
INTERCEPT_DR2_MASK |
INTERCEPT_DR3_MASK |
INTERCEPT_DR4_MASK |
INTERCEPT_DR5_MASK |
INTERCEPT_DR6_MASK |
INTERCEPT_DR7_MASK;
control->intercept_exceptions = (1 << PF_VECTOR) |
@ -569,6 +577,7 @@ static void init_vmcb(struct vcpu_svm *svm)
control->intercept = (1ULL << INTERCEPT_INTR) |
(1ULL << INTERCEPT_NMI) |
(1ULL << INTERCEPT_SMI) |
(1ULL << INTERCEPT_SELECTIVE_CR0) |
(1ULL << INTERCEPT_CPUID) |
(1ULL << INTERCEPT_INVD) |
(1ULL << INTERCEPT_HLT) |
@ -641,10 +650,8 @@ static void init_vmcb(struct vcpu_svm *svm)
control->intercept &= ~((1ULL << INTERCEPT_TASK_SWITCH) |
(1ULL << INTERCEPT_INVLPG));
control->intercept_exceptions &= ~(1 << PF_VECTOR);
control->intercept_cr_read &= ~(INTERCEPT_CR0_MASK|
INTERCEPT_CR3_MASK);
control->intercept_cr_write &= ~(INTERCEPT_CR0_MASK|
INTERCEPT_CR3_MASK);
control->intercept_cr_read &= ~INTERCEPT_CR3_MASK;
control->intercept_cr_write &= ~INTERCEPT_CR3_MASK;
save->g_pat = 0x0007040600070406ULL;
save->cr3 = 0;
save->cr4 = 0;
@ -730,7 +737,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
init_vmcb(svm);
fx_init(&svm->vcpu);
svm->vcpu.fpu_active = 1;
svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
if (kvm_vcpu_is_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
@ -765,14 +771,16 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (unlikely(cpu != vcpu->cpu)) {
u64 delta;
/*
* Make sure that the guest sees a monotonically
* increasing TSC.
*/
delta = vcpu->arch.host_tsc - native_read_tsc();
svm->vmcb->control.tsc_offset += delta;
if (is_nested(svm))
svm->nested.hsave->control.tsc_offset += delta;
if (check_tsc_unstable()) {
/*
* Make sure that the guest sees a monotonically
* increasing TSC.
*/
delta = vcpu->arch.host_tsc - native_read_tsc();
svm->vmcb->control.tsc_offset += delta;
if (is_nested(svm))
svm->nested.hsave->control.tsc_offset += delta;
}
vcpu->cpu = cpu;
kvm_migrate_timers(vcpu);
svm->asid_generation = 0;
@ -954,42 +962,59 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
svm->vmcb->save.gdtr.base = dt->base ;
}
static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
}
static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
}
static void update_cr0_intercept(struct vcpu_svm *svm)
{
ulong gcr0 = svm->vcpu.arch.cr0;
u64 *hcr0 = &svm->vmcb->save.cr0;
if (!svm->vcpu.fpu_active)
*hcr0 |= SVM_CR0_SELECTIVE_MASK;
else
*hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
| (gcr0 & SVM_CR0_SELECTIVE_MASK);
if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
svm->vmcb->control.intercept_cr_read &= ~INTERCEPT_CR0_MASK;
svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR0_MASK;
} else {
svm->vmcb->control.intercept_cr_read |= INTERCEPT_CR0_MASK;
svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR0_MASK;
}
}
static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
#ifdef CONFIG_X86_64
if (vcpu->arch.shadow_efer & EFER_LME) {
if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
vcpu->arch.shadow_efer |= EFER_LMA;
vcpu->arch.efer |= EFER_LMA;
svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
}
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
vcpu->arch.shadow_efer &= ~EFER_LMA;
vcpu->arch.efer &= ~EFER_LMA;
svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
}
}
#endif
if (npt_enabled)
goto set;
if ((vcpu->arch.cr0 & X86_CR0_TS) && !(cr0 & X86_CR0_TS)) {
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
vcpu->fpu_active = 1;
}
vcpu->arch.cr0 = cr0;
cr0 |= X86_CR0_PG | X86_CR0_WP;
if (!vcpu->fpu_active) {
svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
if (!npt_enabled)
cr0 |= X86_CR0_PG | X86_CR0_WP;
if (!vcpu->fpu_active)
cr0 |= X86_CR0_TS;
}
set:
/*
* re-enable caching here because the QEMU bios
* does not do it - this results in some delay at
@ -997,6 +1022,7 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
*/
cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
update_cr0_intercept(svm);
}
static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@ -1102,76 +1128,70 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
svm->vmcb->control.asid = sd->next_asid++;
}
static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr)
static int svm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *dest)
{
struct vcpu_svm *svm = to_svm(vcpu);
unsigned long val;
switch (dr) {
case 0 ... 3:
val = vcpu->arch.db[dr];
*dest = vcpu->arch.db[dr];
break;
case 4:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return EMULATE_FAIL; /* will re-inject UD */
/* fall through */
case 6:
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
val = vcpu->arch.dr6;
*dest = vcpu->arch.dr6;
else
val = svm->vmcb->save.dr6;
*dest = svm->vmcb->save.dr6;
break;
case 5:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return EMULATE_FAIL; /* will re-inject UD */
/* fall through */
case 7:
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
val = vcpu->arch.dr7;
*dest = vcpu->arch.dr7;
else
val = svm->vmcb->save.dr7;
*dest = svm->vmcb->save.dr7;
break;
default:
val = 0;
}
return val;
return EMULATE_DONE;
}
static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value,
int *exception)
static int svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value)
{
struct vcpu_svm *svm = to_svm(vcpu);
*exception = 0;
switch (dr) {
case 0 ... 3:
vcpu->arch.db[dr] = value;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
vcpu->arch.eff_db[dr] = value;
return;
case 4 ... 5:
if (vcpu->arch.cr4 & X86_CR4_DE)
*exception = UD_VECTOR;
return;
break;
case 4:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return EMULATE_FAIL; /* will re-inject UD */
/* fall through */
case 6:
if (value & 0xffffffff00000000ULL) {
*exception = GP_VECTOR;
return;
}
vcpu->arch.dr6 = (value & DR6_VOLATILE) | DR6_FIXED_1;
return;
break;
case 5:
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
return EMULATE_FAIL; /* will re-inject UD */
/* fall through */
case 7:
if (value & 0xffffffff00000000ULL) {
*exception = GP_VECTOR;
return;
}
vcpu->arch.dr7 = (value & DR7_VOLATILE) | DR7_FIXED_1;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
svm->vmcb->save.dr7 = vcpu->arch.dr7;
vcpu->arch.switch_db_regs = (value & DR7_BP_EN_MASK);
}
return;
default:
/* FIXME: Possible case? */
printk(KERN_DEBUG "%s: unexpected dr %u\n",
__func__, dr);
*exception = UD_VECTOR;
return;
break;
}
return EMULATE_DONE;
}
static int pf_interception(struct vcpu_svm *svm)
@ -1239,13 +1259,17 @@ static int ud_interception(struct vcpu_svm *svm)
return 1;
}
static void svm_fpu_activate(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
svm->vcpu.fpu_active = 1;
update_cr0_intercept(svm);
}
static int nm_interception(struct vcpu_svm *svm)
{
svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
if (!(svm->vcpu.arch.cr0 & X86_CR0_TS))
svm->vmcb->save.cr0 &= ~X86_CR0_TS;
svm->vcpu.fpu_active = 1;
svm_fpu_activate(&svm->vcpu);
return 1;
}
@ -1337,7 +1361,7 @@ static int vmmcall_interception(struct vcpu_svm *svm)
static int nested_svm_check_permissions(struct vcpu_svm *svm)
{
if (!(svm->vcpu.arch.shadow_efer & EFER_SVME)
if (!(svm->vcpu.arch.efer & EFER_SVME)
|| !is_paging(&svm->vcpu)) {
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
@ -1740,8 +1764,8 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
hsave->save.ds = vmcb->save.ds;
hsave->save.gdtr = vmcb->save.gdtr;
hsave->save.idtr = vmcb->save.idtr;
hsave->save.efer = svm->vcpu.arch.shadow_efer;
hsave->save.cr0 = svm->vcpu.arch.cr0;
hsave->save.efer = svm->vcpu.arch.efer;
hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
hsave->save.cr4 = svm->vcpu.arch.cr4;
hsave->save.rflags = vmcb->save.rflags;
hsave->save.rip = svm->next_rip;
@ -2153,9 +2177,10 @@ static int rdmsr_interception(struct vcpu_svm *svm)
u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
u64 data;
if (svm_get_msr(&svm->vcpu, ecx, &data))
if (svm_get_msr(&svm->vcpu, ecx, &data)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(&svm->vcpu, 0);
else {
} else {
trace_kvm_msr_read(ecx, data);
svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
@ -2247,13 +2272,15 @@ static int wrmsr_interception(struct vcpu_svm *svm)
u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
trace_kvm_msr_write(ecx, data);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
if (svm_set_msr(&svm->vcpu, ecx, data))
if (svm_set_msr(&svm->vcpu, ecx, data)) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(&svm->vcpu, 0);
else
} else {
trace_kvm_msr_write(ecx, data);
skip_emulated_instruction(&svm->vcpu);
}
return 1;
}
@ -2297,7 +2324,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR3] = emulate_on_interception,
[SVM_EXIT_READ_CR4] = emulate_on_interception,
[SVM_EXIT_READ_CR8] = emulate_on_interception,
/* for now: */
[SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
[SVM_EXIT_WRITE_CR0] = emulate_on_interception,
[SVM_EXIT_WRITE_CR3] = emulate_on_interception,
[SVM_EXIT_WRITE_CR4] = emulate_on_interception,
@ -2306,11 +2333,17 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_DR1] = emulate_on_interception,
[SVM_EXIT_READ_DR2] = emulate_on_interception,
[SVM_EXIT_READ_DR3] = emulate_on_interception,
[SVM_EXIT_READ_DR4] = emulate_on_interception,
[SVM_EXIT_READ_DR5] = emulate_on_interception,
[SVM_EXIT_READ_DR6] = emulate_on_interception,
[SVM_EXIT_READ_DR7] = emulate_on_interception,
[SVM_EXIT_WRITE_DR0] = emulate_on_interception,
[SVM_EXIT_WRITE_DR1] = emulate_on_interception,
[SVM_EXIT_WRITE_DR2] = emulate_on_interception,
[SVM_EXIT_WRITE_DR3] = emulate_on_interception,
[SVM_EXIT_WRITE_DR4] = emulate_on_interception,
[SVM_EXIT_WRITE_DR5] = emulate_on_interception,
[SVM_EXIT_WRITE_DR6] = emulate_on_interception,
[SVM_EXIT_WRITE_DR7] = emulate_on_interception,
[SVM_EXIT_EXCP_BASE + DB_VECTOR] = db_interception,
[SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception,
@ -2383,20 +2416,10 @@ static int handle_exit(struct kvm_vcpu *vcpu)
svm_complete_interrupts(svm);
if (npt_enabled) {
int mmu_reload = 0;
if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) {
svm_set_cr0(vcpu, svm->vmcb->save.cr0);
mmu_reload = 1;
}
if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR0_MASK))
vcpu->arch.cr0 = svm->vmcb->save.cr0;
if (npt_enabled)
vcpu->arch.cr3 = svm->vmcb->save.cr3;
if (mmu_reload) {
kvm_mmu_reset_context(vcpu);
kvm_mmu_load(vcpu);
}
}
if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@ -2798,12 +2821,6 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
svm->vmcb->save.cr3 = root;
force_new_asid(vcpu);
if (vcpu->fpu_active) {
svm->vmcb->control.intercept_exceptions |= (1 << NM_VECTOR);
svm->vmcb->save.cr0 |= X86_CR0_TS;
vcpu->fpu_active = 0;
}
}
static int is_disabled(void)
@ -2852,6 +2869,10 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
return 0;
}
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
static const struct trace_print_flags svm_exit_reasons_str[] = {
{ SVM_EXIT_READ_CR0, "read_cr0" },
{ SVM_EXIT_READ_CR3, "read_cr3" },
@ -2905,9 +2926,22 @@ static const struct trace_print_flags svm_exit_reasons_str[] = {
{ -1, NULL }
};
static bool svm_gb_page_enable(void)
static int svm_get_lpage_level(void)
{
return true;
return PT_PDPE_LEVEL;
}
static bool svm_rdtscp_supported(void)
{
return false;
}
static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
update_cr0_intercept(svm);
svm->vmcb->control.intercept_exceptions |= 1 << NM_VECTOR;
}
static struct kvm_x86_ops svm_x86_ops = {
@ -2936,6 +2970,7 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = kvm_get_cs_db_l_bits,
.decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
.decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
.set_cr0 = svm_set_cr0,
.set_cr3 = svm_set_cr3,
@ -2950,6 +2985,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
.fpu_activate = svm_fpu_activate,
.fpu_deactivate = svm_fpu_deactivate,
.tlb_flush = svm_flush_tlb,
@ -2975,7 +3012,11 @@ static struct kvm_x86_ops svm_x86_ops = {
.get_mt_mask = svm_get_mt_mask,
.exit_reasons_str = svm_exit_reasons_str,
.gb_page_enable = svm_gb_page_enable,
.get_lpage_level = svm_get_lpage_level,
.cpuid_update = svm_cpuid_update,
.rdtscp_supported = svm_rdtscp_supported,
};
static int __init svm_init(void)

View file

@ -55,6 +55,38 @@ TRACE_EVENT(kvm_hypercall,
__entry->a3)
);
/*
* Tracepoint for hypercall.
*/
TRACE_EVENT(kvm_hv_hypercall,
TP_PROTO(__u16 code, bool fast, __u16 rep_cnt, __u16 rep_idx,
__u64 ingpa, __u64 outgpa),
TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa),
TP_STRUCT__entry(
__field( __u16, code )
__field( bool, fast )
__field( __u16, rep_cnt )
__field( __u16, rep_idx )
__field( __u64, ingpa )
__field( __u64, outgpa )
),
TP_fast_assign(
__entry->code = code;
__entry->fast = fast;
__entry->rep_cnt = rep_cnt;
__entry->rep_idx = rep_idx;
__entry->ingpa = ingpa;
__entry->outgpa = outgpa;
),
TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx",
__entry->code, __entry->fast ? "fast" : "slow",
__entry->rep_cnt, __entry->rep_idx, __entry->ingpa,
__entry->outgpa)
);
/*
* Tracepoint for PIO.
*/
@ -214,28 +246,33 @@ TRACE_EVENT(kvm_page_fault,
* Tracepoint for guest MSR access.
*/
TRACE_EVENT(kvm_msr,
TP_PROTO(unsigned int rw, unsigned int ecx, unsigned long data),
TP_ARGS(rw, ecx, data),
TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception),
TP_ARGS(write, ecx, data, exception),
TP_STRUCT__entry(
__field( unsigned int, rw )
__field( unsigned int, ecx )
__field( unsigned long, data )
__field( unsigned, write )
__field( u32, ecx )
__field( u64, data )
__field( u8, exception )
),
TP_fast_assign(
__entry->rw = rw;
__entry->write = write;
__entry->ecx = ecx;
__entry->data = data;
__entry->exception = exception;
),
TP_printk("msr_%s %x = 0x%lx",
__entry->rw ? "write" : "read",
__entry->ecx, __entry->data)
TP_printk("msr_%s %x = 0x%llx%s",
__entry->write ? "write" : "read",
__entry->ecx, __entry->data,
__entry->exception ? " (#GP)" : "")
);
#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data)
#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data)
#define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false)
#define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false)
#define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true)
#define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true)
/*
* Tracepoint for guest CR access.

View file

@ -61,6 +61,21 @@ module_param_named(unrestricted_guest,
static int __read_mostly emulate_invalid_guest_state = 0;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
#define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
#define KVM_GUEST_CR0_MASK \
(KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST \
(X86_CR0_WP | X86_CR0_NE)
#define KVM_VM_CR0_ALWAYS_ON \
(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
#define KVM_CR4_GUEST_OWNED_BITS \
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT)
#define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
#define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@ -136,6 +151,8 @@ struct vcpu_vmx {
ktime_t entry_time;
s64 vnmi_blocked_time;
u32 exit_reason;
bool rdtscp_enabled;
};
static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@ -210,7 +227,7 @@ static const u32 vmx_msr_index[] = {
#ifdef CONFIG_X86_64
MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
#endif
MSR_EFER, MSR_K6_STAR,
MSR_EFER, MSR_TSC_AUX, MSR_K6_STAR,
};
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
@ -301,6 +318,11 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
return !!(vmx_capability.ept & VMX_EPT_2MB_PAGE_BIT);
}
static inline bool cpu_has_vmx_ept_1g_page(void)
{
return !!(vmx_capability.ept & VMX_EPT_1GB_PAGE_BIT);
}
static inline int cpu_has_vmx_invept_individual_addr(void)
{
return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@ -336,9 +358,7 @@ static inline int cpu_has_vmx_ple(void)
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
{
return flexpriority_enabled &&
(cpu_has_vmx_virtualize_apic_accesses()) &&
(irqchip_in_kernel(kvm));
return flexpriority_enabled && irqchip_in_kernel(kvm);
}
static inline int cpu_has_vmx_vpid(void)
@ -347,6 +367,12 @@ static inline int cpu_has_vmx_vpid(void)
SECONDARY_EXEC_ENABLE_VPID;
}
static inline int cpu_has_vmx_rdtscp(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
SECONDARY_EXEC_RDTSCP;
}
static inline int cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
@ -551,22 +577,18 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
{
u32 eb;
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR);
if (!vcpu->fpu_active)
eb |= 1u << NM_VECTOR;
/*
* Unconditionally intercept #DB so we can maintain dr6 without
* reading it every exit.
*/
eb |= 1u << DB_VECTOR;
if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
eb |= 1u << BP_VECTOR;
}
eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
(1u << NM_VECTOR) | (1u << DB_VECTOR);
if ((vcpu->guest_debug &
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
(KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
eb |= 1u << BP_VECTOR;
if (to_vmx(vcpu)->rmode.vm86_active)
eb = ~0;
if (enable_ept)
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
if (vcpu->fpu_active)
eb &= ~(1u << NM_VECTOR);
vmcs_write32(EXCEPTION_BITMAP, eb);
}
@ -589,7 +611,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
u64 guest_efer;
u64 ignore_bits;
guest_efer = vmx->vcpu.arch.shadow_efer;
guest_efer = vmx->vcpu.arch.efer;
/*
* NX is emulated; LMA and LME handled by hardware; SCE meaninless
@ -767,22 +789,30 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
{
ulong cr0;
if (vcpu->fpu_active)
return;
vcpu->fpu_active = 1;
vmcs_clear_bits(GUEST_CR0, X86_CR0_TS);
if (vcpu->arch.cr0 & X86_CR0_TS)
vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
cr0 = vmcs_readl(GUEST_CR0);
cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
vmcs_writel(GUEST_CR0, cr0);
update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
{
if (!vcpu->fpu_active)
return;
vcpu->fpu_active = 0;
vmcs_set_bits(GUEST_CR0, X86_CR0_TS);
vmx_decache_cr0_guest_bits(vcpu);
vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = 0;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
}
static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
@ -878,6 +908,11 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
}
static bool vmx_rdtscp_supported(void)
{
return cpu_has_vmx_rdtscp();
}
/*
* Swap MSR entry in host/guest MSR entry array.
*/
@ -913,12 +948,15 @@ static void setup_msrs(struct vcpu_vmx *vmx)
index = __find_msr_index(vmx, MSR_CSTAR);
if (index >= 0)
move_msr_up(vmx, index, save_nmsrs++);
index = __find_msr_index(vmx, MSR_TSC_AUX);
if (index >= 0 && vmx->rdtscp_enabled)
move_msr_up(vmx, index, save_nmsrs++);
/*
* MSR_K6_STAR is only needed on long mode guests, and only
* if efer.sce is enabled.
*/
index = __find_msr_index(vmx, MSR_K6_STAR);
if ((index >= 0) && (vmx->vcpu.arch.shadow_efer & EFER_SCE))
if ((index >= 0) && (vmx->vcpu.arch.efer & EFER_SCE))
move_msr_up(vmx, index, save_nmsrs++);
}
#endif
@ -1002,6 +1040,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
case MSR_IA32_SYSENTER_ESP:
data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_TSC_AUX:
if (!to_vmx(vcpu)->rdtscp_enabled)
return 1;
/* Otherwise falls through */
default:
vmx_load_host_state(to_vmx(vcpu));
msr = find_msr_entry(to_vmx(vcpu), msr_index);
@ -1065,7 +1107,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
vcpu->arch.pat = data;
break;
}
/* Otherwise falls through to kvm_set_msr_common */
ret = kvm_set_msr_common(vcpu, msr_index, data);
break;
case MSR_TSC_AUX:
if (!vmx->rdtscp_enabled)
return 1;
/* Check reserved bit, higher 32 bits should be zero */
if ((data >> 32) != 0)
return 1;
/* Otherwise falls through */
default:
msr = find_msr_entry(vmx, msr_index);
if (msr) {
@ -1224,6 +1274,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING;
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
@ -1243,7 +1295,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_ENABLE_EPT |
SECONDARY_EXEC_UNRESTRICTED_GUEST |
SECONDARY_EXEC_PAUSE_LOOP_EXITING;
SECONDARY_EXEC_PAUSE_LOOP_EXITING |
SECONDARY_EXEC_RDTSCP;
if (adjust_vmx_controls(min2, opt2,
MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
@ -1457,8 +1510,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
static gva_t rmode_tss_base(struct kvm *kvm)
{
if (!kvm->arch.tss_addr) {
gfn_t base_gfn = kvm->memslots[0].base_gfn +
kvm->memslots[0].npages - 3;
struct kvm_memslots *slots;
gfn_t base_gfn;
slots = rcu_dereference(kvm->memslots);
base_gfn = kvm->memslots->memslots[0].base_gfn +
kvm->memslots->memslots[0].npages - 3;
return base_gfn << PAGE_SHIFT;
}
return kvm->arch.tss_addr;
@ -1544,9 +1601,7 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
* of this msr depends on is_long_mode().
*/
vmx_load_host_state(to_vmx(vcpu));
vcpu->arch.shadow_efer = efer;
if (!msr)
return;
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS) |
@ -1576,13 +1631,13 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
(guest_tr_ar & ~AR_TYPE_MASK)
| AR_TYPE_BUSY_64_TSS);
}
vcpu->arch.shadow_efer |= EFER_LMA;
vmx_set_efer(vcpu, vcpu->arch.shadow_efer);
vcpu->arch.efer |= EFER_LMA;
vmx_set_efer(vcpu, vcpu->arch.efer);
}
static void exit_lmode(struct kvm_vcpu *vcpu)
{
vcpu->arch.shadow_efer &= ~EFER_LMA;
vcpu->arch.efer &= ~EFER_LMA;
vmcs_write32(VM_ENTRY_CONTROLS,
vmcs_read32(VM_ENTRY_CONTROLS)
@ -1598,10 +1653,20 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
vcpu->arch.cr0 &= ~cr0_guest_owned_bits;
vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits;
}
static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
{
vcpu->arch.cr4 &= KVM_GUEST_CR4_MASK;
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits;
vcpu->arch.cr4 &= ~cr4_guest_owned_bits;
vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits;
}
static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
@ -1646,7 +1711,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, vcpu->arch.cr4);
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
@ -1654,23 +1719,13 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, vcpu->arch.cr4);
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
if (!(cr0 & X86_CR0_WP))
*hw_cr0 &= ~X86_CR0_WP;
}
static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
struct kvm_vcpu *vcpu)
{
if (!is_paging(vcpu)) {
*hw_cr4 &= ~X86_CR4_PAE;
*hw_cr4 |= X86_CR4_PSE;
} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
*hw_cr4 &= ~X86_CR4_PAE;
}
static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -1682,8 +1737,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
else
hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON;
vmx_fpu_deactivate(vcpu);
if (vmx->rmode.vm86_active && (cr0 & X86_CR0_PE))
enter_pmode(vcpu);
@ -1691,7 +1744,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
enter_rmode(vcpu);
#ifdef CONFIG_X86_64
if (vcpu->arch.shadow_efer & EFER_LME) {
if (vcpu->arch.efer & EFER_LME) {
if (!is_paging(vcpu) && (cr0 & X86_CR0_PG))
enter_lmode(vcpu);
if (is_paging(vcpu) && !(cr0 & X86_CR0_PG))
@ -1702,12 +1755,12 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (enable_ept)
ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
if (!vcpu->fpu_active)
hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
vmcs_writel(CR0_READ_SHADOW, cr0);
vmcs_writel(GUEST_CR0, hw_cr0);
vcpu->arch.cr0 = cr0;
if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
vmx_fpu_activate(vcpu);
}
static u64 construct_eptp(unsigned long root_hpa)
@ -1738,8 +1791,6 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
vmx_flush_tlb(vcpu);
vmcs_writel(GUEST_CR3, guest_cr3);
if (vcpu->arch.cr0 & X86_CR0_PE)
vmx_fpu_deactivate(vcpu);
}
static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
@ -1748,8 +1799,14 @@ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
vcpu->arch.cr4 = cr4;
if (enable_ept)
ept_update_paging_mode_cr4(&hw_cr4, vcpu);
if (enable_ept) {
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
}
vmcs_writel(CR4_READ_SHADOW, cr4);
vmcs_writel(GUEST_CR4, hw_cr4);
@ -1787,7 +1844,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
static int vmx_get_cpl(struct kvm_vcpu *vcpu)
{
if (!(vcpu->arch.cr0 & X86_CR0_PE)) /* if real mode */
if (!is_protmode(vcpu))
return 0;
if (vmx_get_rflags(vcpu) & X86_EFLAGS_VM) /* if virtual 8086 */
@ -2042,7 +2099,7 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
static bool guest_state_valid(struct kvm_vcpu *vcpu)
{
/* real mode guest state checks */
if (!(vcpu->arch.cr0 & X86_CR0_PE)) {
if (!is_protmode(vcpu)) {
if (!rmode_segment_valid(vcpu, VCPU_SREG_CS))
return false;
if (!rmode_segment_valid(vcpu, VCPU_SREG_SS))
@ -2175,7 +2232,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page)
goto out;
kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
@ -2188,7 +2245,7 @@ static int alloc_apic_access_page(struct kvm *kvm)
kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
out:
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return r;
}
@ -2197,7 +2254,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
if (kvm->arch.ept_identity_pagetable)
goto out;
kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
@ -2212,7 +2269,7 @@ static int alloc_identity_pagetable(struct kvm *kvm)
kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
out:
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return r;
}
@ -2384,14 +2441,12 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
for (i = 0; i < NR_VMX_MSR; ++i) {
u32 index = vmx_msr_index[i];
u32 data_low, data_high;
u64 data;
int j = vmx->nmsrs;
if (rdmsr_safe(index, &data_low, &data_high) < 0)
continue;
if (wrmsr_safe(index, data_low, data_high) < 0)
continue;
data = data_low | ((u64)data_high << 32);
vmx->guest_msrs[j].index = i;
vmx->guest_msrs[j].data = 0;
vmx->guest_msrs[j].mask = -1ull;
@ -2404,7 +2459,10 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
vmcs_writel(CR4_GUEST_HOST_MASK, KVM_GUEST_CR4_MASK);
vmx->vcpu.arch.cr4_guest_owned_bits = KVM_CR4_GUEST_OWNED_BITS;
if (enable_ept)
vmx->vcpu.arch.cr4_guest_owned_bits |= X86_CR4_PGE;
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
tsc_base = vmx->vcpu.kvm->arch.vm_init_tsc;
rdtscll(tsc_this);
@ -2429,10 +2487,10 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 msr;
int ret;
int ret, idx;
vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP));
down_read(&vcpu->kvm->slots_lock);
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (!init_rmode(vmx->vcpu.kvm)) {
ret = -ENOMEM;
goto out;
@ -2526,7 +2584,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
vmx_set_cr0(&vmx->vcpu, kvm_read_cr0(vcpu)); /* enter rmode */
vmx_set_cr4(&vmx->vcpu, 0);
vmx_set_efer(&vmx->vcpu, 0);
vmx_fpu_activate(&vmx->vcpu);
@ -2540,7 +2598,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->emulation_required = 0;
out:
up_read(&vcpu->kvm->slots_lock);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
}
@ -2717,6 +2775,12 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
kvm_queue_exception(vcpu, vec);
return 1;
case BP_VECTOR:
/*
* Update instruction length as we may reinject the exception
* from user space while in guest debugging mode.
*/
to_vmx(vcpu)->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
return 0;
/* fall through */
@ -2839,6 +2903,13 @@ static int handle_exception(struct kvm_vcpu *vcpu)
kvm_run->debug.arch.dr7 = vmcs_readl(GUEST_DR7);
/* fall through */
case BP_VECTOR:
/*
* Update instruction length as we may reinject #BP from
* user space while in guest debugging mode. Reading it for
* #DB as well causes no harm, it is not used in that case.
*/
vmx->vcpu.arch.event_exit_inst_len =
vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
kvm_run->exit_reason = KVM_EXIT_DEBUG;
kvm_run->debug.arch.pc = vmcs_readl(GUEST_CS_BASE) + rip;
kvm_run->debug.arch.exception = ex_no;
@ -2940,11 +3011,10 @@ static int handle_cr(struct kvm_vcpu *vcpu)
};
break;
case 2: /* clts */
vmx_fpu_deactivate(vcpu);
vcpu->arch.cr0 &= ~X86_CR0_TS;
vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
vmx_fpu_activate(vcpu);
vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
skip_emulated_instruction(vcpu);
vmx_fpu_activate(vcpu);
return 1;
case 1: /*mov from cr*/
switch (cr) {
@ -2962,7 +3032,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
}
break;
case 3: /* lmsw */
kvm_lmsw(vcpu, (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f);
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
trace_kvm_cr_write(0, (kvm_read_cr0(vcpu) & ~0xful) | val);
kvm_lmsw(vcpu, val);
skip_emulated_instruction(vcpu);
return 1;
@ -2975,12 +3047,22 @@ static int handle_cr(struct kvm_vcpu *vcpu)
return 0;
}
static int check_dr_alias(struct kvm_vcpu *vcpu)
{
if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return -1;
}
return 0;
}
static int handle_dr(struct kvm_vcpu *vcpu)
{
unsigned long exit_qualification;
unsigned long val;
int dr, reg;
/* Do not handle if the CPL > 0, will trigger GP on re-entry */
if (!kvm_require_cpl(vcpu, 0))
return 1;
dr = vmcs_readl(GUEST_DR7);
@ -3016,14 +3098,20 @@ static int handle_dr(struct kvm_vcpu *vcpu)
case 0 ... 3:
val = vcpu->arch.db[dr];
break;
case 4:
if (check_dr_alias(vcpu) < 0)
return 1;
/* fall through */
case 6:
val = vcpu->arch.dr6;
break;
case 7:
case 5:
if (check_dr_alias(vcpu) < 0)
return 1;
/* fall through */
default: /* 7 */
val = vcpu->arch.dr7;
break;
default:
val = 0;
}
kvm_register_write(vcpu, reg, val);
} else {
@ -3034,21 +3122,25 @@ static int handle_dr(struct kvm_vcpu *vcpu)
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
vcpu->arch.eff_db[dr] = val;
break;
case 4 ... 5:
if (vcpu->arch.cr4 & X86_CR4_DE)
kvm_queue_exception(vcpu, UD_VECTOR);
break;
case 4:
if (check_dr_alias(vcpu) < 0)
return 1;
/* fall through */
case 6:
if (val & 0xffffffff00000000ULL) {
kvm_queue_exception(vcpu, GP_VECTOR);
break;
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
break;
case 7:
case 5:
if (check_dr_alias(vcpu) < 0)
return 1;
/* fall through */
default: /* 7 */
if (val & 0xffffffff00000000ULL) {
kvm_queue_exception(vcpu, GP_VECTOR);
break;
kvm_inject_gp(vcpu, 0);
return 1;
}
vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
@ -3075,6 +3167,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu)
u64 data;
if (vmx_get_msr(vcpu, ecx, &data)) {
trace_kvm_msr_read_ex(ecx);
kvm_inject_gp(vcpu, 0);
return 1;
}
@ -3094,13 +3187,13 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu)
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
| ((u64)(vcpu->arch.regs[VCPU_REGS_RDX] & -1u) << 32);
trace_kvm_msr_write(ecx, data);
if (vmx_set_msr(vcpu, ecx, data) != 0) {
trace_kvm_msr_write_ex(ecx, data);
kvm_inject_gp(vcpu, 0);
return 1;
}
trace_kvm_msr_write(ecx, data);
skip_emulated_instruction(vcpu);
return 1;
}
@ -3385,7 +3478,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
}
if (err != EMULATE_DONE) {
kvm_report_emulation_failure(vcpu, "emulation failure");
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@ -3416,6 +3508,12 @@ static int handle_pause(struct kvm_vcpu *vcpu)
return 1;
}
static int handle_invalid_op(struct kvm_vcpu *vcpu)
{
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
/*
* The exit handlers return 1 if the exit was handled fully and guest execution
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
@ -3453,6 +3551,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
[EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
[EXIT_REASON_MWAIT_INSTRUCTION] = handle_invalid_op,
[EXIT_REASON_MONITOR_INSTRUCTION] = handle_invalid_op,
};
static const int kvm_vmx_max_exit_handlers =
@ -3686,9 +3786,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
*/
vmcs_writel(HOST_CR0, read_cr0());
if (vcpu->arch.switch_db_regs)
set_debugreg(vcpu->arch.dr6, 6);
asm(
/* Store host registers */
"push %%"R"dx; push %%"R"bp;"
@ -3789,9 +3886,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
| (1 << VCPU_EXREG_PDPTR));
vcpu->arch.regs_dirty = 0;
if (vcpu->arch.switch_db_regs)
get_debugreg(vcpu->arch.dr6, 6);
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if (vmx->rmode.irq.pending)
fixup_rmode_irq(vmx);
@ -3920,7 +4014,7 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
* b. VT-d with snooping control feature: snooping control feature of
* VT-d engine can guarantee the cache correctness. Just set it
* to WB to keep consistent with host. So the same as item 3.
* 3. EPT without VT-d: always map as WB and set IGMT=1 to keep
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
* consistent with host MTRR
*/
if (is_mmio)
@ -3931,37 +4025,88 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
VMX_EPT_MT_EPTE_SHIFT;
else
ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT)
| VMX_EPT_IGMT_BIT;
| VMX_EPT_IPAT_BIT;
return ret;
}
#define _ER(x) { EXIT_REASON_##x, #x }
static const struct trace_print_flags vmx_exit_reasons_str[] = {
{ EXIT_REASON_EXCEPTION_NMI, "exception" },
{ EXIT_REASON_EXTERNAL_INTERRUPT, "ext_irq" },
{ EXIT_REASON_TRIPLE_FAULT, "triple_fault" },
{ EXIT_REASON_NMI_WINDOW, "nmi_window" },
{ EXIT_REASON_IO_INSTRUCTION, "io_instruction" },
{ EXIT_REASON_CR_ACCESS, "cr_access" },
{ EXIT_REASON_DR_ACCESS, "dr_access" },
{ EXIT_REASON_CPUID, "cpuid" },
{ EXIT_REASON_MSR_READ, "rdmsr" },
{ EXIT_REASON_MSR_WRITE, "wrmsr" },
{ EXIT_REASON_PENDING_INTERRUPT, "interrupt_window" },
{ EXIT_REASON_HLT, "halt" },
{ EXIT_REASON_INVLPG, "invlpg" },
{ EXIT_REASON_VMCALL, "hypercall" },
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "tpr_below_thres" },
{ EXIT_REASON_APIC_ACCESS, "apic_access" },
{ EXIT_REASON_WBINVD, "wbinvd" },
{ EXIT_REASON_TASK_SWITCH, "task_switch" },
{ EXIT_REASON_EPT_VIOLATION, "ept_violation" },
_ER(EXCEPTION_NMI),
_ER(EXTERNAL_INTERRUPT),
_ER(TRIPLE_FAULT),
_ER(PENDING_INTERRUPT),
_ER(NMI_WINDOW),
_ER(TASK_SWITCH),
_ER(CPUID),
_ER(HLT),
_ER(INVLPG),
_ER(RDPMC),
_ER(RDTSC),
_ER(VMCALL),
_ER(VMCLEAR),
_ER(VMLAUNCH),
_ER(VMPTRLD),
_ER(VMPTRST),
_ER(VMREAD),
_ER(VMRESUME),
_ER(VMWRITE),
_ER(VMOFF),
_ER(VMON),
_ER(CR_ACCESS),
_ER(DR_ACCESS),
_ER(IO_INSTRUCTION),
_ER(MSR_READ),
_ER(MSR_WRITE),
_ER(MWAIT_INSTRUCTION),
_ER(MONITOR_INSTRUCTION),
_ER(PAUSE_INSTRUCTION),
_ER(MCE_DURING_VMENTRY),
_ER(TPR_BELOW_THRESHOLD),
_ER(APIC_ACCESS),
_ER(EPT_VIOLATION),
_ER(EPT_MISCONFIG),
_ER(WBINVD),
{ -1, NULL }
};
static bool vmx_gb_page_enable(void)
#undef _ER
static int vmx_get_lpage_level(void)
{
return false;
if (enable_ept && !cpu_has_vmx_ept_1g_page())
return PT_DIRECTORY_LEVEL;
else
/* For shadow and EPT supported 1GB page */
return PT_PDPE_LEVEL;
}
static inline u32 bit(int bitno)
{
return 1 << (bitno & 31);
}
static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
vmx->rdtscp_enabled = false;
if (vmx_rdtscp_supported()) {
exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
if (exec_control & SECONDARY_EXEC_RDTSCP) {
best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
if (best && (best->edx & bit(X86_FEATURE_RDTSCP)))
vmx->rdtscp_enabled = true;
else {
exec_control &= ~SECONDARY_EXEC_RDTSCP;
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
exec_control);
}
}
}
}
static struct kvm_x86_ops vmx_x86_ops = {
@ -3990,6 +4135,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.decache_cr0_guest_bits = vmx_decache_cr0_guest_bits,
.decache_cr4_guest_bits = vmx_decache_cr4_guest_bits,
.set_cr0 = vmx_set_cr0,
.set_cr3 = vmx_set_cr3,
@ -4002,6 +4148,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
.cache_reg = vmx_cache_reg,
.get_rflags = vmx_get_rflags,
.set_rflags = vmx_set_rflags,
.fpu_activate = vmx_fpu_activate,
.fpu_deactivate = vmx_fpu_deactivate,
.tlb_flush = vmx_flush_tlb,
@ -4027,7 +4175,11 @@ static struct kvm_x86_ops vmx_x86_ops = {
.get_mt_mask = vmx_get_mt_mask,
.exit_reasons_str = vmx_exit_reasons_str,
.gb_page_enable = vmx_gb_page_enable,
.get_lpage_level = vmx_get_lpage_level,
.cpuid_update = vmx_cpuid_update,
.rdtscp_supported = vmx_rdtscp_supported,
};
static int __init vmx_init(void)

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
#define ARCH_X86_KVM_X86_H
#include <linux/kvm_host.h>
#include "kvm_cache_regs.h"
static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
{
@ -35,4 +36,33 @@ static inline bool kvm_exception_is_soft(unsigned int nr)
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
u32 function, u32 index);
static inline bool is_protmode(struct kvm_vcpu *vcpu)
{
return kvm_read_cr0_bits(vcpu, X86_CR0_PE);
}
static inline int is_long_mode(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_X86_64
return vcpu->arch.efer & EFER_LMA;
#else
return 0;
#endif
}
static inline int is_pae(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PAE);
}
static inline int is_pse(struct kvm_vcpu *vcpu)
{
return kvm_read_cr4_bits(vcpu, X86_CR4_PSE);
}
static inline int is_paging(struct kvm_vcpu *vcpu)
{
return kvm_read_cr0_bits(vcpu, X86_CR0_PG);
}
#endif

View file

@ -103,7 +103,7 @@ struct kvm_userspace_memory_region {
/* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL
#define KVM_MEMSLOT_INVALID (1UL << 1)
/* for KVM_IRQ_LINE */
struct kvm_irq_level {
@ -497,6 +497,11 @@ struct kvm_ioeventfd {
#endif
#define KVM_CAP_S390_PSW 42
#define KVM_CAP_PPC_SEGSTATE 43
#define KVM_CAP_HYPERV 44
#define KVM_CAP_HYPERV_VAPIC 45
#define KVM_CAP_HYPERV_SPIN 46
#define KVM_CAP_PCI_SEGMENT 47
#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
#ifdef KVM_CAP_IRQ_ROUTING
@ -691,8 +696,9 @@ struct kvm_assigned_pci_dev {
__u32 busnr;
__u32 devfn;
__u32 flags;
__u32 segnr;
union {
__u32 reserved[12];
__u32 reserved[11];
};
};

View file

@ -38,6 +38,7 @@
#define KVM_REQ_MMU_SYNC 7
#define KVM_REQ_KVMCLOCK_UPDATE 8
#define KVM_REQ_KICK 9
#define KVM_REQ_DEACTIVATE_FPU 10
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
@ -57,20 +58,20 @@ struct kvm_io_bus {
struct kvm_io_device *devs[NR_IOBUS_DEVS];
};
void kvm_io_bus_init(struct kvm_io_bus *bus);
void kvm_io_bus_destroy(struct kvm_io_bus *bus);
int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, int len,
const void *val);
int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len,
enum kvm_bus {
KVM_MMIO_BUS,
KVM_PIO_BUS,
KVM_NR_BUSES
};
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
void *val);
int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev);
void kvm_io_bus_unregister_dev(struct kvm *kvm, struct kvm_io_bus *bus,
struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev);
struct kvm_vcpu {
struct kvm *kvm;
@ -83,6 +84,8 @@ struct kvm_vcpu {
struct kvm_run *run;
unsigned long requests;
unsigned long guest_debug;
int srcu_idx;
int fpu_active;
int guest_fpu_loaded;
wait_queue_head_t wq;
@ -150,14 +153,19 @@ struct kvm_irq_routing_table {};
#endif
struct kvm {
spinlock_t mmu_lock;
spinlock_t requests_lock;
struct rw_semaphore slots_lock;
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots {
int nmemslots;
struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
KVM_PRIVATE_MEM_SLOTS];
};
struct kvm {
spinlock_t mmu_lock;
raw_spinlock_t requests_lock;
struct mutex slots_lock;
struct mm_struct *mm; /* userspace tied to this vm */
struct kvm_memslots *memslots;
struct srcu_struct srcu;
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
u32 bsp_vcpu_id;
struct kvm_vcpu *bsp_vcpu;
@ -166,8 +174,7 @@ struct kvm {
atomic_t online_vcpus;
struct list_head vm_list;
struct mutex lock;
struct kvm_io_bus mmio_bus;
struct kvm_io_bus pio_bus;
struct kvm_io_bus *buses[KVM_NR_BUSES];
#ifdef CONFIG_HAVE_KVM_EVENTFD
struct {
spinlock_t lock;
@ -249,13 +256,20 @@ int kvm_set_memory_region(struct kvm *kvm,
int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc);
int kvm_arch_set_memory_region(struct kvm *kvm,
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_memory_slot old,
struct kvm_userspace_memory_region *mem,
int user_alloc);
void kvm_arch_commit_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
struct kvm_memory_slot old,
int user_alloc);
void kvm_disable_largepages(void);
void kvm_arch_flush_shadow(struct kvm *kvm);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
gfn_t unalias_gfn_instantiation(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
void kvm_release_page_clean(struct page *page);
@ -264,6 +278,9 @@ void kvm_set_page_dirty(struct page *page);
void kvm_set_page_accessed(struct page *page);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
int memslot_id(struct kvm *kvm, gfn_t gfn);
void kvm_release_pfn_dirty(pfn_t);
void kvm_release_pfn_clean(pfn_t pfn);
void kvm_set_pfn_dirty(pfn_t pfn);
@ -283,6 +300,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn);
void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
void kvm_vcpu_block(struct kvm_vcpu *vcpu);
@ -383,6 +401,7 @@ struct kvm_assigned_dev_kernel {
struct work_struct interrupt_work;
struct list_head list;
int assigned_dev_id;
int host_segnr;
int host_busnr;
int host_devfn;
unsigned int entries_nr;
@ -429,8 +448,7 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
#define KVM_IOMMU_CACHE_COHERENCY 0x1
#ifdef CONFIG_IOMMU_API
int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn,
unsigned long npages);
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
int kvm_iommu_map_guest(struct kvm *kvm);
int kvm_iommu_unmap_guest(struct kvm *kvm);
int kvm_assign_device(struct kvm *kvm,
@ -480,11 +498,6 @@ static inline void kvm_guest_exit(void)
current->flags &= ~PF_VCPU;
}
static inline int memslot_id(struct kvm *kvm, struct kvm_memory_slot *slot)
{
return slot - kvm->memslots;
}
static inline gpa_t gfn_to_gpa(gfn_t gfn)
{
return (gpa_t)gfn << PAGE_SHIFT;
@ -532,6 +545,10 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se
}
#endif
#ifndef KVM_ARCH_HAS_UNALIAS_INSTANTIATION
#define unalias_gfn_instantiation unalias_gfn
#endif
#ifdef CONFIG_HAVE_KVM_IRQCHIP
#define KVM_MAX_IRQ_ROUTES 1024

View file

@ -145,6 +145,47 @@ TRACE_EVENT(kvm_mmio,
__entry->len, __entry->gpa, __entry->val)
);
#define kvm_fpu_load_symbol \
{0, "unload"}, \
{1, "load"}
TRACE_EVENT(kvm_fpu,
TP_PROTO(int load),
TP_ARGS(load),
TP_STRUCT__entry(
__field( u32, load )
),
TP_fast_assign(
__entry->load = load;
),
TP_printk("%s", __print_symbolic(__entry->load, kvm_fpu_load_symbol))
);
TRACE_EVENT(kvm_age_page,
TP_PROTO(ulong hva, struct kvm_memory_slot *slot, int ref),
TP_ARGS(hva, slot, ref),
TP_STRUCT__entry(
__field( u64, hva )
__field( u64, gfn )
__field( u8, referenced )
),
TP_fast_assign(
__entry->hva = hva;
__entry->gfn =
slot->base_gfn + ((hva - slot->userspace_addr) >> PAGE_SHIFT);
__entry->referenced = ref;
),
TP_printk("hva %llx gfn %llx %s",
__entry->hva, __entry->gfn,
__entry->referenced ? "YOUNG" : "OLD")
);
#endif /* _TRACE_KVM_MAIN_H */
/* This part must be outside protection */

View file

@ -12,3 +12,6 @@ config HAVE_KVM_EVENTFD
config KVM_APIC_ARCHITECTURE
bool
config KVM_MMIO
bool

View file

@ -504,12 +504,12 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
int r = 0, idx;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
mutex_lock(&kvm->lock);
down_read(&kvm->slots_lock);
idx = srcu_read_lock(&kvm->srcu);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
@ -526,7 +526,8 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
r = -ENOMEM;
goto out;
}
dev = pci_get_bus_and_slot(assigned_dev->busnr,
dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
@ -548,6 +549,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
pci_reset_function(dev);
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_segnr = assigned_dev->segnr;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->flags = assigned_dev->flags;
@ -573,7 +575,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
}
out:
up_read(&kvm->slots_lock);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
return r;
out_list_del:
@ -585,7 +587,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
pci_dev_put(dev);
out_free:
kfree(match);
up_read(&kvm->slots_lock);
srcu_read_unlock(&kvm->srcu, idx);
mutex_unlock(&kvm->lock);
return r;
}

View file

@ -92,41 +92,64 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
struct kvm_coalesced_mmio_dev *dev;
struct page *page;
int ret;
ret = -ENOMEM;
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto out_err;
kvm->coalesced_mmio_ring = page_address(page);
ret = -ENOMEM;
dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
goto out_free_page;
spin_lock_init(&dev->lock);
kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
dev->kvm = kvm;
kvm->coalesced_mmio_dev = dev;
ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev);
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0)
kfree(dev);
goto out_free_dev;
return ret;
out_free_dev:
kfree(dev);
out_free_page:
__free_page(page);
out_err:
return ret;
}
void kvm_coalesced_mmio_free(struct kvm *kvm)
{
if (kvm->coalesced_mmio_ring)
free_page((unsigned long)kvm->coalesced_mmio_ring);
}
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
struct kvm_coalesced_mmio_zone *zone)
{
struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
if (dev == NULL)
return -EINVAL;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return -ENOBUFS;
}
dev->zone[dev->nb_zones] = *zone;
dev->nb_zones++;
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return 0;
}
@ -140,10 +163,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
if (dev == NULL)
return -EINVAL;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
i = dev->nb_zones;
while(i) {
while (i) {
z = &dev->zone[i - 1];
/* unregister all zones
@ -158,7 +181,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
i--;
}
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return 0;
}

View file

@ -1,3 +1,6 @@
#ifndef __KVM_COALESCED_MMIO_H__
#define __KVM_COALESCED_MMIO_H__
/*
* KVM coalesced MMIO
*
@ -7,6 +10,8 @@
*
*/
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_ZONE_MAX 100
struct kvm_coalesced_mmio_dev {
@ -18,7 +23,17 @@ struct kvm_coalesced_mmio_dev {
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
void kvm_coalesced_mmio_free(struct kvm *kvm);
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone);
#else
static inline int kvm_coalesced_mmio_init(struct kvm *kvm) { return 0; }
static inline void kvm_coalesced_mmio_free(struct kvm *kvm) { }
#endif
#endif

View file

@ -47,7 +47,6 @@ struct _irqfd {
int gsi;
struct list_head list;
poll_table pt;
wait_queue_head_t *wqh;
wait_queue_t wait;
struct work_struct inject;
struct work_struct shutdown;
@ -159,8 +158,6 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
irqfd->wqh = wqh;
add_wait_queue(wqh, &irqfd->wait);
}
@ -463,7 +460,7 @@ static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p;
struct eventfd_ctx *eventfd;
int ret;
@ -508,7 +505,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
else
p->wildcard = true;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
/* Verify that there isnt a match already */
if (ioeventfd_check_collision(kvm, p)) {
@ -518,18 +515,18 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
ret = __kvm_io_bus_register_dev(bus, &p->dev);
ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
if (ret < 0)
goto unlock_fail;
list_add_tail(&p->list, &kvm->ioeventfds);
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return 0;
unlock_fail:
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
fail:
kfree(p);
@ -542,7 +539,7 @@ static int
kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
enum kvm_bus bus_idx = pio ? KVM_PIO_BUS : KVM_MMIO_BUS;
struct _ioeventfd *p, *tmp;
struct eventfd_ctx *eventfd;
int ret = -ENOENT;
@ -551,7 +548,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
@ -565,13 +562,13 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (!p->wildcard && p->datamatch != args->datamatch)
continue;
__kvm_io_bus_unregister_dev(bus, &p->dev);
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
ioeventfd_release(p);
ret = 0;
break;
}
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
eventfd_ctx_put(eventfd);

View file

@ -100,6 +100,19 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
return injected;
}
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
DECLARE_BITMAP(handled_vectors, 256);
int i;
memset(handled_vectors, 0, sizeof(handled_vectors));
for (i = 0; i < IOAPIC_NUM_PINS; ++i)
__set_bit(ioapic->redirtbl[i].fields.vector, handled_vectors);
memcpy(ioapic->handled_vectors, handled_vectors,
sizeof(handled_vectors));
smp_wmb();
}
static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
{
unsigned index;
@ -134,6 +147,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
e->bits |= (u32) val;
e->fields.remote_irr = 0;
}
update_handled_vectors(ioapic);
mask_after = e->fields.mask;
if (mask_before != mask_after)
kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
@ -241,6 +255,9 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
smp_rmb();
if (!test_bit(vector, ioapic->handled_vectors))
return;
mutex_lock(&ioapic->lock);
__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
mutex_unlock(&ioapic->lock);
@ -352,6 +369,7 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic->ioregsel = 0;
ioapic->irr = 0;
ioapic->id = 0;
update_handled_vectors(ioapic);
}
static const struct kvm_io_device_ops ioapic_mmio_ops = {
@ -372,13 +390,28 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_ioapic_reset(ioapic);
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
ret = kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev);
if (ret < 0)
mutex_lock(&kvm->slots_lock);
ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
return ret;
}
void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
if (ioapic) {
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
}
int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
@ -399,6 +432,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
mutex_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
update_handled_vectors(ioapic);
mutex_unlock(&ioapic->lock);
return 0;
}

View file

@ -46,6 +46,7 @@ struct kvm_ioapic {
struct kvm *kvm;
void (*ack_notifier)(void *opaque, int irq);
struct mutex lock;
DECLARE_BITMAP(handled_vectors, 256);
};
#ifdef DEBUG
@ -71,6 +72,7 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level);
void kvm_ioapic_reset(struct kvm_ioapic *ioapic);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,

View file

@ -32,10 +32,10 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
static void kvm_iommu_put_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages);
int kvm_iommu_map_pages(struct kvm *kvm,
gfn_t base_gfn, unsigned long npages)
int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
{
gfn_t gfn = base_gfn;
gfn_t gfn = slot->base_gfn;
unsigned long npages = slot->npages;
pfn_t pfn;
int i, r = 0;
struct iommu_domain *domain = kvm->arch.iommu_domain;
@ -54,7 +54,7 @@ int kvm_iommu_map_pages(struct kvm *kvm,
if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn)))
continue;
pfn = gfn_to_pfn(kvm, gfn);
pfn = gfn_to_pfn_memslot(kvm, slot, gfn);
r = iommu_map_range(domain,
gfn_to_gpa(gfn),
pfn_to_hpa(pfn),
@ -69,17 +69,19 @@ int kvm_iommu_map_pages(struct kvm *kvm,
return 0;
unmap_pages:
kvm_iommu_put_pages(kvm, base_gfn, i);
kvm_iommu_put_pages(kvm, slot->base_gfn, i);
return r;
}
static int kvm_iommu_map_memslots(struct kvm *kvm)
{
int i, r = 0;
struct kvm_memslots *slots;
for (i = 0; i < kvm->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; i++) {
r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
if (r)
break;
}
@ -104,7 +106,8 @@ int kvm_assign_device(struct kvm *kvm,
r = iommu_attach_device(domain, &pdev->dev);
if (r) {
printk(KERN_ERR "assign device %x:%x.%x failed",
printk(KERN_ERR "assign device %x:%x:%x.%x failed",
pci_domain_nr(pdev->bus),
pdev->bus->number,
PCI_SLOT(pdev->devfn),
PCI_FUNC(pdev->devfn));
@ -125,7 +128,8 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
printk(KERN_DEBUG "assign device: host bdf = %x:%x:%x\n",
printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@ -152,7 +156,8 @@ int kvm_deassign_device(struct kvm *kvm,
iommu_detach_device(domain, &pdev->dev);
printk(KERN_DEBUG "deassign device: host bdf = %x:%x:%x\n",
printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
PCI_SLOT(assigned_dev->host_devfn),
PCI_FUNC(assigned_dev->host_devfn));
@ -210,10 +215,13 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
int i;
struct kvm_memslots *slots;
for (i = 0; i < kvm->nmemslots; i++) {
kvm_iommu_put_pages(kvm, kvm->memslots[i].base_gfn,
kvm->memslots[i].npages);
slots = rcu_dereference(kvm->memslots);
for (i = 0; i < slots->nmemslots; i++) {
kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
slots->memslots[i].npages);
}
return 0;

View file

@ -44,6 +44,8 @@
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/compat.h>
#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <asm/processor.h>
#include <asm/io.h>
@ -51,9 +53,7 @@
#include <asm/pgtable.h>
#include <asm-generic/bitops/le.h>
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
#include "coalesced_mmio.h"
#endif
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
@ -86,6 +86,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
static int hardware_enable_all(void);
static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
static bool kvm_rebooting;
static bool largepages_enabled = true;
@ -136,7 +138,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
zalloc_cpumask_var(&cpus, GFP_ATOMIC);
spin_lock(&kvm->requests_lock);
raw_spin_lock(&kvm->requests_lock);
me = smp_processor_id();
kvm_for_each_vcpu(i, vcpu, kvm) {
if (test_and_set_bit(req, &vcpu->requests))
@ -151,7 +153,7 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
smp_call_function_many(cpus, ack_flush, NULL, 1);
else
called = false;
spin_unlock(&kvm->requests_lock);
raw_spin_unlock(&kvm->requests_lock);
free_cpumask_var(cpus);
return called;
}
@ -215,7 +217,7 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush;
int need_tlb_flush, idx;
/*
* When ->invalidate_page runs, the linux pte has been zapped
@ -235,10 +237,12 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
* pte after kvm_unmap_hva returned, without noticing the page
* is going to be freed.
*/
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
need_tlb_flush = kvm_unmap_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@ -252,11 +256,14 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int idx;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
kvm->mmu_notifier_seq++;
kvm_set_spte_hva(kvm, address, pte);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
@ -265,8 +272,9 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int need_tlb_flush = 0;
int need_tlb_flush = 0, idx;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
/*
* The count increase must become visible at unlock time as no
@ -277,6 +285,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
for (; start < end; start += PAGE_SIZE)
need_tlb_flush |= kvm_unmap_hva(kvm, start);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
/* we've to flush the tlb before the pages can be freed */
if (need_tlb_flush)
@ -314,11 +323,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
unsigned long address)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young;
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
young = kvm_age_hva(kvm, address);
spin_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
if (young)
kvm_flush_remote_tlbs(kvm);
@ -341,15 +352,26 @@ static const struct mmu_notifier_ops kvm_mmu_notifier_ops = {
.change_pte = kvm_mmu_notifier_change_pte,
.release = kvm_mmu_notifier_release,
};
static int kvm_init_mmu_notifier(struct kvm *kvm)
{
kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
return mmu_notifier_register(&kvm->mmu_notifier, current->mm);
}
#else /* !(CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER) */
static int kvm_init_mmu_notifier(struct kvm *kvm)
{
return 0;
}
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
static struct kvm *kvm_create_vm(void)
{
int r = 0;
int r = 0, i;
struct kvm *kvm = kvm_arch_create_vm();
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
struct page *page;
#endif
if (IS_ERR(kvm))
goto out;
@ -363,39 +385,35 @@ static struct kvm *kvm_create_vm(void)
INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
#endif
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page) {
r = -ENOMEM;
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
goto out_err;
}
kvm->coalesced_mmio_ring =
(struct kvm_coalesced_mmio_ring *)page_address(page);
#endif
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
{
kvm->mmu_notifier.ops = &kvm_mmu_notifier_ops;
r = mmu_notifier_register(&kvm->mmu_notifier, current->mm);
if (r) {
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
put_page(page);
#endif
if (init_srcu_struct(&kvm->srcu))
goto out_err;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
if (!kvm->buses[i]) {
cleanup_srcu_struct(&kvm->srcu);
goto out_err;
}
}
#endif
r = kvm_init_mmu_notifier(kvm);
if (r) {
cleanup_srcu_struct(&kvm->srcu);
goto out_err;
}
kvm->mm = current->mm;
atomic_inc(&kvm->mm->mm_count);
spin_lock_init(&kvm->mmu_lock);
spin_lock_init(&kvm->requests_lock);
kvm_io_bus_init(&kvm->pio_bus);
raw_spin_lock_init(&kvm->requests_lock);
kvm_eventfd_init(kvm);
mutex_init(&kvm->lock);
mutex_init(&kvm->irq_lock);
kvm_io_bus_init(&kvm->mmio_bus);
init_rwsem(&kvm->slots_lock);
mutex_init(&kvm->slots_lock);
atomic_set(&kvm->users_count, 1);
spin_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
@ -406,12 +424,12 @@ static struct kvm *kvm_create_vm(void)
out:
return kvm;
#if defined(KVM_COALESCED_MMIO_PAGE_OFFSET) || \
(defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER))
out_err:
hardware_disable_all();
#endif
out_err_nodisable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
kfree(kvm);
return ERR_PTR(r);
}
@ -446,13 +464,17 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
void kvm_free_physmem(struct kvm *kvm)
{
int i;
struct kvm_memslots *slots = kvm->memslots;
for (i = 0; i < kvm->nmemslots; ++i)
kvm_free_physmem_slot(&kvm->memslots[i], NULL);
for (i = 0; i < slots->nmemslots; ++i)
kvm_free_physmem_slot(&slots->memslots[i], NULL);
kfree(kvm->memslots);
}
static void kvm_destroy_vm(struct kvm *kvm)
{
int i;
struct mm_struct *mm = kvm->mm;
kvm_arch_sync_events(kvm);
@ -460,12 +482,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
kvm_io_bus_destroy(&kvm->pio_bus);
kvm_io_bus_destroy(&kvm->mmio_bus);
#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
if (kvm->coalesced_mmio_ring != NULL)
free_page((unsigned long)kvm->coalesced_mmio_ring);
#endif
for (i = 0; i < KVM_NR_BUSES; i++)
kvm_io_bus_destroy(kvm->buses[i]);
kvm_coalesced_mmio_free(kvm);
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
#else
@ -512,12 +531,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem,
int user_alloc)
{
int r;
int r, flush_shadow = 0;
gfn_t base_gfn;
unsigned long npages;
unsigned long i;
struct kvm_memory_slot *memslot;
struct kvm_memory_slot old, new;
struct kvm_memslots *slots, *old_memslots;
r = -EINVAL;
/* General sanity checks */
@ -532,7 +552,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
memslot = &kvm->memslots[mem->slot];
memslot = &kvm->memslots->memslots[mem->slot];
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
@ -553,7 +573,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
/* Check for overlaps */
r = -EEXIST;
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
struct kvm_memory_slot *s = &kvm->memslots[i];
struct kvm_memory_slot *s = &kvm->memslots->memslots[i];
if (s == memslot || !s->npages)
continue;
@ -579,15 +599,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
memset(new.rmap, 0, npages * sizeof(*new.rmap));
new.user_alloc = user_alloc;
/*
* hva_to_rmmap() serialzies with the mmu_lock and to be
* safe it has to ignore memslots with !user_alloc &&
* !userspace_addr.
*/
if (user_alloc)
new.userspace_addr = mem->userspace_addr;
else
new.userspace_addr = 0;
new.userspace_addr = mem->userspace_addr;
}
if (!npages)
goto skip_lpage;
@ -642,8 +654,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
if (!new.dirty_bitmap)
goto out_free;
memset(new.dirty_bitmap, 0, dirty_bytes);
/* destroy any largepage mappings for dirty tracking */
if (old.npages)
kvm_arch_flush_shadow(kvm);
flush_shadow = 1;
}
#else /* not defined CONFIG_S390 */
new.user_alloc = user_alloc;
@ -651,36 +664,72 @@ int __kvm_set_memory_region(struct kvm *kvm,
new.userspace_addr = mem->userspace_addr;
#endif /* not defined CONFIG_S390 */
if (!npages)
if (!npages) {
r = -ENOMEM;
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
goto out_free;
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if (mem->slot >= slots->nmemslots)
slots->nmemslots = mem->slot + 1;
slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
/* From this point no new shadow pages pointing to a deleted
* memslot will be created.
*
* validation of sp->gfn happens in:
* - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
* - kvm_is_visible_gfn (mmu_check_roots)
*/
kvm_arch_flush_shadow(kvm);
spin_lock(&kvm->mmu_lock);
if (mem->slot >= kvm->nmemslots)
kvm->nmemslots = mem->slot + 1;
*memslot = new;
spin_unlock(&kvm->mmu_lock);
r = kvm_arch_set_memory_region(kvm, mem, old, user_alloc);
if (r) {
spin_lock(&kvm->mmu_lock);
*memslot = old;
spin_unlock(&kvm->mmu_lock);
goto out_free;
kfree(old_memslots);
}
kvm_free_physmem_slot(&old, npages ? &new : NULL);
/* Slot deletion case: we have to update the current slot */
spin_lock(&kvm->mmu_lock);
if (!npages)
*memslot = old;
spin_unlock(&kvm->mmu_lock);
r = kvm_arch_prepare_memory_region(kvm, &new, old, mem, user_alloc);
if (r)
goto out_free;
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
r = kvm_iommu_map_pages(kvm, base_gfn, npages);
if (r)
goto out;
if (npages) {
r = kvm_iommu_map_pages(kvm, &new);
if (r)
goto out_free;
}
#endif
r = -ENOMEM;
slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!slots)
goto out_free;
memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
if (mem->slot >= slots->nmemslots)
slots->nmemslots = mem->slot + 1;
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
new.rmap = NULL;
new.dirty_bitmap = NULL;
for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i)
new.lpage_info[i] = NULL;
}
slots->memslots[mem->slot] = new;
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
kvm_free_physmem_slot(&old, &new);
kfree(old_memslots);
if (flush_shadow)
kvm_arch_flush_shadow(kvm);
return 0;
out_free:
@ -697,9 +746,9 @@ int kvm_set_memory_region(struct kvm *kvm,
{
int r;
down_write(&kvm->slots_lock);
mutex_lock(&kvm->slots_lock);
r = __kvm_set_memory_region(kvm, mem, user_alloc);
up_write(&kvm->slots_lock);
mutex_unlock(&kvm->slots_lock);
return r;
}
EXPORT_SYMBOL_GPL(kvm_set_memory_region);
@ -726,7 +775,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
memslot = &kvm->memslots[log->slot];
memslot = &kvm->memslots->memslots[log->slot];
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@ -780,9 +829,10 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn)
{
int i;
struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
for (i = 0; i < kvm->nmemslots; ++i) {
struct kvm_memory_slot *memslot = &kvm->memslots[i];
for (i = 0; i < slots->nmemslots; ++i) {
struct kvm_memory_slot *memslot = &slots->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@ -801,10 +851,14 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
int i;
struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
gfn = unalias_gfn(kvm, gfn);
gfn = unalias_gfn_instantiation(kvm, gfn);
for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
struct kvm_memory_slot *memslot = &kvm->memslots[i];
struct kvm_memory_slot *memslot = &slots->memslots[i];
if (memslot->flags & KVM_MEMSLOT_INVALID)
continue;
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
@ -814,33 +868,68 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn)
{
struct vm_area_struct *vma;
unsigned long addr, size;
size = PAGE_SIZE;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
return PAGE_SIZE;
down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (!vma)
goto out;
size = vma_kernel_pagesize(vma);
out:
up_read(&current->mm->mmap_sem);
return size;
}
int memslot_id(struct kvm *kvm, gfn_t gfn)
{
int i;
struct kvm_memslots *slots = rcu_dereference(kvm->memslots);
struct kvm_memory_slot *memslot = NULL;
gfn = unalias_gfn(kvm, gfn);
for (i = 0; i < slots->nmemslots; ++i) {
memslot = &slots->memslots[i];
if (gfn >= memslot->base_gfn
&& gfn < memslot->base_gfn + memslot->npages)
break;
}
return memslot - slots->memslots;
}
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{
struct kvm_memory_slot *slot;
gfn = unalias_gfn(kvm, gfn);
gfn = unalias_gfn_instantiation(kvm, gfn);
slot = gfn_to_memslot_unaliased(kvm, gfn);
if (!slot)
if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return bad_hva();
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
EXPORT_SYMBOL_GPL(gfn_to_hva);
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr)
{
struct page *page[1];
unsigned long addr;
int npages;
pfn_t pfn;
might_sleep();
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr)) {
get_page(bad_page);
return page_to_pfn(bad_page);
}
npages = get_user_pages_fast(addr, 1, 1, page);
if (unlikely(npages != 1)) {
@ -865,8 +954,32 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
return pfn;
}
pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
{
unsigned long addr;
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr)) {
get_page(bad_page);
return page_to_pfn(bad_page);
}
return hva_to_pfn(kvm, addr);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn);
static unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
}
pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn)
{
unsigned long addr = gfn_to_hva_memslot(slot, gfn);
return hva_to_pfn(kvm, addr);
}
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
{
pfn_t pfn;
@ -1854,12 +1967,7 @@ static struct notifier_block kvm_reboot_notifier = {
.priority = 0,
};
void kvm_io_bus_init(struct kvm_io_bus *bus)
{
memset(bus, 0, sizeof(*bus));
}
void kvm_io_bus_destroy(struct kvm_io_bus *bus)
static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
{
int i;
@ -1868,13 +1976,15 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus)
kvm_iodevice_destructor(pos);
}
kfree(bus);
}
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
int i;
struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
return 0;
@ -1882,59 +1992,71 @@ int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr,
}
/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val)
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
int i;
struct kvm_io_bus *bus = rcu_dereference(kvm->buses[bus_idx]);
for (i = 0; i < bus->dev_count; i++)
if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
return 0;
return -EOPNOTSUPP;
}
int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus,
struct kvm_io_device *dev)
/* Caller must hold slots_lock. */
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
int ret;
struct kvm_io_bus *new_bus, *bus;
down_write(&kvm->slots_lock);
ret = __kvm_io_bus_register_dev(bus, dev);
up_write(&kvm->slots_lock);
return ret;
}
/* An unlocked version. Caller must have write lock on slots_lock. */
int __kvm_io_bus_register_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev)
{
bus = kvm->buses[bus_idx];
if (bus->dev_count > NR_IOBUS_DEVS-1)
return -ENOSPC;
bus->devs[bus->dev_count++] = dev;
new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
new_bus->devs[new_bus->dev_count++] = dev;
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
return 0;
}
void kvm_io_bus_unregister_dev(struct kvm *kvm,
struct kvm_io_bus *bus,
struct kvm_io_device *dev)
/* Caller must hold slots_lock. */
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
struct kvm_io_device *dev)
{
down_write(&kvm->slots_lock);
__kvm_io_bus_unregister_dev(bus, dev);
up_write(&kvm->slots_lock);
}
int i, r;
struct kvm_io_bus *new_bus, *bus;
/* An unlocked version. Caller must have write lock on slots_lock. */
void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus,
struct kvm_io_device *dev)
{
int i;
new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
for (i = 0; i < bus->dev_count; i++)
if (bus->devs[i] == dev) {
bus->devs[i] = bus->devs[--bus->dev_count];
bus = kvm->buses[bus_idx];
memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
r = -ENOENT;
for (i = 0; i < new_bus->dev_count; i++)
if (new_bus->devs[i] == dev) {
r = 0;
new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
break;
}
if (r) {
kfree(new_bus);
return r;
}
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
return r;
}
static struct notifier_block kvm_cpu_notifier = {