KVM: Move device assignment logic to common code

To share it with other architectures, this patch moves the device
assignment logic into common code.

Signed-off-by: Xiantao Zhang <xiantao.zhang@intel.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
This commit is contained in:
Xiantao Zhang 2008-10-06 13:47:38 +08:00 committed by Avi Kivity
parent 371c01b28e
commit 8a98f6648a
4 changed files with 269 additions and 257 deletions

View file

@ -30,7 +30,6 @@
#include <linux/interrupt.h>
#include <linux/kvm.h>
#include <linux/fs.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/mman.h>
@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
/*
 * Look up an assigned device by id.
 *
 * Walks the list anchored at @head and returns the first entry whose
 * assigned_dev_id equals @assigned_dev_id, or NULL if no entry matches.
 */
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *pos;

	list_for_each(pos, head) {
		struct kvm_assigned_dev_kernel *candidate =
			list_entry(pos, struct kvm_assigned_dev_kernel, list);

		if (candidate->assigned_dev_id == assigned_dev_id)
			return candidate;
	}

	return NULL;
}
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
/* This is taken to safely inject irq inside the guest. When
* the interrupt injection (or the ioapic code) uses a
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
kvm_set_irq(assigned_dev->kvm,
assigned_dev->guest_irq, 1);
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
/*
 * Host interrupt handler for an assigned device.
 *
 * Takes a reference on the owning kvm, defers the guest injection to
 * interrupt_work and masks the host line with disable_irq_nosync(); the
 * line is re-enabled from kvm_assigned_dev_ack_irq().
 *
 * FIXME: Implement the OR logic needed to make shared interrupts on
 * this line behave properly
 */
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *adev = dev_id;

	kvm_get_kvm(adev->kvm);
	schedule_work(&adev->interrupt_work);
	disable_irq_nosync(irq);

	return IRQ_HANDLED;
}
/*
 * Ack the irq line for an assigned device: lower the guest irq and
 * re-enable the host irq.  A notifier whose gsi is -1 is ignored.
 */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *adev;

	if (kian->gsi != -1) {
		adev = container_of(kian, struct kvm_assigned_dev_kernel,
				    ack_notifier);
		kvm_set_irq(adev->kvm, adev->guest_irq, 0);
		enable_irq(adev->host_irq);
	}
}
/*
 * Tear down a single assigned device and free it.
 *
 * In order: releases the host irq (only when an in-kernel irqchip is used
 * and an irq was requested), unregisters the irq ack notifier, cancels any
 * queued interrupt work, releases the PCI regions, disables the PCI device,
 * drops the pci_dev reference, unlinks the entry from its list and frees it.
 * @assigned_dev must not be used after this returns.
 *
 * NOTE(review): interrupt_work is only INIT_WORK()ed and ack_notifier only
 * registered from kvm_vm_ioctl_assign_irq(); for a device that never had an
 * irq assigned, the unregister/cancel calls below act on zeroed (kzalloc)
 * state - confirm both helpers tolerate that.
 *
 * NOTE(review): kvm_vm_ioctl_assign_irq() calls this after dropping
 * kvm->lock, so the list_del() below is not covered by that lock on that
 * path - confirm this is intended.
 */
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
{
/* The host irq handler is released before the work item is cancelled. */
if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. The work handler will now never run, so
* the kvm reference taken in kvm_assigned_dev_intr() has to be
* dropped here instead.
*/
kvm_put_kvm(kvm);
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
list_del(&assigned_dev->list);
kfree(assigned_dev);
}
/*
 * Release every device on kvm->arch.assigned_dev_head.
 *
 * The _safe iterator is needed because kvm_free_assigned_device()
 * unlinks and frees the entry being visited.
 */
static void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, &kvm->arch.assigned_dev_head) {
		struct kvm_assigned_dev_kernel *adev =
			list_entry(pos, struct kvm_assigned_dev_kernel, list);

		kvm_free_assigned_device(kvm, adev);
	}
}
/*
 * Handle KVM_ASSIGN_IRQ: bind a guest irq to a previously assigned device
 * and, when the irqchip is in the kernel, install a host irq handler and an
 * ack notifier for it.
 *
 * Returns 0 on success, -EINVAL if no device with
 * assigned_irq->assigned_dev_id is assigned, and - only with an in-kernel
 * irqchip - -EPERM if the caller lacks CAP_SYS_RAWIO or -EIO if
 * request_irq() fails.
 *
 * NOTE(review): on the -EPERM and -EIO paths the whole assigned device is
 * freed via kvm_free_assigned_device(), not just the irq setup - confirm
 * that callers expect the device assignment itself to be gone afterwards.
 */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_irq->assigned_dev_id);
if (!match) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
/* An irq was already set up for this device: only retarget the guest irq. */
if (match->irq_requested) {
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
mutex_unlock(&kvm->lock);
return 0;
}
/* First irq assignment for this device: prepare the deferred injection work. */
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
if (irqchip_in_kernel(kvm)) {
if (!capable(CAP_SYS_RAWIO)) {
r = -EPERM;
goto out_release;
}
/* A zero host_irq in the request selects the irq of the PCI device itself. */
if (assigned_irq->host_irq)
match->host_irq = assigned_irq->host_irq;
else
match->host_irq = match->dev->irq;
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
/* Even though this is PCI, we don't want to use shared
* interrupts. Sharing host devices with guest-assigned devices
* on the same interrupt line is not a happy situation: there
* are going to be long delays in accepting, acking, etc.
*/
if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_device", (void *)match)) {
r = -EIO;
goto out_release;
}
}
/* Set even without an in-kernel irqchip, where no host handler was installed. */
match->irq_requested = true;
mutex_unlock(&kvm->lock);
return r;
out_release:
/* NOTE(review): the lock is dropped before kvm_free_assigned_device(),
* which does list_del() on the device list - confirm nothing else can
* walk or modify the list concurrently at this point.
*/
mutex_unlock(&kvm->lock);
kvm_free_assigned_device(kvm, match);
return r;
}
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (match) {
/* device already assigned */
r = -EINVAL;
goto out;
}
match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
if (match == NULL) {
printk(KERN_INFO "%s: Couldn't allocate memory\n",
__func__);
r = -ENOMEM;
goto out;
}
dev = pci_get_bus_and_slot(assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
r = -EINVAL;
goto out_free;
}
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
goto out_put;
}
r = pci_request_regions(dev, "kvm_assigned_device");
if (r) {
printk(KERN_INFO "%s: Could not get access to device regions\n",
__func__);
goto out_disable;
}
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->dev = dev;
match->kvm = kvm;
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
r = kvm_iommu_map_guest(kvm, match);
if (r)
goto out_list_del;
}
out:
mutex_unlock(&kvm->lock);
return r;
out_list_del:
list_del(&match->list);
pci_release_regions(dev);
out_disable:
pci_disable_device(dev);
out_put:
pci_dev_put(dev);
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
return r;
}
unsigned long segment_base(u16 selector)
{
struct descriptor_table gdt;
@ -2030,28 +1797,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
break;
}
case KVM_ASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
r = -EFAULT;
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
goto out;
r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
if (r)
goto out;
break;
}
case KVM_ASSIGN_IRQ: {
struct kvm_assigned_irq assigned_irq;
r = -EFAULT;
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
goto out;
r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
if (r)
goto out;
break;
}
case KVM_GET_PIT: {
r = -EFAULT;
if (copy_from_user(&u.ps, argp, sizeof(struct kvm_pit_state)))

View file

@ -383,7 +383,9 @@ struct kvm_trace_rec {
#define KVM_CAP_MP_STATE 14
#define KVM_CAP_COALESCED_MMIO 15
#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
/*
 * Only defined on x86 for now: the common device assignment code is
 * compiled under #ifdef KVM_CAP_DEVICE_ASSIGNMENT, so leaving the
 * capability undefined keeps that code out of other architectures.
 */
#ifdef CONFIG_X86
#define KVM_CAP_DEVICE_ASSIGNMENT 17
#endif
#define KVM_CAP_IOMMU 18
/*

View file

@ -281,6 +281,7 @@ void kvm_free_physmem(struct kvm *kvm);
struct kvm *kvm_arch_create_vm(void);
void kvm_arch_destroy_vm(struct kvm *kvm);
/*
 * Frees every assigned device of @kvm.
 * NOTE(review): the definition in the common code is only built when
 * KVM_CAP_DEVICE_ASSIGNMENT is defined, while this prototype is
 * unconditional - a caller built without that capability would fail to link.
 */
void kvm_free_all_assigned_devices(struct kvm *kvm);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *v);

View file

@ -51,6 +51,12 @@
#include "coalesced_mmio.h"
#endif
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
#include <linux/pci.h>
#include <linux/interrupt.h>
#include "irq.h"
#endif
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@ -71,6 +77,240 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
bool kvm_rebooting;
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
/*
 * Look up an assigned device by id.
 *
 * Walks the list anchored at @head and returns the first entry whose
 * assigned_dev_id equals @assigned_dev_id, or NULL if no entry matches.
 */
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
						      int assigned_dev_id)
{
	struct list_head *pos;

	list_for_each(pos, head) {
		struct kvm_assigned_dev_kernel *candidate =
			list_entry(pos, struct kvm_assigned_dev_kernel, list);

		if (candidate->assigned_dev_id == assigned_dev_id)
			return candidate;
	}

	return NULL;
}
static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work)
{
struct kvm_assigned_dev_kernel *assigned_dev;
assigned_dev = container_of(work, struct kvm_assigned_dev_kernel,
interrupt_work);
/* This is taken to safely inject irq inside the guest. When
* the interrupt injection (or the ioapic code) uses a
* finer-grained lock, update this
*/
mutex_lock(&assigned_dev->kvm->lock);
kvm_set_irq(assigned_dev->kvm,
assigned_dev->guest_irq, 1);
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
}
/*
 * Host interrupt handler for an assigned device.
 *
 * Takes a reference on the owning kvm, defers the guest injection to
 * interrupt_work and masks the host line with disable_irq_nosync(); the
 * line is re-enabled from kvm_assigned_dev_ack_irq().
 *
 * FIXME: Implement the OR logic needed to make shared interrupts on
 * this line behave properly
 */
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
{
	struct kvm_assigned_dev_kernel *adev = dev_id;

	kvm_get_kvm(adev->kvm);
	schedule_work(&adev->interrupt_work);
	disable_irq_nosync(irq);

	return IRQ_HANDLED;
}
/*
 * Ack the irq line for an assigned device: lower the guest irq and
 * re-enable the host irq.  A notifier whose gsi is -1 is ignored.
 */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_assigned_dev_kernel *adev;

	if (kian->gsi != -1) {
		adev = container_of(kian, struct kvm_assigned_dev_kernel,
				    ack_notifier);
		kvm_set_irq(adev->kvm, adev->guest_irq, 0);
		enable_irq(adev->host_irq);
	}
}
/*
 * Tear down a single assigned device and free it.
 *
 * In order: releases the host irq (only when an in-kernel irqchip is used
 * and an irq was requested), unregisters the irq ack notifier, cancels any
 * queued interrupt work, releases the PCI regions, disables the PCI device,
 * drops the pci_dev reference, unlinks the entry from its list and frees it.
 * @assigned_dev must not be used after this returns.
 *
 * NOTE(review): interrupt_work is only INIT_WORK()ed and ack_notifier only
 * registered from kvm_vm_ioctl_assign_irq(); for a device that never had an
 * irq assigned, the unregister/cancel calls below act on zeroed (kzalloc)
 * state - confirm both helpers tolerate that.
 *
 * NOTE(review): kvm_vm_ioctl_assign_irq() calls this after dropping
 * kvm->lock, so the list_del() below is not covered by that lock on that
 * path - confirm this is intended.
 */
static void kvm_free_assigned_device(struct kvm *kvm,
struct kvm_assigned_dev_kernel
*assigned_dev)
{
/* The host irq handler is released before the work item is cancelled. */
if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
if (cancel_work_sync(&assigned_dev->interrupt_work))
/* We had pending work. The work handler will now never run, so
* the kvm reference taken in kvm_assigned_dev_intr() has to be
* dropped here instead.
*/
kvm_put_kvm(kvm);
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
list_del(&assigned_dev->list);
kfree(assigned_dev);
}
/*
 * Release every device on kvm->arch.assigned_dev_head.
 *
 * The _safe iterator is needed because kvm_free_assigned_device()
 * unlinks and frees the entry being visited.
 */
void kvm_free_all_assigned_devices(struct kvm *kvm)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, &kvm->arch.assigned_dev_head) {
		struct kvm_assigned_dev_kernel *adev =
			list_entry(pos, struct kvm_assigned_dev_kernel, list);

		kvm_free_assigned_device(kvm, adev);
	}
}
/*
 * Handle KVM_ASSIGN_IRQ: bind a guest irq to a previously assigned device
 * and, when the irqchip is in the kernel, install a host irq handler and an
 * ack notifier for it.
 *
 * Returns 0 on success, -EINVAL if no device with
 * assigned_irq->assigned_dev_id is assigned, and - only with an in-kernel
 * irqchip - -EPERM if the caller lacks CAP_SYS_RAWIO or -EIO if
 * request_irq() fails.
 *
 * NOTE(review): on the -EPERM and -EIO paths the whole assigned device is
 * freed via kvm_free_assigned_device(), not just the irq setup - confirm
 * that callers expect the device assignment itself to be gone afterwards.
 */
static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
struct kvm_assigned_irq
*assigned_irq)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_irq->assigned_dev_id);
if (!match) {
mutex_unlock(&kvm->lock);
return -EINVAL;
}
/* An irq was already set up for this device: only retarget the guest irq. */
if (match->irq_requested) {
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
mutex_unlock(&kvm->lock);
return 0;
}
/* First irq assignment for this device: prepare the deferred injection work. */
INIT_WORK(&match->interrupt_work,
kvm_assigned_dev_interrupt_work_handler);
if (irqchip_in_kernel(kvm)) {
if (!capable(CAP_SYS_RAWIO)) {
r = -EPERM;
goto out_release;
}
/* A zero host_irq in the request selects the irq of the PCI device itself. */
if (assigned_irq->host_irq)
match->host_irq = assigned_irq->host_irq;
else
match->host_irq = match->dev->irq;
match->guest_irq = assigned_irq->guest_irq;
match->ack_notifier.gsi = assigned_irq->guest_irq;
match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
kvm_register_irq_ack_notifier(kvm, &match->ack_notifier);
/* Even though this is PCI, we don't want to use shared
* interrupts. Sharing host devices with guest-assigned devices
* on the same interrupt line is not a happy situation: there
* are going to be long delays in accepting, acking, etc.
*/
if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_device", (void *)match)) {
r = -EIO;
goto out_release;
}
}
/* Set even without an in-kernel irqchip, where no host handler was installed. */
match->irq_requested = true;
mutex_unlock(&kvm->lock);
return r;
out_release:
/* NOTE(review): the lock is dropped before kvm_free_assigned_device(),
* which does list_del() on the device list - confirm nothing else can
* walk or modify the list concurrently at this point.
*/
mutex_unlock(&kvm->lock);
kvm_free_assigned_device(kvm, match);
return r;
}
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
mutex_lock(&kvm->lock);
match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
assigned_dev->assigned_dev_id);
if (match) {
/* device already assigned */
r = -EINVAL;
goto out;
}
match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
if (match == NULL) {
printk(KERN_INFO "%s: Couldn't allocate memory\n",
__func__);
r = -ENOMEM;
goto out;
}
dev = pci_get_bus_and_slot(assigned_dev->busnr,
assigned_dev->devfn);
if (!dev) {
printk(KERN_INFO "%s: host device not found\n", __func__);
r = -EINVAL;
goto out_free;
}
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
goto out_put;
}
r = pci_request_regions(dev, "kvm_assigned_device");
if (r) {
printk(KERN_INFO "%s: Could not get access to device regions\n",
__func__);
goto out_disable;
}
match->assigned_dev_id = assigned_dev->assigned_dev_id;
match->host_busnr = assigned_dev->busnr;
match->host_devfn = assigned_dev->devfn;
match->dev = dev;
match->kvm = kvm;
list_add(&match->list, &kvm->arch.assigned_dev_head);
if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
r = kvm_iommu_map_guest(kvm, match);
if (r)
goto out_list_del;
}
out:
mutex_unlock(&kvm->lock);
return r;
out_list_del:
list_del(&match->list);
pci_release_regions(dev);
out_disable:
pci_disable_device(dev);
out_put:
pci_dev_put(dev);
out_free:
kfree(match);
mutex_unlock(&kvm->lock);
return r;
}
#endif
static inline int valid_vcpu(int n)
{
return likely(n >= 0 && n < KVM_MAX_VCPUS);
@ -578,12 +818,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
}
kvm_free_physmem_slot(&old, &new);
#ifdef CONFIG_DMAR
/* map the pages in iommu page table */
r = kvm_iommu_map_pages(kvm, base_gfn, npages);
if (r)
goto out;
#endif
return 0;
out_free:
@ -1382,6 +1622,30 @@ static long kvm_vm_ioctl(struct file *filp,
r = 0;
break;
}
#endif
#ifdef KVM_CAP_DEVICE_ASSIGNMENT
case KVM_ASSIGN_PCI_DEVICE: {
struct kvm_assigned_pci_dev assigned_dev;
r = -EFAULT;
if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
goto out;
r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
if (r)
goto out;
break;
}
case KVM_ASSIGN_IRQ: {
struct kvm_assigned_irq assigned_irq;
r = -EFAULT;
if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
goto out;
r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
if (r)
goto out;
break;
}
#endif
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);