From 9c22e660ce8105debb497933b9900eaeb40836d2 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 7 Aug 2014 11:12:02 -0600 Subject: [PATCH 1/6] vfio-pci: Release devices with BusMaster disabled Our current open/release path looks like this: vfio_pci_open vfio_pci_enable pci_enable_device pci_save_state pci_store_saved_state vfio_pci_release vfio_pci_disable pci_disable_device pci_restore_state pci_enable_device() doesn't modify PCI_COMMAND_MASTER, so if a device comes to us with it enabled, it persists through the open and gets stored as part of the device saved state. We then restore that saved state when released, which can allow the device to attempt to continue to do DMA. When the group is disconnected from the domain, this will get caught by the IOMMU, but if there are other devices in the group, the device may continue running and interfere with the user. Even in the former case, IOMMUs don't necessarily behave well and a stream of blocked DMA can result in unpleasant behavior on the host. Explicitly disable Bus Master as we're enabling the device and slightly re-work release to make sure that pci_disable_device() is the last thing that touches the device. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e2ee80f36e3e..fc011e13213b 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -44,6 +44,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + /* Don't allow our initial saved state to include busmaster */ + pci_clear_master(pdev); + ret = pci_enable_device(pdev); if (ret) return ret; @@ -99,7 +102,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(pdev); + /* Stop the device from further DMA */ + pci_clear_master(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -128,7 +132,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); if (!vdev->reset_works) - return; + goto out; pci_save_state(pdev); } @@ -151,6 +155,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) } pci_restore_state(pdev); +out: + pci_disable_device(pdev); } static void vfio_pci_release(void *device_data) From 61d792562b53c610f9fe917f2bbc22218aa39c22 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 7 Aug 2014 11:12:04 -0600 Subject: [PATCH 2/6] vfio-pci: Use mutex around open, release, and remove Serializing open/release allows us to fix a refcnt error if we fail to enable the device and lets us prevent devices from being unbound or opened, giving us an opportunity to do bus resets on release. No restriction added to serialize binding devices to vfio-pci while the mutex is held though. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 35 +++++++++++++++++++---------- drivers/vfio/pci/vfio_pci_private.h | 2 +- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index fc011e13213b..c9d756b7ee9e 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -37,6 +37,8 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +static DEFINE_MUTEX(driver_lock); + static int vfio_pci_enable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; @@ -163,23 +165,29 @@ static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) { + mutex_lock(&driver_lock); + + if (!(--vdev->refcnt)) { vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); } + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); } static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; - int ret; + int ret = 0; if (!try_module_get(THIS_MODULE)) return -ENODEV; - if (atomic_inc_return(&vdev->refcnt) == 1) { + mutex_lock(&driver_lock); + + if (!vdev->refcnt) { ret = vfio_pci_enable(vdev); if (ret) goto error; @@ -190,10 +198,11 @@ static int vfio_pci_open(void *device_data) goto error; } } - - return 0; + vdev->refcnt++; error: - module_put(THIS_MODULE); + mutex_unlock(&driver_lock); + if (ret) + module_put(THIS_MODULE); return ret; } @@ -849,7 +858,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); - atomic_set(&vdev->refcnt, 0); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { @@ -864,12 +872,15 @@ static void vfio_pci_remove(struct pci_dev *pdev) { struct vfio_pci_device *vdev; - vdev = vfio_del_group_dev(&pdev->dev); - if (!vdev) - return; + mutex_lock(&driver_lock); - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + vdev = vfio_del_group_dev(&pdev->dev); + if (vdev) { + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + } + + mutex_unlock(&driver_lock); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9c6d5d0f3b02..31e7a30196ab 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -55,7 +55,7 @@ struct vfio_pci_device { bool bardirty; bool has_vga; struct pci_saved_state *pci_saved_state; - atomic_t refcnt; + int refcnt; struct eventfd_ctx *err_trigger; }; From bc4fba77124e2fe4eb14bcb52875c0b0228deace Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 7 Aug 2014 11:12:07 -0600 Subject: [PATCH 3/6] vfio-pci: Attempt bus/slot reset on release Each time a device is released, mark whether a local reset was successful or whether a bus/slot reset is needed. If a reset is needed and all of the affected devices are bound to vfio-pci and unused, allow the reset. This is most useful when the userspace driver is killed and releases all the devices in an unclean state, such as when a QEMU VM quits. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 112 ++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_private.h | 1 + 2 files changed, 113 insertions(+) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index c9d756b7ee9e..1651c0769b72 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -39,6 +39,8 @@ MODULE_PARM_DESC(nointxmask, static DEFINE_MUTEX(driver_lock); +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); + static int vfio_pci_enable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; @@ -123,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vdev->barmap[bar] = NULL; } + vdev->needs_reset = true; + /* * If we have saved state, restore it. If we can reset the device, * even better. Resetting with current state seems better than @@ -154,11 +158,15 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) if (ret) pr_warn("%s: Failed to reset device %s (%d)\n", __func__, dev_name(&pdev->dev), ret); + else + vdev->needs_reset = false; } pci_restore_state(pdev); out: pci_disable_device(pdev); + + vfio_pci_try_bus_reset(vdev); } static void vfio_pci_release(void *device_data) @@ -923,6 +931,110 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; +/* + * Test whether a reset is necessary and possible. We mark devices as + * needs_reset when they are released, but don't have a function-local reset + * available. If any of these exist in the affected devices, we want to do + * a bus/slot reset. We also need all of the affected devices to be unused, + * so we abort if any device has a non-zero refcnt. driver_lock prevents a + * device from being opened during the scan or unbound from vfio-pci. + */ +static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) +{ + bool *needs_reset = data; + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + int ret = -EBUSY; + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return ret; + + vdev = vfio_device_data(device); + if (vdev) { + if (vdev->needs_reset) + *needs_reset = true; + + if (!vdev->refcnt) + ret = 0; + } + + vfio_device_put(device); + } + + /* + * TODO: vfio-core considers groups to be viable even if some devices + * are attached to known drivers, like pci-stub or pcieport. We can't + * freeze devices from being unbound to those drivers like we can + * here though, so it would be racy to test for them. We also can't + * use device_lock() to prevent changes as that would interfere with + * PCI-core taking device_lock during bus reset. For now, we require + * devices to be bound to vfio-pci to get a bus/slot reset on release. + */ + + return ret; +} + +/* Clear needs_reset on all affected devices after successful bus/slot reset */ +static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) +{ + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return 0; + + vdev = vfio_device_data(device); + if (vdev) + vdev->needs_reset = false; + + vfio_device_put(device); + } + + return 0; +} + +/* + * Attempt to do a bus/slot reset if there are devices affected by a reset for + * this device that are needs_reset and all of the affected devices are unused + * (!refcnt). Callers of this function are required to hold driver_lock such + * that devices can not be unbound from vfio-pci or opened by a user while we + * test for and perform a bus/slot reset. + */ +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) +{ + bool needs_reset = false, slot = false; + int ret; + + if (!pci_probe_reset_slot(vdev->pdev->slot)) + slot = true; + else if (pci_probe_reset_bus(vdev->pdev->bus)) + return; + + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_test_bus_reset, + &needs_reset, slot) || !needs_reset) + return; + + if (slot) + ret = pci_try_reset_slot(vdev->pdev->slot); + else + ret = pci_try_reset_bus(vdev->pdev->bus); + + if (ret) + return; + + vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_clear_needs_reset, NULL, slot); +} + static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 31e7a30196ab..671c17a6e6d0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -54,6 +54,7 @@ struct vfio_pci_device { bool extended_caps; bool bardirty; bool has_vga; + bool needs_reset; struct pci_saved_state *pci_saved_state; int refcnt; struct eventfd_ctx *err_trigger; From 92d18a6851fb6295466657ad1cf7fe88c2054ffa Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Fri, 8 Aug 2014 10:36:20 -0600 Subject: [PATCH 4/6] drivers/vfio: Fix EEH build error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The VFIO related components could be built as dynamic modules. Unfortunately, CONFIG_EEH can't be configured to "m". The patch fixes the build errors when configuring VFIO related components as dynamic modules as follows: CC [M] drivers/vfio/vfio_iommu_spapr_tce.o In file included from drivers/vfio/vfio.c:33:0: include/linux/vfio.h:101:43: warning: ‘struct pci_dev’ declared \ inside parameter list [enabled by default] : WRAP arch/powerpc/boot/zImage.pseries WRAP arch/powerpc/boot/zImage.maple WRAP arch/powerpc/boot/zImage.pmac WRAP arch/powerpc/boot/zImage.epapr MODPOST 1818 modules ERROR: ".vfio_spapr_iommu_eeh_ioctl" [drivers/vfio/vfio_iommu_spapr_tce.ko]\ undefined! ERROR: ".vfio_spapr_pci_eeh_open" [drivers/vfio/pci/vfio-pci.ko] undefined! ERROR: ".vfio_spapr_pci_eeh_release" [drivers/vfio/pci/vfio-pci.ko] undefined! Reported-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alexey Kardashevskiy Signed-off-by: Alex Williamson --- drivers/vfio/Kconfig | 6 ++++++ drivers/vfio/Makefile | 2 +- drivers/vfio/vfio_spapr_eeh.c | 3 +++ include/linux/vfio.h | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204b9215..d8c57636b9ce 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO && SPAPR_TCE_IOMMU default n +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc75e85..0b035b12600a 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o +obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4ce1431..949f98e997af 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -18,11 +18,13 @@ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { return eeh_dev_open(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { eeh_dev_release(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) @@ -85,3 +87,4 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } +EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 25a0fbd4b998..224128a96b7f 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,6 +98,7 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +struct pci_dev; #ifdef CONFIG_EEH extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); From 89a2edd62fe5983fa8648f8a3c3cd025c6aea584 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 8 Aug 2014 10:37:51 -0600 Subject: [PATCH 5/6] drivers/vfio: Allow EEH to be built as module This adds necessary declarations to the SPAPR VFIO EEH module, otherwise multiple dynamic linker errors reported: vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0) vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0) vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_set_option (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_configure (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_reset (err 0) vfio_spapr_eeh: Unknown symbol eeh_pe_get_state (err 0) vfio_spapr_eeh: Unknown symbol eeh_iommu_group_to_pe (err 0) vfio_spapr_eeh: Unknown symbol eeh_dev_open (err 0) Signed-off-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alex Williamson --- drivers/vfio/vfio_spapr_eeh.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 949f98e997af..4779cace8036 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -9,10 +9,15 @@ * published by the Free Software Foundation. */ +#include #include #include #include +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC "VFIO IOMMU SPAPR EEH" + /* We might build address mapping here for "fast" path later */ int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { @@ -88,3 +93,8 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); From 9b936c960f22954bfb89f2fefd8f96916bb42908 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Fri, 8 Aug 2014 10:39:16 -0600 Subject: [PATCH 6/6] drivers/vfio: Enable VFIO if EEH is not supported The existing vfio_pci_open() fails upon error returned from vfio_spapr_pci_eeh_open(), which breaks POWER7's P5IOC2 PHB support which this patch brings back. The patch fixes the issue by dropping the return value of vfio_spapr_pci_eeh_open(). Signed-off-by: Alexey Kardashevskiy Signed-off-by: Gavin Shan Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 6 +----- drivers/vfio/vfio_spapr_eeh.c | 4 ++-- include/linux/vfio.h | 5 ++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1651c0769b72..f7825332a325 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -200,11 +200,7 @@ static int vfio_pci_open(void *device_data) if (ret) goto error; - ret = vfio_spapr_pci_eeh_open(vdev->pdev); - if (ret) { - vfio_pci_disable(vdev); - goto error; - } + vfio_spapr_pci_eeh_open(vdev->pdev); } vdev->refcnt++; error: diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index 4779cace8036..86dfceb9201f 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -19,9 +19,9 @@ #define DRIVER_DESC "VFIO IOMMU SPAPR EEH" /* We might build address mapping here for "fast" path later */ -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return eeh_dev_open(pdev); + eeh_dev_open(pdev); } EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 224128a96b7f..d3204115f15d 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -100,15 +100,14 @@ extern long vfio_external_check_extension(struct vfio_group *group, struct pci_dev; #ifdef CONFIG_EEH -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg); #else -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return 0; } static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)