diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 11a9301d52d4..492ade8c978e 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -252,6 +252,8 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) } #ifdef CONFIG_XEN_DOM0 +static bool __read_mostly pci_seg_supported = true; + static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { int ret = 0; @@ -269,10 +271,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) memset(&map_irq, 0, sizeof(map_irq)); map_irq.domid = domid; - map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; map_irq.index = -1; map_irq.pirq = -1; - map_irq.bus = dev->bus->number; + map_irq.bus = dev->bus->number | + (pci_domain_nr(dev->bus) << 16); map_irq.devfn = dev->devfn; if (type == PCI_CAP_ID_MSIX) { @@ -289,7 +292,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) map_irq.entry_nr = msidesc->msi_attrib.entry_nr; } - ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + ret = -EINVAL; + if (pci_seg_supported) + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, + &map_irq); + if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, + &map_irq); + if (ret != -EINVAL) + pci_seg_supported = false; + } if (ret) { dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", ret, domid); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index ae559fe91c25..26c731a106af 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST config XEN_PVHVM def_bool y - depends on XEN - depends on X86_LOCAL_APIC + depends on XEN && PCI && X86_LOCAL_APIC config XEN_MAX_DOMAIN_MEMORY int diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 6fa215a38615..90832a955991 100644 --- 
a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -400,9 +400,8 @@ static int pcifront_claim_resource(struct pci_dev *dev, void *data) dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", pci_name(dev), i); if (pci_claim_resource(dev, i)) { - dev_err(&pdev->xdev->dev, "Could not claim " - "resource %s/%d! Device offline. Try " - "giving less than 4GB to domain.\n", + dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! " + "Device offline. Try using e820_host=1 in the guest config.\n", pci_name(dev), i); } } diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5f7ff8e2fc14..8795480c2350 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -137,16 +137,6 @@ config XEN_GRANT_DEV_ALLOC to other domains. This can be used to implement frontend drivers or as part of an inter-domain shared memory channel. -config XEN_PLATFORM_PCI - tristate "xen platform pci device driver" - depends on XEN_PVHVM && PCI - default m - help - Driver for the Xen PCI Platform device: it is responsible for - initializing xenbus and grant_table when running in a Xen HVM - domain. As a consequence this driver is required to run any Xen PV - frontend on Xen HVM. 
- config SWIOTLB_XEN def_bool y depends on PCI diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 72bbb27d7a68..974fffdf22b2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_PVHVM) += platform-pci.o obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o @@ -23,5 +23,3 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o - -xen-platform-pci-y := platform-pci.o diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 212a5c871bf4..7a55b292bf39 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -873,11 +873,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, return err ? 
: bind_evtchn_to_irq(bind_interdomain.local_port); } +static int find_virq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_status status; + int port, err, rc = -ENOENT; /* rc stays -ENOENT unless a matching port is found */ + + memset(&status, 0, sizeof(status)); + for (port = 0; port < NR_EVENT_CHANNELS; port++) { /* query the status of each port in turn */ + status.dom = DOMID_SELF; + status.port = port; + err = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); /* kept apart from rc so a successful non-matching probe cannot be returned as port 0 */ + if (err < 0) + continue; + if (status.status != EVTCHNSTAT_virq) + continue; + if (status.u.virq == virq && status.vcpu == cpu) { + rc = port; + break; + } + } + return rc; +} int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; - int evtchn, irq; + int evtchn, irq, ret; mutex_lock(&irq_mapping_update_lock); @@ -893,10 +914,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) bind_virq.virq = virq; bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (ret == 0) + evtchn = bind_virq.port; + else { + if (ret == -EEXIST) + ret = find_virq(virq, cpu); + BUG_ON(ret < 0); + evtchn = ret; + } xen_irq_info_virq_init(cpu, irq, evtchn, virq); diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index cef4bafc07dc..66057075d6e2 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -26,26 +27,85 @@ #include #include "../pci/pci.h" +static bool __read_mostly pci_seg_supported = true; + static int xen_add_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); +#ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; +#endif + + if (pci_seg_supported) { + struct physdev_pci_device_add add = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; +#ifdef CONFIG_ACPI + acpi_handle handle; +#endif #ifdef CONFIG_PCI_IOV - if (pci_dev->is_virtfn) { + if 
(pci_dev->is_virtfn) { + add.flags = XEN_PCI_DEV_VIRTFN; + add.physfn.bus = physfn->bus->number; + add.physfn.devfn = physfn->devfn; + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) + add.flags = XEN_PCI_DEV_EXTFN; + +#ifdef CONFIG_ACPI + handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); + if (!handle) + handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); +#ifdef CONFIG_PCI_IOV + if (!handle && pci_dev->is_virtfn) + handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge); +#endif + if (handle) { + acpi_status status; + + do { + unsigned long long pxm; + + status = acpi_evaluate_integer(handle, "_PXM", + NULL, &pxm); + if (ACPI_SUCCESS(status)) { + add.optarr[0] = pxm; + add.flags |= XEN_PCI_DEV_PXM; + break; + } + status = acpi_get_parent(handle, &handle); + } while (ACPI_SUCCESS(status)); + } +#endif /* CONFIG_ACPI */ + + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add); + if (r != -ENOSYS) + return r; + pci_seg_supported = false; + } + + if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; +#ifdef CONFIG_PCI_IOV + else if (pci_dev->is_virtfn) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, .is_virtfn = 1, - .physfn.bus = pci_dev->physfn->bus->number, - .physfn.devfn = pci_dev->physfn->devfn, + .physfn.bus = physfn->bus->number, + .physfn.devfn = physfn->devfn, }; r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, &manage_pci_ext); - } else + } #endif - if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, @@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); - struct physdev_manage_pci manage_pci; - manage_pci.bus = pci_dev->bus->number; - manage_pci.devfn = pci_dev->devfn; + if (pci_seg_supported) { + struct physdev_pci_device 
device = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; - r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, - &manage_pci); + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove, + &device); + } else if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; + else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + } return r; } @@ -96,13 +170,16 @@ static int xen_pci_notifier(struct notifier_block *nb, r = xen_remove_device(dev); break; default: - break; + return NOTIFY_DONE; } - - return r; + if (r) + dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "add" : + (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?")); + return NOTIFY_OK; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = xen_pci_notifier, }; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 6e8c15a23201..c984768d98ca 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -38,6 +38,7 @@ #include #include #include +#include /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this @@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) void __init xen_swiotlb_init(int verbose) { unsigned long bytes; - int rc; + int rc = -ENOMEM; unsigned long nr_tbl; + char *m = NULL; + unsigned int repeat = 3; nr_tbl = swioltb_nr_tbl(); if (nr_tbl) @@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose) xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); } - +retry: bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; /* * Get IO TLB memory from any location. 
*/ xen_io_tlb_start = alloc_bootmem(bytes); - if (!xen_io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - + if (!xen_io_tlb_start) { + m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + goto error; + } xen_io_tlb_end = xen_io_tlb_start + bytes; /* * And replace that memory with pages under 4GB. @@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose) rc = xen_swiotlb_fixup(xen_io_tlb_start, bytes, xen_io_tlb_nslabs); - if (rc) + if (rc) { + free_bootmem(__pa(xen_io_tlb_start), bytes); + m = "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + " is too fragmented!"; goto error; - + } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); return; error: - panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ - "We either don't have the permission or you do not have enough"\ - "free memory under 4GB!\n", rc); + if (repeat--) { /* retry with half the slab count before giving up */ + xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ + (xen_io_tlb_nslabs >> 1)); + printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n", + (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); + goto retry; + } + xen_raw_printk("%s (rc:%d)", m, rc); + panic("%s (rc:%d)", m, rc); } void * @@ -194,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); unsigned long vstart; + phys_addr_t phys; + dma_addr_t dev_addr; /* * Ignore region specifiers - the kernel's ideas of @@ -209,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, vstart = __get_free_pages(flags, order); ret = (void *)vstart; - if (hwdev && hwdev->coherent_dma_mask) - dma_mask = dma_alloc_coherent_mask(hwdev, flags); + if (!ret) + return ret; - if (ret) { + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + phys = virt_to_phys(ret); + dev_addr = 
xen_phys_to_bus(phys); + if (((dev_addr + size - 1 <= dma_mask)) && + !range_straddles_page_boundary(phys, size)) + *dma_handle = dev_addr; + else { if (xen_create_contiguous_region(vstart, order, fls64(dma_mask)) != 0) { free_pages(vstart, order); return NULL; } - memset(ret, 0, size); *dma_handle = virt_to_machine(ret).maddr; } + memset(ret, 0, size); return ret; } EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); @@ -230,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { int order = get_order(size); + phys_addr_t phys; + u64 dma_mask = DMA_BIT_MASK(32); if (dma_release_from_coherent(hwdev, order, vaddr)) return; - xen_destroy_contiguous_region((unsigned long)vaddr, order); + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + phys = virt_to_phys(vaddr); + + if (((dev_addr + size - 1 > dma_mask)) || + range_straddles_page_boundary(phys, size)) + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); @@ -278,9 +313,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (!dma_capable(dev, dev_addr, size)) - panic("map_single: bounce buffer is not DMA'ble"); - + if (!dma_capable(dev, dev_addr, size)) { + swiotlb_tbl_unmap_single(dev, map, size, dir); + dev_addr = 0; + } return dev_addr; } EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index a8031445d94e..444345afbd5c 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -15,7 +15,6 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" static int permissive; module_param(permissive, bool, 0644); diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index 
da3cbdfcb5dc..3daf862d739d 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -15,7 +15,6 @@ struct pci_bar_info { int which; }; -#define DRV_NAME "xen-pciback" #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) @@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) int ret; ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!atomic_read(&dev->enable_cnt)) + if (!pci_is_enabled(dev)) return ret; for (i = 0; i < PCI_ROM_RESOURCE; i++) { @@ -187,7 +186,7 @@ static inline void read_dev_bar(struct pci_dev *dev, bar_info->val = res[pos].start | (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = res[pos].end - res[pos].start + 1; + bar_info->len_val = resource_size(&res[pos]); } static void *bar_init(struct pci_dev *dev, int offset) diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c index 921a889e65eb..7476791cab40 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.c +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -12,7 +12,6 @@ #include "conf_space_quirks.h" LIST_HEAD(xen_pcibk_quirks); -#define DRV_NAME "xen-pciback" static inline const struct pci_device_id * match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { @@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) goto out; tmp_quirk = NULL; printk(KERN_DEBUG DRV_NAME - ":quirk didn't match any device xen_pciback knows about\n"); + ": quirk didn't match any device known\n"); out: return tmp_quirk; } diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index 1d32a9a42c01..828dddc360df 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -7,13 +7,13 @@ #include #include -#include +#include #include 
"pciback.h" struct passthrough_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list; - spinlock_t lock; + struct mutex lock; }; static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, @@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; struct pci_dev *dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry(dev_entry, &dev_data->dev_list, list) { if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) @@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); return dev; } @@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; - unsigned long flags; unsigned int domain, bus, devfn; int err; @@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, return -ENOMEM; dev_entry->dev = dev; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_add_tail(&dev_entry->list, &dev_data->dev_list); - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); /* Publish this device. 
*/ domain = (unsigned int)pci_domain_nr(dev->bus); @@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { if (dev_entry->dev == dev) { @@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!dev_data) return -ENOMEM; - spin_lock_init(&dev_data->lock); + mutex_init(&dev_data->lock); INIT_LIST_HEAD(&dev_data->dev_list); @@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *e, *tmp; + struct pci_dev_entry *dev_entry, *e; struct pci_dev *dev; int found; unsigned int domain, bus; - spin_lock(&dev_data->lock); + mutex_lock(&dev_data->lock); - list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { /* Only publish this device as a root if none of its * parent bridges are exported */ @@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, bus = (unsigned int)dev_entry->dev->bus->number; if (!found) { - spin_unlock(&dev_data->lock); err = publish_root_cb(pdev, domain, bus); if (err) break; - spin_lock(&dev_data->lock); } } - if (!err) - spin_unlock(&dev_data->lock); + mutex_unlock(&dev_data->lock); return err; } @@ -182,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return 1; } -struct xen_pcibk_backend 
xen_pcibk_passthrough_backend = { +const struct xen_pcibk_backend xen_pcibk_passthrough_backend = { .name = "passthrough", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index aec214ac0a14..8f06e1ed028c 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,8 +21,6 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" - static char *pci_devs_to_hide; wait_queue_head_t xen_pcibk_aer_wait_queue; /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops, @@ -222,6 +220,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) } spin_unlock_irqrestore(&pcistub_devices_lock, flags); + if (WARN_ON(!found_psdev)) + return; /*hold this lock for avoiding breaking link between * pcistub and xen_pcibk when AER is in processing @@ -514,12 +514,9 @@ static void kill_domain_by_device(struct pcistub_device *psdev) int err; char nodename[PCI_NODENAME_MAX]; - if (!psdev) - dev_err(&psdev->dev->dev, - "device is NULL when do AER recovery/kill_domain\n"); + BUG_ON(!psdev); snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", psdev->pdev->xdev->otherend_id); - nodename[strlen(nodename)] = '\0'; again: err = xenbus_transaction_start(&xbt); @@ -605,7 +602,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, if (test_bit(_XEN_PCIF_active, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in xen_pcibk\n"); + "schedule pci_conf service in " DRV_NAME "\n"); xen_pcibk_test_and_schedule_op(psdev->pdev); } @@ -995,8 +992,7 @@ static ssize_t pcistub_slot_add(struct device_driver *drv, const char *buf, err = count; return err; } - -DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); +static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); static ssize_t pcistub_slot_remove(struct device_driver *drv, const 
char *buf, size_t count) @@ -1015,8 +1011,7 @@ static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, err = count; return err; } - -DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); +static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) { @@ -1039,8 +1034,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) return count; } - -DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); +static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) { @@ -1069,8 +1063,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); +static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, const char *buf, @@ -1106,7 +1099,8 @@ static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, err = count; return err; } -DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); +static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, + pcistub_irq_handler_switch); static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, size_t count) @@ -1170,8 +1164,8 @@ static ssize_t pcistub_quirk_show(struct device_driver *drv, char *buf) return count; } - -DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); +static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, + pcistub_quirk_add); static ssize_t permissive_add(struct device_driver *drv, const char *buf, size_t count) @@ -1236,8 +1230,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(permissive, 
S_IRUSR | S_IWUSR, permissive_show, permissive_add); +static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, + permissive_add); static void pcistub_exit(void) { @@ -1374,3 +1368,4 @@ module_init(xen_pcibk_init); module_exit(xen_pcibk_cleanup); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("xen-backend:pci"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index a0e131a81503..e9b4011c5f9a 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -15,6 +15,8 @@ #include #include +#define DRV_NAME "xen-pciback" + struct pci_dev_entry { struct list_head list; struct pci_dev *dev; @@ -27,7 +29,7 @@ struct pci_dev_entry { struct xen_pcibk_device { void *pci_dev_data; - spinlock_t dev_lock; + struct mutex dev_lock; struct xenbus_device *xdev; struct xenbus_watch be_watch; u8 be_watching; @@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, * passthrough - BDFs are exactly like in the host. 
*/ struct xen_pcibk_backend { - char *name; + const char *name; int (*init)(struct xen_pcibk_device *pdev); void (*free)(struct xen_pcibk_device *pdev); int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, @@ -104,9 +106,9 @@ struct xen_pcibk_backend { unsigned int devfn); }; -extern struct xen_pcibk_backend xen_pcibk_vpci_backend; -extern struct xen_pcibk_backend xen_pcibk_passthrough_backend; -extern struct xen_pcibk_backend *xen_pcibk_backend; +extern const struct xen_pcibk_backend xen_pcibk_vpci_backend; +extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend; +extern const struct xen_pcibk_backend *xen_pcibk_backend; static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, @@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, if (xen_pcibk_backend && xen_pcibk_backend->add) return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); return -1; -}; +} + static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev) { if (xen_pcibk_backend && xen_pcibk_backend->free) return xen_pcibk_backend->release(pdev, dev); -}; +} static inline struct pci_dev * xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, @@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, if (xen_pcibk_backend && xen_pcibk_backend->get) return xen_pcibk_backend->get(pdev, domain, bus, devfn); return NULL; -}; +} + /** * Add for domain0 PCIE-AER handling. 
Get guest domain/bus/devfn in xen_pcibk * before sending aer request to pcifront, so that guest could identify @@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return xen_pcibk_backend->find(pcidev, pdev, domain, bus, devfn); return -1; -}; +} + static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->init) return xen_pcibk_backend->init(pdev); return -1; -}; +} + static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, publish_pci_root_cb cb) { if (xen_pcibk_backend && xen_pcibk_backend->publish) return xen_pcibk_backend->publish(pdev, cb); return -1; -}; +} + static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->free) return xen_pcibk_backend->free(pdev); -}; +} + /* Handles events from front-end */ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 8c95c3415b75..63616d7453e6 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -10,7 +10,6 @@ #include #include "pciback.h" -#define DRV_NAME "xen-pciback" int verbose_request; module_param(verbose_request, int, 0644); diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 4a42cfb0959d..46d140baebd8 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -8,16 +8,15 @@ #include #include #include -#include +#include #include "pciback.h" #define PCI_SLOT_MAX 32 -#define DRV_NAME "xen-pciback" struct vpci_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list[PCI_SLOT_MAX]; - spinlock_t lock; + struct mutex lock; }; static inline struct list_head *list_first(struct list_head *head) @@ -33,13 +32,12 @@ static struct pci_dev 
*__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if (domain != 0 || bus != 0) return NULL; if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); list_for_each_entry(entry, &vpci_dev->dev_list[PCI_SLOT(devfn)], @@ -50,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); } return dev; } @@ -71,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, int err = 0, slot, func = -1; struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { err = -EFAULT; @@ -90,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, dev_entry->dev = dev; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); /* Keep multi-function devices together on the virtual PCI bus */ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { @@ -129,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, "No more space on root virtual PCI bus"); unlock: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); /* Publish this device. 
*/ if (!err) @@ -145,14 +142,13 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - struct pci_dev_entry *e, *tmp; - list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], - list) { + struct pci_dev_entry *e; + + list_for_each_entry(e, &vpci_dev->dev_list[slot], list) { if (e->dev == dev) { list_del(&e->list); found_dev = e->dev; @@ -163,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } out: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -178,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!vpci_dev) return -ENOMEM; - spin_lock_init(&vpci_dev->lock); + mutex_init(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); @@ -222,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; int found = 0, slot; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { list_for_each_entry(entry, &vpci_dev->dev_list[slot], @@ -243,11 +238,11 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, } } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); return found; } -struct xen_pcibk_backend xen_pcibk_vpci_backend = { +const struct xen_pcibk_backend xen_pcibk_vpci_backend = { .name = "vpci", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 
978d2c6f5dca..474d52ec3374 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -13,7 +13,6 @@ #include #include "pciback.h" -#define DRV_NAME "xen-pciback" #define INVALID_EVTCHN_IRQ (-1) struct workqueue_struct *xen_pcibk_wq; @@ -44,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) pdev->xdev = xdev; dev_set_drvdata(&xdev->dev, pdev); - spin_lock_init(&pdev->dev_lock); + mutex_init(&pdev->dev_lock); pdev->sh_info = NULL; pdev->evtchn_irq = INVALID_EVTCHN_IRQ; @@ -62,14 +61,12 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) { - spin_lock(&pdev->dev_lock); - + mutex_lock(&pdev->dev_lock); /* Ensure the guest can't trigger our handler before removing devices */ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { unbind_from_irqhandler(pdev->evtchn_irq, pdev); pdev->evtchn_irq = INVALID_EVTCHN_IRQ; } - spin_unlock(&pdev->dev_lock); /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ @@ -77,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) /* Note, the workqueue does not use spinlocks at all.*/ flush_workqueue(xen_pcibk_wq); - spin_lock(&pdev->dev_lock); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); pdev->sh_info = NULL; } - spin_unlock(&pdev->dev_lock); - + mutex_unlock(&pdev->dev_lock); } static void free_pdev(struct xen_pcibk_device *pdev) @@ -120,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, goto out; } - spin_lock(&pdev->dev_lock); pdev->sh_info = vaddr; - spin_unlock(&pdev->dev_lock); err = bind_interdomain_evtchn_to_irqhandler( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, @@ -132,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Error binding event channel to IRQ"); goto out; } - - 
spin_lock(&pdev->dev_lock); pdev->evtchn_irq = err; - spin_unlock(&pdev->dev_lock); err = 0; dev_dbg(&pdev->xdev->dev, "Attached!\n"); @@ -150,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) char *magic = NULL; + mutex_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateInitialised) @@ -176,7 +167,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { xenbus_dev_fatal(pdev->xdev, -EFAULT, "version mismatch (%s/%s) with pcifront - " - "halting xen_pcibk", + "halting " DRV_NAME, magic, XEN_PCI_MAGIC); goto out; } @@ -194,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); out: + mutex_unlock(&pdev->dev_lock); kfree(magic); @@ -369,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + mutex_lock(&pdev->dev_lock); /* Make sure we only reconfigure once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateReconfiguring) @@ -506,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) } out: + mutex_unlock(&pdev->dev_lock); return 0; } @@ -562,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) char dev_str[64]; char state_str[64]; + mutex_lock(&pdev->dev_lock); /* It's possible we could get the call to setup twice, so make sure * we're not already connected. 
*/ @@ -642,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) "Error switching to initialised state!"); out: + mutex_unlock(&pdev->dev_lock); if (!err) /* see if pcifront is already configured (if not, we'll wait) */ xen_pcibk_attach(pdev); - return err; } @@ -724,7 +719,7 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = { .otherend_changed = xen_pcibk_frontend_changed, }; -struct xen_pcibk_backend *xen_pcibk_backend; +const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; int __init xen_pcibk_xenbus_register(void) { diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61ee8fd0..2eff7a6aaa20 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index bd2f90c9ac8b..cef9b0bf63d5 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -684,64 +684,74 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; + + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + 
alloc_unbound.remote_dom = DOMID_SELF; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; + + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; + + return 0; + + out_err: + if (page != 0) + free_page(page); + return err; +} + +static int __init xenbus_init(void) +{ + int err = 0; - err = -ENODEV; if (!xen_domain()) - return err; + return -ENODEV; - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; - - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) goto out_error; - - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); - - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) goto out_error; - - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; - - xen_store_interface = mfn_to_virt(xen_store_mfn); + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); - if (err) - goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = 
mfn_to_virt(xen_store_mfn); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) xenstored_ready = 1; + else { + err = xenstored_local_init(); + if (err) + goto out_error; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. */ @@ -760,12 +770,7 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif - return 0; - - out_error: - if (page != 0) - free_page(page); - + out_error: return err; } diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index ed2ba474a560..540587e18a94 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -248,10 +248,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, } EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + printk(KERN_INFO "XENBUS: backend %s timed out.\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. 
+ */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + printk(KERN_INFO "XENBUS: reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; 
+ } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data) { + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 5534690075af..b3b8f2f3ad10 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "xenbus_comms.h" struct xs_stored_msg { @@ -620,6 +621,15 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +static void xs_reset_watches(void) +{ + int err; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); +} + /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -638,8 +648,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. 
*/ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); @@ -897,5 +906,9 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + if (xen_hvm_domain()) + xs_reset_watches(); + return 0; } diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index 99fcffb372d1..f0b6890370be 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -26,7 +26,11 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES }; #define XS_WRITE_NONE "NONE" diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 534cac89a77d..c1080d9c705d 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -109,6 +109,7 @@ struct physdev_irq { #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -119,7 +120,7 @@ struct physdev_map_pirq { int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -198,6 +199,37 @@ struct physdev_get_free_pirq { uint32_t pirq; }; +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t 
seg; + uint8_t bus; + uint8_t devfn; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is **