From 2c25e34c7531ca1849b85cbcdb5a2f507ffe240c Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 Aug 2013 11:24:43 +0800 Subject: [PATCH 1/7] PCI: Drop "PCI-E" prefix from Max Payload Size message The conventional spelling is "PCIe", but I think even that is superfluous, so remove the whole thing. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 46ada5c098eb..9a334301a8f4 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1596,7 +1596,7 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) pcie_write_mps(dev, mps); pcie_write_mrrs(dev); - dev_info(&dev->dev, "PCI-E Max Payload Size set to %4d/%4d (was %4d), " + dev_info(&dev->dev, "Max Payload Size set to %4d/%4d (was %4d), " "Max Read Rq %4d\n", pcie_get_mps(dev), 128 << dev->pcie_mpss, orig_mps, pcie_get_readrq(dev)); From a58674ff8383f5b8f6a77f03c48f6a47840b9325 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 Aug 2013 11:24:44 +0800 Subject: [PATCH 2/7] PCI: Simplify pcie_bus_configure_settings() interface Based on a patch by Jon Mason (see URL below). All users of pcie_bus_configure_settings() pass arguments of the form "bus, bus->self->pcie_mpss". The "mpss" argument is redundant since we can easily look it up internally. In addition, all callers check "bus->self" for NULL, which we can also do internally. This patch simplifies the interface and the callers. No functional change. Reference: http://lkml.kernel.org/r/1317048850-30728-2-git-send-email-mason@myri.com Signed-off-by: Bjorn Helgaas --- arch/powerpc/kernel/pci-common.c | 8 ++------ arch/tile/kernel/pci_gx.c | 9 ++------- arch/x86/pci/acpi.c | 9 ++------- drivers/pci/hotplug/pcihp_slot.c | 5 ++--- drivers/pci/probe.c | 7 +++++-- include/linux/pci.h | 2 +- 6 files changed, 14 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index f46914a0f33e..d35ec34de1b4 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1672,12 +1672,8 @@ void pcibios_scan_phb(struct pci_controller *hose) /* Configure PCI Express settings */ if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } } diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c index 11425633b2d7..6640e7bbeaa2 100644 --- a/arch/tile/kernel/pci_gx.c +++ b/arch/tile/kernel/pci_gx.c @@ -508,13 +508,8 @@ static void fixup_read_and_payload_sizes(struct pci_controller *controller) rc_dev_cap.word); /* Configure PCI Express MPS setting. */ - list_for_each_entry(child, &root_bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &root_bus->children, node) + pcie_bus_configure_settings(child); /* * Set the mac_config register in trio based on the MPS/MRS of the link. diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index d641897a1f4e..b30e937689d6 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -568,13 +568,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) */ if (bus) { struct pci_bus *child; - list_for_each_entry(child, &bus->children, node) { - struct pci_dev *self = child->self; - if (!self) - continue; - - pcie_bus_configure_settings(child, self->pcie_mpss); - } + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); } if (bus && node != -1) { diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index fec2d5b75440..16f920352317 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -160,9 +160,8 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - if (dev->bus && dev->bus->self) - pcie_bus_configure_settings(dev->bus, - dev->bus->self->pcie_mpss); + if (dev->bus) + pcie_bus_configure_settings(dev->bus); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 9a334301a8f4..ecae7f290647 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1607,10 +1607,13 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) * parents then children fashion. If this changes, then this code will not * work as designed. */ -void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) +void pcie_bus_configure_settings(struct pci_bus *bus) { u8 smpss; + if (!bus->self) + return; + if (!pci_is_pcie(bus->self)) return; @@ -1625,7 +1628,7 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) smpss = 0; if (pcie_bus_config == PCIE_BUS_SAFE) { - smpss = mpss; + smpss = bus->self->pcie_mpss; pcie_find_smpss(bus->self, &smpss); pci_walk_bus(bus, pcie_find_smpss, &smpss); diff --git a/include/linux/pci.h b/include/linux/pci.h index 0fd1f1582fa1..57062b7a20ba 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -675,7 +675,7 @@ struct pci_driver { /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI -void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); +void pcie_bus_configure_settings(struct pci_bus *bus); enum pcie_bus_config_types { PCIE_BUS_TUNE_OFF, From f67577118d1154154cf3c1d96f870c4da13846b4 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 22 Aug 2013 11:24:45 +0800 Subject: [PATCH 3/7] PCI: Remove unnecessary check for pcie_get_mps() failure After 59875ae489 ("PCI/core: Use PCI Express Capability accessors"), pcie_get_mps() never returns an error, so don't bother to check for it. No functional change. [bhelgaas: changelog, fix pcie_get_mps() doc] Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas --- drivers/pci/pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e37fea6e178d..5bb97ee2307e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3525,8 +3525,6 @@ int pcie_set_readrq(struct pci_dev *dev, int rq) if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { int mps = pcie_get_mps(dev); - if (mps < 0) - return mps; if (mps < rq) rq = mps; } @@ -3543,7 +3541,6 @@ EXPORT_SYMBOL(pcie_set_readrq); * @dev: PCI device to query * * Returns maximum payload size in bytes - * or appropriate error value. */ int pcie_get_mps(struct pci_dev *dev) { From c2996948ac36a9082f27b9ad94dac4c821a9c33d Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 22 Aug 2013 11:24:46 +0800 Subject: [PATCH 4/7] PCI: Simplify MPS test for Downstream Port PCIe hotplug bridges are always either Root Ports or Downstream Ports. No other device type can have a PCIe link leading downstream to a slot. Root Ports don't have an upstream bridge, so "dev->is_hotplug_bridge && dev->bus->self" is true if and only if "dev" is a Downstream Port. That means we can simplify this by looking at the type of "dev" itself, without looking upstream at all. No functional change. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ecae7f290647..0591b087a6c3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1507,8 +1507,7 @@ static int pcie_find_smpss(struct pci_dev *dev, void *data) * will occur as normal. */ if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || - (dev->bus->self && - pci_pcie_type(dev->bus->self) != PCI_EXP_TYPE_ROOT_PORT))) + pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)) *smpss = 0; if (*smpss > dev->pcie_mpss) From d4aa68f614201e9fbf74e8b9593bb2ec94061dd3 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Thu, 22 Aug 2013 11:24:47 +0800 Subject: [PATCH 5/7] PCI: Don't restrict MPS for slots below Root Ports When booting with "pci=pcie_bus_safe", we previously limited the fabric MPS to 128 when we found: (1) A hotplug-capable Downstream Port ("dev->is_hotplug_bridge && pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT"), or (2) A hotplug-capable Root Port with a slot that was either empty or contained a multi-function device ("dev->is_hotplug_bridge && !list_is_singular(&dev->bus->devices)") Part (1) is valid, but part (2) is not. After a hot-add in the slot below a Root Port, we can reconfigure all MPS values in the fabric below the Root Port because the new device is the only thing below the Root Port and there are no active drivers. Therefore, there's no reason to limit the MPS for Root Ports, no matter what's in the slot. Test info: -+-[0000:40]-+-07.0-[0000:46]--+-00.0 Intel 82576 NIC \-00.1 Intel 82576 NIC 0000:40:07.0 Root Port bridge to [bus 46] (MPS supported=256) 0000:46:00.0 Endpoint (MPS supported=512) 0000:46:00.1 Endpoint (MPS supported=512) # echo 0 > /sys/bus/pci/slots/7/power # echo 1 > /sys/bus/pci/slots/7/power pcieport 0000:40:07.0: PCI-E Max Payload Size set to 256/ 256 (was 256) pci 0000:46:00.0: PCI-E Max Payload Size set to 256/ 512 (was 128) pci 0000:46:00.1: PCI-E Max Payload Size set to 256/ 512 (was 128) Before this change, we set MPS to 128 for the Root Port and both NICs because the slot contained a multi-function device and dev->is_hotplug_bridge && !list_is_singular(&dev->bus->devices) was true. After this change, we set it to 256. [bhelgaas: changelog, comments, split out upstream bridge check] Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas Cc: Jon Mason --- drivers/pci/probe.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 0591b087a6c3..94b1d227ddcd 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1491,23 +1491,23 @@ static int pcie_find_smpss(struct pci_dev *dev, void *data) if (!pci_is_pcie(dev)) return 0; - /* For PCIE hotplug enabled slots not connected directly to a - * PCI-E root port, there can be problems when hotplugging - * devices. This is due to the possibility of hotplugging a - * device into the fabric with a smaller MPS that the devices - * currently running have configured. Modifying the MPS on the - * running devices could cause a fatal bus error due to an - * incoming frame being larger than the newly configured MPS. - * To work around this, the MPS for the entire fabric must be - * set to the minimum size. Any devices hotplugged into this - * fabric will have the minimum MPS set. If the PCI hotplug - * slot is directly connected to the root port and there are not - * other devices on the fabric (which seems to be the most - * common case), then this is not an issue and MPS discovery - * will occur as normal. + /* + * We don't have a way to change MPS settings on devices that have + * drivers attached. A hot-added device might support only the minimum + * MPS setting (MPS=128). Therefore, if the fabric contains a bridge + * where devices may be hot-added, we limit the fabric MPS to 128 so + * hot-added devices will work correctly. + * + * However, if we hot-add a device to a slot directly below a Root + * Port, it's impossible for there to be other existing devices below + * the port. We don't limit the MPS in this case because we can + * reconfigure MPS on both the Root Port and the hot-added device, + * and there are no other devices involved. + * + * Note that this PCIE_BUS_SAFE path assumes no peer-to-peer DMA. */ - if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || - pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT)) + if (dev->is_hotplug_bridge && + pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) *smpss = 0; if (*smpss > dev->pcie_mpss) From 3315472c474af8e1c2beb40d980dfc92f03e4d8e Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 26 Aug 2013 16:33:05 +0800 Subject: [PATCH 6/7] PCI: Fix MPS peer-to-peer DMA comment syntax Correct minor wording issue in MPS peer-to-peer comment. Noticed by Don Dutile. Signed-off-by: Jon Mason Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 94b1d227ddcd..94c5a77ce853 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1620,7 +1620,7 @@ void pcie_bus_configure_settings(struct pci_bus *bus) return; /* FIXME - Peer to peer DMA is possible, though the endpoint would need - * to be aware to the MPS of the destination. To work around this, + * to be aware of the MPS of the destination. To work around this, * simply force the MPS of the entire system to the smallest possible. */ if (pcie_bus_config == PCIE_BUS_PEER2PEER) From 5895af79158a55562753f7f05762f3bd766d32b9 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Mon, 26 Aug 2013 16:33:06 +0800 Subject: [PATCH 7/7] PCI: Warn if unsafe MPS settings detected If a BIOS configures MPS incorrectly, devices may not work normally. For example, if a bridge has MPS set larger than an endpoint below it, the endpoint may discard packets. To help diagnose this issue, print a warning if we find an endpoint MPS setting different than that of the upstream bridge. [bhelgaas: changelog, "bridge" temporary, warning text] Reference: https://bugzilla.kernel.org/show_bug.cgi?id=60799 Reported-by: Joe Jin Signed-off-by: Yijing Wang Signed-off-by: Bjorn Helgaas Cc: Jon Mason --- drivers/pci/probe.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 94c5a77ce853..cd5c4acb5367 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1582,6 +1582,22 @@ static void pcie_write_mrrs(struct pci_dev *dev) "with pci=pcie_bus_safe.\n"); } +static void pcie_bus_detect_mps(struct pci_dev *dev) +{ + struct pci_dev *bridge = dev->bus->self; + int mps, p_mps; + + if (!bridge) + return; + + mps = pcie_get_mps(dev); + p_mps = pcie_get_mps(bridge); + + if (mps != p_mps) + dev_warn(&dev->dev, "Max Payload Size %d, but upstream %s set to %d; if necessary, use \"pci=pcie_bus_safe\" and report a bug\n", + mps, pci_name(bridge), p_mps); +} + static int pcie_bus_configure_set(struct pci_dev *dev, void *data) { int mps, orig_mps; @@ -1589,6 +1605,11 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) if (!pci_is_pcie(dev)) return 0; + if (pcie_bus_config == PCIE_BUS_TUNE_OFF) { + pcie_bus_detect_mps(dev); + return 0; + } + mps = 128 << *(u8 *)data; orig_mps = pcie_get_mps(dev); @@ -1616,9 +1637,6 @@ void pcie_bus_configure_settings(struct pci_bus *bus) if (!pci_is_pcie(bus->self)) return; - if (pcie_bus_config == PCIE_BUS_TUNE_OFF) - return; - /* FIXME - Peer to peer DMA is possible, though the endpoint would need * to be aware of the MPS of the destination. To work around this, * simply force the MPS of the entire system to the smallest possible.