From 39f1d94d300a58eb3e9b851d077cada4e2fa9d46 Mon Sep 17 00:00:00 2001 From: Sathya Perla Date: Tue, 8 May 2012 19:41:24 +0000 Subject: [PATCH] be2net: avoid disabling sriov while VFs are assigned Calling pci_disable_sriov() while VFs are assigned to VMs causes kernel panic. This patch uses PCI_DEV_FLAGS_ASSIGNED bit state of the VF's pci_dev to avoid this. Also, the unconditional function reset cmd issued on a PF probe can delete the VF configuration for the previously enabled VFs. A scratchpad register is now used to issue a function reset only when needed (i.e., in a crash dump scenario.) Signed-off-by: Sathya Perla Signed-off-by: David S. Miller --- drivers/net/ethernet/emulex/benet/be.h | 22 +-- drivers/net/ethernet/emulex/benet/be_hw.h | 2 + drivers/net/ethernet/emulex/benet/be_main.c | 199 ++++++++++++-------- 3 files changed, 134 insertions(+), 89 deletions(-) diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index c3ee9103ff4f..ecf1a81f26e2 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -313,6 +313,11 @@ struct be_vf_cfg { u32 tx_rate; }; +enum vf_state { + ENABLED = 0, + ASSIGNED = 1 +}; + #define BE_FLAGS_LINK_STATUS_INIT 1 #define BE_FLAGS_WORKER_SCHEDULED (1 << 3) #define BE_UC_PMAC_COUNT 30 @@ -403,8 +408,9 @@ struct be_adapter { u32 flash_status; struct completion flash_compl; - u32 num_vfs; - u8 is_virtfn; + u32 num_vfs; /* Number of VFs provisioned by PF driver */ + u32 dev_num_vfs; /* Number of VFs supported by HW */ + u8 virtfn; struct be_vf_cfg *vf_cfg; bool be3_native; u32 sli_family; @@ -417,8 +423,10 @@ struct be_adapter { u32 uc_macs; /* Count of secondary UC MAC programmed */ }; -#define be_physfn(adapter) (!adapter->is_virtfn) +#define be_physfn(adapter) (!adapter->virtfn) #define sriov_enabled(adapter) (adapter->num_vfs > 0) +#define sriov_want(adapter) (adapter->dev_num_vfs && num_vfs && \ + be_physfn(adapter)) #define for_all_vfs(adapter, vf_cfg, i) \ for (i = 0, vf_cfg = &adapter->vf_cfg[i]; i < adapter->num_vfs; \ i++, vf_cfg++) @@ -547,14 +555,6 @@ static inline u8 is_udp_pkt(struct sk_buff *skb) return val; } -static inline void be_check_sriov_fn_type(struct be_adapter *adapter) -{ - u32 sli_intf; - - pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); - adapter->is_virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; -} - static inline void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac) { u32 addr; diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index 0949aa609164..f38b58c8dbba 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -58,6 +58,8 @@ #define SLI_PORT_CONTROL_IP_MASK 0x08000000 +#define PCICFG_CUST_SCRATCHPAD_CSR 0x1EC + /********* Memory BAR register ************/ #define PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET 0xfc /* Host Interrupt Enable, if set interrupts are enabled although "PCI Interrupt diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 6d5d30be0481..a01f73467532 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -1049,6 +1049,29 @@ static int be_set_vf_tx_rate(struct net_device *netdev, return status; } +static int be_find_vfs(struct be_adapter *adapter, int vf_state) +{ + struct pci_dev *dev, *pdev = adapter->pdev; + int vfs = 0, assigned_vfs = 0, pos, vf_fn; + u16 offset, stride; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_OFFSET, &offset); + pci_read_config_word(pdev, pos + PCI_SRIOV_VF_STRIDE, &stride); + + dev = pci_get_device(pdev->vendor, PCI_ANY_ID, NULL); + while (dev) { + vf_fn = (pdev->devfn + offset + stride * vfs) & 0xFFFF; + if (dev->is_virtfn && dev->devfn == vf_fn) { + vfs++; + if (dev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) + assigned_vfs++; + } + dev = pci_get_device(pdev->vendor, PCI_ANY_ID, dev); + } + return (vf_state == ASSIGNED) ? assigned_vfs : vfs; +} + static void be_eqd_update(struct be_adapter *adapter, struct be_eq_obj *eqo) { struct be_rx_stats *stats = rx_stats(&adapter->rx_obj[eqo->idx]); @@ -1789,9 +1812,9 @@ static void be_tx_queues_destroy(struct be_adapter *adapter) static int be_num_txqs_want(struct be_adapter *adapter) { - if (sriov_enabled(adapter) || be_is_mc(adapter) || - lancer_chip(adapter) || !be_physfn(adapter) || - adapter->generation == BE_GEN2) + if (sriov_want(adapter) || be_is_mc(adapter) || + lancer_chip(adapter) || !be_physfn(adapter) || + adapter->generation == BE_GEN2) return 1; else return MAX_TX_QS; @@ -2118,7 +2141,7 @@ static void be_msix_disable(struct be_adapter *adapter) static uint be_num_rss_want(struct be_adapter *adapter) { if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) && - adapter->num_vfs == 0 && be_physfn(adapter) && + !sriov_want(adapter) && be_physfn(adapter) && !be_is_mc(adapter)) return (adapter->be3_native) ? BE3_MAX_RSS_QS : BE2_MAX_RSS_QS; else @@ -2152,53 +2175,6 @@ static void be_msix_enable(struct be_adapter *adapter) return; } -static int be_sriov_enable(struct be_adapter *adapter) -{ - be_check_sriov_fn_type(adapter); - -#ifdef CONFIG_PCI_IOV - if (be_physfn(adapter) && num_vfs) { - int status, pos; - u16 dev_vfs; - - pos = pci_find_ext_capability(adapter->pdev, - PCI_EXT_CAP_ID_SRIOV); - pci_read_config_word(adapter->pdev, - pos + PCI_SRIOV_TOTAL_VF, &dev_vfs); - - adapter->num_vfs = min_t(u16, num_vfs, dev_vfs); - if (adapter->num_vfs != num_vfs) - dev_info(&adapter->pdev->dev, - "Device supports %d VFs and not %d\n", - adapter->num_vfs, num_vfs); - - status = pci_enable_sriov(adapter->pdev, adapter->num_vfs); - if (status) - adapter->num_vfs = 0; - - if (adapter->num_vfs) { - adapter->vf_cfg = kcalloc(num_vfs, - sizeof(struct be_vf_cfg), - GFP_KERNEL); - if (!adapter->vf_cfg) - return -ENOMEM; - } - } -#endif - return 0; -} - -static void be_sriov_disable(struct be_adapter *adapter) -{ -#ifdef CONFIG_PCI_IOV - if (sriov_enabled(adapter)) { - pci_disable_sriov(adapter->pdev); - kfree(adapter->vf_cfg); - adapter->num_vfs = 0; - } -#endif -} - static inline int be_msix_vec_get(struct be_adapter *adapter, struct be_eq_obj *eqo) { @@ -2500,6 +2476,11 @@ static void be_vf_clear(struct be_adapter *adapter) struct be_vf_cfg *vf_cfg; u32 vf; + if (be_find_vfs(adapter, ASSIGNED)) { + dev_warn(&adapter->pdev->dev, "VFs are assigned to VMs\n"); + goto done; + } + for_all_vfs(adapter, vf_cfg, vf) { if (lancer_chip(adapter)) be_cmd_set_mac_list(adapter, NULL, 0, vf + 1); @@ -2509,6 +2490,10 @@ static void be_vf_clear(struct be_adapter *adapter) be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1); } + pci_disable_sriov(adapter->pdev); +done: + kfree(adapter->vf_cfg); + adapter->num_vfs = 0; } static int be_clear(struct be_adapter *adapter) @@ -2538,29 +2523,60 @@ static int be_clear(struct be_adapter *adapter) be_cmd_fw_clean(adapter); be_msix_disable(adapter); - kfree(adapter->pmac_id); + pci_write_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, 0); return 0; } -static void be_vf_setup_init(struct be_adapter *adapter) +static int be_vf_setup_init(struct be_adapter *adapter) { struct be_vf_cfg *vf_cfg; int vf; + adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg), + GFP_KERNEL); + if (!adapter->vf_cfg) + return -ENOMEM; + for_all_vfs(adapter, vf_cfg, vf) { vf_cfg->if_handle = -1; vf_cfg->pmac_id = -1; } + return 0; } static int be_vf_setup(struct be_adapter *adapter) { struct be_vf_cfg *vf_cfg; + struct device *dev = &adapter->pdev->dev; u32 cap_flags, en_flags, vf; u16 def_vlan, lnk_speed; - int status; + int status, enabled_vfs; - be_vf_setup_init(adapter); + enabled_vfs = be_find_vfs(adapter, ENABLED); + if (enabled_vfs) { + dev_warn(dev, "%d VFs are already enabled\n", enabled_vfs); + dev_warn(dev, "Ignoring num_vfs=%d setting\n", num_vfs); + return 0; + } + + if (num_vfs > adapter->dev_num_vfs) { + dev_warn(dev, "Device supports %d VFs and not %d\n", + adapter->dev_num_vfs, num_vfs); + num_vfs = adapter->dev_num_vfs; + } + + status = pci_enable_sriov(adapter->pdev, num_vfs); + if (!status) { + adapter->num_vfs = num_vfs; + } else { + /* Platform doesn't support SRIOV though device supports it */ + dev_warn(dev, "SRIOV enable failed\n"); + return 0; + } + + status = be_vf_setup_init(adapter); + if (status) + goto err; cap_flags = en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST | BE_IF_FLAGS_MULTICAST; @@ -2571,9 +2587,11 @@ static int be_vf_setup(struct be_adapter *adapter) goto err; } - status = be_vf_eth_addr_config(adapter); - if (status) - goto err; + if (!enabled_vfs) { + status = be_vf_eth_addr_config(adapter); + if (status) + goto err; + } for_all_vfs(adapter, vf_cfg, vf) { status = be_cmd_link_status_query(adapter, NULL, &lnk_speed, @@ -2630,9 +2648,25 @@ static int be_add_mac_from_list(struct be_adapter *adapter, u8 *mac) return status; } +/* Routine to query per function resource limits */ +static int be_get_config(struct be_adapter *adapter) +{ + int pos; + u16 dev_num_vfs; + + pos = pci_find_ext_capability(adapter->pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + pci_read_config_word(adapter->pdev, pos + PCI_SRIOV_TOTAL_VF, + &dev_num_vfs); + adapter->dev_num_vfs = dev_num_vfs; + } + return 0; +} + static int be_setup(struct be_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct device *dev = &adapter->pdev->dev; u32 cap_flags, en_flags; u32 tx_fc, rx_fc; int status; @@ -2640,6 +2674,8 @@ static int be_setup(struct be_adapter *adapter) be_setup_init(adapter); + be_get_config(adapter); + be_cmd_req_native_mode(adapter); be_msix_enable(adapter); @@ -2718,10 +2754,11 @@ static int be_setup(struct be_adapter *adapter) pcie_set_readrq(adapter->pdev, 4096); - if (sriov_enabled(adapter)) { - status = be_vf_setup(adapter); - if (status) - goto err; + if (be_physfn(adapter) && num_vfs) { + if (adapter->dev_num_vfs) + be_vf_setup(adapter); + else + dev_warn(dev, "device doesn't support SRIOV\n"); } be_cmd_get_phy_info(adapter); @@ -2731,6 +2768,7 @@ static int be_setup(struct be_adapter *adapter) schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; + pci_write_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, 1); return 0; err: be_clear(adapter); @@ -3352,8 +3390,6 @@ static void __devexit be_remove(struct pci_dev *pdev) be_ctrl_cleanup(adapter); - be_sriov_disable(adapter); - pci_set_drvdata(pdev, NULL); pci_release_regions(pdev); pci_disable_device(pdev); @@ -3367,7 +3403,7 @@ bool be_is_wol_supported(struct be_adapter *adapter) !be_is_wol_excluded(adapter)) ? true : false; } -static int be_get_config(struct be_adapter *adapter) +static int be_get_initial_config(struct be_adapter *adapter) { int status; @@ -3410,7 +3446,7 @@ static int be_get_config(struct be_adapter *adapter) return 0; } -static int be_dev_family_check(struct be_adapter *adapter) +static int be_dev_type_check(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; u32 sli_intf = 0, if_type; @@ -3443,6 +3479,9 @@ static int be_dev_family_check(struct be_adapter *adapter) default: adapter->generation = 0; } + + pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf); + adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0; return 0; } @@ -3586,6 +3625,14 @@ static void be_worker(struct work_struct *work) schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); } +static bool be_reset_required(struct be_adapter *adapter) +{ + u32 reg; + + pci_read_config_dword(adapter->pdev, PCICFG_CUST_SCRATCHPAD_CSR, ®); + return reg; +} + static int __devinit be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) { @@ -3611,7 +3658,7 @@ static int __devinit be_probe(struct pci_dev *pdev, adapter->pdev = pdev; pci_set_drvdata(pdev, adapter); - status = be_dev_family_check(adapter); + status = be_dev_type_check(adapter); if (status) goto free_netdev; @@ -3629,13 +3676,9 @@ static int __devinit be_probe(struct pci_dev *pdev, } } - status = be_sriov_enable(adapter); - if (status) - goto free_netdev; - status = be_ctrl_init(adapter); if (status) - goto disable_sriov; + goto free_netdev; if (lancer_chip(adapter)) { status = lancer_wait_ready(adapter); @@ -3662,9 +3705,11 @@ static int __devinit be_probe(struct pci_dev *pdev, if (status) goto ctrl_clean; - status = be_cmd_reset_function(adapter); - if (status) - goto ctrl_clean; + if (be_reset_required(adapter)) { + status = be_cmd_reset_function(adapter); + if (status) + goto ctrl_clean; + } /* The INTR bit may be set in the card when probed by a kdump kernel * after a crash. @@ -3676,7 +3721,7 @@ static int __devinit be_probe(struct pci_dev *pdev, if (status) goto ctrl_clean; - status = be_get_config(adapter); + status = be_get_initial_config(adapter); if (status) goto stats_clean; @@ -3705,8 +3750,6 @@ static int __devinit be_probe(struct pci_dev *pdev, be_stats_cleanup(adapter); ctrl_clean: be_ctrl_cleanup(adapter); -disable_sriov: - be_sriov_disable(adapter); free_netdev: free_netdev(netdev); pci_set_drvdata(pdev, NULL);