From f0dc117abdfa9a0e96c3d013d836460ef3cd08c7 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 3 Mar 2010 12:27:52 +0000 Subject: [PATCH 1/7] IPoIB: Fix TX queue lockup with mixed UD/CM traffic The IPoIB UD QP reports send completions to priv->send_cq, which is usually left unarmed; it only gets armed when the number of outstanding send requests reaches the size of the TX queue. This arming is done only in the send path for the UD QP. However, when sending CM packets, the net queue may be stopped for the same reasons but no measures are taken to recover the UD path from a lockup. Consider this scenario: a host sends high rate of both CM and UD packets, with a TX queue length of N. If at some time the number of outstanding UD packets is more than N/2 and the overall outstanding packets is N-1, and CM sends a packet (making the number of outstanding sends equal N), the TX queue will be stopped. When all the CM packets complete, the number of outstanding packets will still be higher than N/2 so the TX queue will not be restarted. Fix this by calling ib_req_notify_cq() when the queue is stopped in the CM path. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 83a7751c38d6..6e3cc865e369 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -752,6 +752,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ if (++priv->tx_outstanding == ipoib_sendq_size) { ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", tx->qp->qp_num); + if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP)) + ipoib_warn(priv, "request notify on send CQ failed\n"); netif_stop_queue(dev); } } From a48f509b26cec53338f4b0abd52ecea35e3974b8 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 4 Mar 2010 13:17:37 +0000 Subject: [PATCH 2/7] IPoIB: Include return code in trace message for ib_post_send() failures Print the return code of ib_post_send() if it fails to make these debugging messages more useful. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 8 +++++--- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 9 +++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6e3cc865e369..bc658373ad55 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -708,6 +708,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_cm_tx_buf *tx_req; u64 addr; + int rc; if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -739,9 +740,10 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ tx_req->mapping = addr; - if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), - addr, skb->len))) { - ipoib_warn(priv, "post_send failed\n"); + rc = post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1), + addr, skb->len); + if (unlikely(rc)) { + ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE); dev_kfree_skb_any(skb); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 8c91d9f37ada..5df40b128f81 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -529,7 +529,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_tx_buf *tx_req; - int hlen; + int hlen, rc; void *phead; if (skb_is_gso(skb)) { @@ -585,9 +585,10 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, netif_stop_queue(dev); } - if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), - address->ah, qpn, tx_req, phead, hlen))) { - ipoib_warn(priv, "post_send failed\n"); + rc = post_send(priv, priv->tx_head & (ipoib_sendq_size - 1), + address->ah, qpn, tx_req, phead, hlen); + if (unlikely(rc)) { + ipoib_warn(priv, "post_send failed, error %d\n", rc); ++dev->stats.tx_errors; --priv->tx_outstanding; ipoib_dma_unmap_tx(priv->ca, tx_req); From 070e140c4c536df33a9870318791b2ca8f7dbfcf Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 4 Mar 2010 18:18:18 +0000 Subject: [PATCH 3/7] IB/mad: Ignore iWARP devices on device removal When an iWARP device is unloaded, the ib_mad module logs errors. It should be ignoring iWARP devices on device removal just like it does on device add. Signed-off-by: Steve Wise Acked-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/mad.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 58463da814d1..e351b1548535 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -2953,6 +2953,9 @@ static void ib_mad_remove_device(struct ib_device *device) { int i, num_ports, cur_port; + if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB) + return; + if (device->node_type == RDMA_NODE_IB_SWITCH) { num_ports = 1; cur_port = 0; From 69960a275efc9d82797bbbe2460a2d6c9cace314 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 3 Mar 2010 15:06:34 +0000 Subject: [PATCH 4/7] RDMA/cxgb3: Wait at least one schedule cycle during device removal During a hot-plug LLD removal event or an EEH error event, iw_cxgb3 must ensure that any/all threads that might be in a cxgb3 exported function must return from the function before iw_cxgb3 returns from its event processing. Do this by calling synchronize_net(). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb3/iwch.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index ee1d8b4d4541..63f975f3e30f 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -189,6 +189,7 @@ static void close_rnic_dev(struct t3cdev *tdev) list_for_each_entry_safe(dev, tmp, &dev_list, entry) { if (dev->rdev.t3cdev_p == tdev) { dev->rdev.flags = CXIO_ERROR_FATAL; + synchronize_net(); cancel_delayed_work_sync(&dev->db_drop_task); list_del(&dev->entry); iwch_unregister_device(dev); @@ -217,6 +218,7 @@ static void iwch_event_handler(struct t3cdev *tdev, u32 evt, u32 port_id) switch (evt) { case OFFLOAD_STATUS_DOWN: { rdev->flags = CXIO_ERROR_FATAL; + synchronize_net(); event.event = IB_EVENT_DEVICE_FATAL; dispatch = 1; break; From 883c699241f48667ff59277d8c20790868fd4829 Mon Sep 17 00:00:00 2001 From: Faisal Latif Date: Tue, 2 Mar 2010 17:22:51 -0600 Subject: [PATCH 5/7] RDMA/nes: Set assume_aligned_header bit Set assume_aligned_header bit in QP context as requested by hardware group. Signed-off-by: Faisal Latif Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_verbs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 815725f886c4..69928296d74b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1323,6 +1323,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd, nesqp->nesqp_context->aeq_token_low = cpu_to_le32((u32)((unsigned long)(nesqp))); nesqp->nesqp_context->aeq_token_high = cpu_to_le32((u32)(upper_32_bits((unsigned long)(nesqp)))); nesqp->nesqp_context->ird_ord_sizes = cpu_to_le32(NES_QPCONTEXT_ORDIRD_ALSMM | + NES_QPCONTEXT_ORDIRD_AAH | ((((u32)nesadapter->max_irrq_wr) << NES_QPCONTEXT_ORDIRD_IRDSIZE_SHIFT) & NES_QPCONTEXT_ORDIRD_IRDSIZE_MASK)); if (disable_mpa_crc) { From 9f29006ae8c85746e5a52d557f689359149a0793 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Wed, 3 Mar 2010 19:13:28 +0000 Subject: [PATCH 6/7] RDMA/nes: Clear stall bit before destroying NIC QP Clear the stall bit to drop any incoming packets while destroying NIC QP. This will prevent a chip resource leak. Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_hw.c | 8 ++++++++ drivers/infiniband/hw/nes/nes_hw.h | 1 + 2 files changed, 9 insertions(+) diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index ce7f53833577..925075557dc2 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1899,9 +1899,14 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) u16 wqe_fragment_index; u64 wqe_frag; u32 cqp_head; + u32 wqm_cfg0; unsigned long flags; int ret; + /* clear wqe stall before destroying NIC QP */ + wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); + /* Free remaining NIC receive buffers */ while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail]; @@ -2020,6 +2025,9 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic) pci_free_consistent(nesdev->pcidev, nesvnic->nic_mem_size, nesvnic->nic_vbase, nesvnic->nic_pbase); + + /* restore old wqm_cfg0 value */ + nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0); } /** diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 9b1e7f869d83..bbbfe9fc5a5a 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -160,6 +160,7 @@ enum indexed_regs { NES_IDX_ENDNODE0_NSTAT_TX_OCTETS_HI = 0x7004, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_LO = 0x7008, NES_IDX_ENDNODE0_NSTAT_TX_FRAMES_HI = 0x700c, + NES_IDX_WQM_CONFIG0 = 0x5000, NES_IDX_WQM_CONFIG1 = 0x5004, NES_IDX_CM_CONFIG = 0x5100, NES_IDX_NIC_LOGPORT_TO_PHYPORT = 0x6000, From a72042c08a8ba3b685dc9cba62c57c48188ef2c8 Mon Sep 17 00:00:00 2001 From: Chien Tung Date: Wed, 3 Mar 2010 19:13:26 +0000 Subject: [PATCH 7/7] RDMA/nes: Fix CX4 link problem in back-to-back configuration Commit 09124e19 ("RDMA/nes: Add support for KR device id 0x0110") took out too much code and broke CX4 link detection in back-to-back configuration. Put back the code that does the link check. Signed-off-by: Chien Tung Signed-off-by: Roland Dreier --- drivers/infiniband/hw/nes/nes_nic.c | 30 +++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index a1d79b6856ac..91fdde382e82 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1595,7 +1595,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, struct nes_vnic *nesvnic; struct net_device *netdev; struct nic_qp_map *curr_qp_map; - u32 u32temp; u8 phy_type = nesdev->nesadapter->phy_type[nesdev->mac_index]; netdev = alloc_etherdev(sizeof(struct nes_vnic)); @@ -1707,6 +1706,10 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, ((phy_type == NES_PHY_TYPE_PUMA_1G) && (((PCI_FUNC(nesdev->pcidev->devfn) == 1) && (nesdev->mac_index == 2)) || ((PCI_FUNC(nesdev->pcidev->devfn) == 2) && (nesdev->mac_index == 1)))))) { + u32 u32temp; + u32 link_mask; + u32 link_val; + u32temp = nes_read_indexed(nesdev, NES_IDX_PHY_PCS_CONTROL_STATUS0 + (0x200 * (nesdev->mac_index & 1))); if (phy_type != NES_PHY_TYPE_PUMA_1G) { @@ -1715,13 +1718,36 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev, (0x200 * (nesdev->mac_index & 1)), u32temp); } + /* Check and set linkup here. This is for back to back */ + /* configuration where second port won't get link interrupt */ + switch (phy_type) { + case NES_PHY_TYPE_PUMA_1G: + if (nesdev->mac_index < 2) { + link_mask = 0x01010000; + link_val = 0x01010000; + } else { + link_mask = 0x02020000; + link_val = 0x02020000; + } + break; + default: + link_mask = 0x0f1f0000; + link_val = 0x0f0f0000; + break; + } + + u32temp = nes_read_indexed(nesdev, + NES_IDX_PHY_PCS_CONTROL_STATUS0 + + (0x200 * (nesdev->mac_index & 1))); + if ((u32temp & link_mask) == link_val) + nesvnic->linkup = 1; + /* clear the MAC interrupt status, assumes direct logical to physical mapping */ u32temp = nes_read_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index)); nes_debug(NES_DBG_INIT, "Phy interrupt status = 0x%X.\n", u32temp); nes_write_indexed(nesdev, NES_IDX_MAC_INT_STATUS + (0x200 * nesdev->mac_index), u32temp); nes_init_phy(nesdev); - } return netdev;