From f38926aa1dc5fbf7dfc5f97a53377b2e796dedc3 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Thu, 3 Jun 2010 05:37:50 +0000 Subject: [PATCH 01/12] RDMA/cxgb4: Use the DMA state API instead of the pci equivalents This replaces the PCI DMA state API (include/linux/pci-dma.h) with the DMA equivalents, since the PCI DMA state API will be obsolete. No functional change. For further information about the background: http://marc.info/?l=linux-netdev&m=127037540020276&w=2 Signed-off-by: FUJITA Tomonori Acked-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 6 +++--- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/infiniband/hw/cxgb4/mem.c | 4 ++-- drivers/infiniband/hw/cxgb4/qp.c | 12 ++++++------ drivers/infiniband/hw/cxgb4/t4.h | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 2447f5295482..e1317f581168 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -77,7 +77,7 @@ static int destroy_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, kfree(cq->sw_queue); dma_free_coherent(&(rdev->lldi.pdev->dev), cq->memsize, cq->queue, - pci_unmap_addr(cq, mapping)); + dma_unmap_addr(cq, mapping)); c4iw_put_cqid(rdev, cq->cqid, uctx); return ret; } @@ -112,7 +112,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, ret = -ENOMEM; goto err3; } - pci_unmap_addr_set(cq, mapping, cq->dma_addr); + dma_unmap_addr_set(cq, mapping, cq->dma_addr); memset(cq->queue, 0, cq->memsize); /* build fw_ri_res_wr */ @@ -179,7 +179,7 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq, return 0; err4: dma_free_coherent(&rdev->lldi.pdev->dev, cq->memsize, cq->queue, - pci_unmap_addr(cq, mapping)); + dma_unmap_addr(cq, mapping)); err3: kfree(cq->sw_queue); err2: diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 277ab589b44d..d33e1a668811 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -261,7 +261,7 @@ static inline struct c4iw_mw *to_c4iw_mw(struct ib_mw *ibmw) struct c4iw_fr_page_list { struct ib_fast_reg_page_list ibpl; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); dma_addr_t dma_addr; struct c4iw_dev *dev; int size; diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 7f94da1a2437..82b5703b8947 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -764,7 +764,7 @@ struct ib_fast_reg_page_list *c4iw_alloc_fastreg_pbl(struct ib_device *device, if (!c4pl) return ERR_PTR(-ENOMEM); - pci_unmap_addr_set(c4pl, mapping, dma_addr); + dma_unmap_addr_set(c4pl, mapping, dma_addr); c4pl->dma_addr = dma_addr; c4pl->dev = dev; c4pl->size = size; @@ -779,7 +779,7 @@ void c4iw_free_fastreg_pbl(struct ib_fast_reg_page_list *ibpl) struct c4iw_fr_page_list *c4pl = to_c4iw_fr_page_list(ibpl); dma_free_coherent(&c4pl->dev->rdev.lldi.pdev->dev, c4pl->size, - c4pl, pci_unmap_addr(c4pl, mapping)); + c4pl, dma_unmap_addr(c4pl, mapping)); } int c4iw_dereg_mr(struct ib_mr *ib_mr) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 0c28ed1eafa6..7065cb310553 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -40,10 +40,10 @@ static int destroy_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, */ dma_free_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, wq->rq.queue, - pci_unmap_addr(&wq->rq, 
mapping)); + dma_unmap_addr(&wq->rq, mapping)); dma_free_coherent(&(rdev->lldi.pdev->dev), wq->sq.memsize, wq->sq.queue, - pci_unmap_addr(&wq->sq, mapping)); + dma_unmap_addr(&wq->sq, mapping)); c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); kfree(wq->rq.sw_rq); kfree(wq->sq.sw_sq); @@ -99,7 +99,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, if (!wq->sq.queue) goto err5; memset(wq->sq.queue, 0, wq->sq.memsize); - pci_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); + dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); wq->rq.queue = dma_alloc_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, &(wq->rq.dma_addr), @@ -112,7 +112,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, wq->rq.queue, (unsigned long long)virt_to_phys(wq->rq.queue)); memset(wq->rq.queue, 0, wq->rq.memsize); - pci_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); + dma_unmap_addr_set(&wq->rq, mapping, wq->rq.dma_addr); wq->db = rdev->lldi.db_reg; wq->gts = rdev->lldi.gts_reg; @@ -217,11 +217,11 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq, err7: dma_free_coherent(&(rdev->lldi.pdev->dev), wq->rq.memsize, wq->rq.queue, - pci_unmap_addr(&wq->rq, mapping)); + dma_unmap_addr(&wq->rq, mapping)); err6: dma_free_coherent(&(rdev->lldi.pdev->dev), wq->sq.memsize, wq->sq.queue, - pci_unmap_addr(&wq->sq, mapping)); + dma_unmap_addr(&wq->sq, mapping)); err5: c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); err4: diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 1057cb96302e..9cf8d85bfcff 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -279,7 +279,7 @@ struct t4_swsqe { struct t4_sq { union t4_wr *queue; dma_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swsqe *sw_sq; struct t4_swsqe *oldest_read; u64 udb; @@ -298,7 +298,7 @@ struct t4_swrqe { struct t4_rq { union t4_recv_wr *queue; dma_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_swrqe *sw_rq; u64 udb; size_t memsize; @@ -429,7 +429,7 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq) struct t4_cq { struct t4_cqe *queue; dma_addr_t dma_addr; - DECLARE_PCI_UNMAP_ADDR(mapping); + DEFINE_DMA_UNMAP_ADDR(mapping); struct t4_cqe *sw_queue; void __iomem *gts; struct c4iw_rdev *rdev; From b21ef16a8b956aee2fb3d7fc9d24a0b4dae2ae72 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 10 Jun 2010 19:02:55 +0000 Subject: [PATCH 02/12] RDMA/cxgb4: Don't call abort_connection() for active connect failures Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 30ce0a8eca09..3e15a0716702 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -969,7 +969,8 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) goto err; goto out; err: - abort_connection(ep, skb, GFP_KERNEL); + state_set(&ep->com, ABORTING); + send_abort(ep, skb, GFP_KERNEL); out: connect_reply_upcall(ep, err); return; From 1973e8b8edea68d2408328d25b318ee7401293be Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 10 Jun 2010 19:03:06 +0000 Subject: [PATCH 03/12] RDMA/cxgb4: Avoid false GTS CIDX_INC overflows The T4 IQ hw design assumes CIDX_INC credits will be returned on a regular basis and always before the CIDX counter crosses over the 
PIDX counter. For RDMA CQs, however, returning CIDX_INC credits is only needed and desired when and if the CQ is armed for notification. This can lead to a GTS write returning credits that causes the HW to reject the credit update because it causes CIDX to pass PIDX. Once this happens, the CIDX/PIDX counters get out of whack and an application can miss a notification and get stuck blocked awaiting a notification. To avoid this, we allocate the HW IQ at 2x the requested size. This seems to avoid the false overflow failures. If we see more issues with this, then we'll have to add code in the poll path to return credits periodically (e.g. when the pending credits reach 1/2 the queue depth). I would like to avoid this as it adds a PCI write transaction for applications that never arm the CQ (like most MPIs). Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cq.c | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index e1317f581168..fac5c6e68011 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -764,7 +764,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, struct c4iw_create_cq_resp uresp; struct c4iw_ucontext *ucontext = NULL; int ret; - size_t memsize; + size_t memsize, hwentries; struct c4iw_mm_entry *mm, *mm2; PDBG("%s ib_dev %p entries %d\n", __func__, ibdev, entries); @@ -788,14 +788,29 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, * entries must be multiple of 16 for HW. */ entries = roundup(entries, 16); - memsize = entries * sizeof *chp->cq.queue; + + /* + * Make actual HW queue 2x to avoid cidx_inc overflows. + */ + hwentries = entries * 2; + + /* + * Make HW queue at least 64 entries so GTS updates aren't too + * frequent. + */ + if (hwentries < 64) + hwentries = 64; + + memsize = hwentries * sizeof *chp->cq.queue; /* * memsize must be a multiple of the page size if its a user cq. 
*/ - if (ucontext) + if (ucontext) { memsize = roundup(memsize, PAGE_SIZE); - chp->cq.size = entries; + hwentries = memsize / sizeof *chp->cq.queue; + } + chp->cq.size = hwentries; chp->cq.memsize = memsize; ret = create_cq(&rhp->rdev, &chp->cq, @@ -805,7 +820,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries, chp->rhp = rhp; chp->cq.size--; /* status page */ - chp->ibcq.cqe = chp->cq.size - 1; + chp->ibcq.cqe = entries - 2; spin_lock_init(&chp->lock); atomic_set(&chp->refcnt, 1); init_waitqueue_head(&chp->wait); From 2c5934bfc5ffcbef3622d0bdbad93628d210012a Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 23 Jun 2010 15:46:44 +0000 Subject: [PATCH 04/12] RDMA/cxgb4: Derive smac_idx from port viid Signed-off-by: Steve Wise Signed-off-by: Roland Dreier --- drivers/infiniband/hw/cxgb4/cm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3e15a0716702..855ee44fdb52 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1373,7 +1373,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) pdev, 0); mtu = pdev->mtu; tx_chan = cxgb4_port_chan(pdev); - smac_idx = tx_chan << 1; + smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; txq_idx = cxgb4_port_idx(pdev) * step; step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; @@ -1384,7 +1384,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) dst->neighbour->dev, 0); mtu = dst_mtu(dst); tx_chan = cxgb4_port_chan(dst->neighbour->dev); - smac_idx = tx_chan << 1; + smac_idx = (cxgb4_port_viid(dst->neighbour->dev) & 0x7F) << 1; step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; txq_idx = cxgb4_port_idx(dst->neighbour->dev) * step; step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; @@ -1951,7 +1951,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) pdev, 0); ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = ep->tx_chan << 1; + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(pdev) * step; @@ -1966,7 +1966,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->dst->neighbour->dev, 0); ep->mtu = dst_mtu(ep->dst); ep->tx_chan = cxgb4_port_chan(ep->dst->neighbour->dev); - ep->smac_idx = ep->tx_chan << 1; + ep->smac_idx = (cxgb4_port_viid(ep->dst->neighbour->dev) & + 0x7F) << 1; step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan; ep->txq_idx = cxgb4_port_idx(ep->dst->neighbour->dev) * step; From fce24a9d28f8b99fd0eacc14e252ab4fca9527a7 Mon Sep 17 00:00:00 2001 From: Dave Olson Date: Thu, 17 Jun 2010 23:13:44 +0000 Subject: [PATCH 05/12] IB/qib: Don't mark VL15 bufs as WC to avoid a rare 7322 chip problem Don't set write combining via PAT on the VL15 buffers to avoid a rare problem with unaligned writes from interrupt-flushed store buffers. 
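In sketch form, the relevant piece of the qib_init_7322_variables() hunk below (inside the existing qib_wc_pat branch, slightly reformatted here for readability since the change is the same code shown in the diff) keeps the VL15 buffers out of the write-combined PAT mapping and maps them uncached on the side:

	ret = init_chip_wc_pat(dd, 0);	/* WC range no longer covers the VL15 bufs */
	if (ret)
		goto bail;

	/* vl15 buffers start just after the 4k buffers */
	vl15off = dd->physaddr + (dd->piobufbase >> 32) +
		  dd->piobcnt4k * dd->align4k;
	dd->piovl15base = ioremap_nocache(vl15off,
					  NUM_VL15_BUFS * dd->align4k);
	if (!dd->piovl15base)
		goto bail;

The new dd->piovl15base mapping is then consumed by qib_getsendbuf_range() and qib_remap_ioaddr32(), and unmapped in qib_pcie_ddcleanup().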
Signed-off-by: Dave Olson Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib.h | 1 + drivers/infiniband/hw/qib/qib_diag.c | 19 +++++++++++++++---- drivers/infiniband/hw/qib/qib_iba7322.c | 18 +++++++++++++++++- drivers/infiniband/hw/qib/qib_init.c | 6 ++++++ drivers/infiniband/hw/qib/qib_pcie.c | 2 ++ drivers/infiniband/hw/qib/qib_tx.c | 6 +++++- 6 files changed, 46 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index 32d9208efcff..3593983df7ba 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -686,6 +686,7 @@ struct qib_devdata { void __iomem *piobase; /* mem-mapped pointer to base of user chip regs (if using WC PAT) */ u64 __iomem *userbase; + void __iomem *piovl15base; /* base of VL15 buffers, if not WC */ /* * points to area where PIOavail registers will be DMA'ed. * Has to be on a page of it's own, because the page will be diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c index ca98dd523752..05dcf0d9a7d3 100644 --- a/drivers/infiniband/hw/qib/qib_diag.c +++ b/drivers/infiniband/hw/qib/qib_diag.c @@ -233,6 +233,7 @@ static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, u32 __iomem *krb32 = (u32 __iomem *)dd->kregbase; u32 __iomem *map = NULL; u32 cnt = 0; + u32 tot4k, offs4k; /* First, simplest case, offset is within the first map. */ kreglen = (dd->kregend - dd->kregbase) * sizeof(u64); @@ -250,7 +251,8 @@ static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, if (dd->userbase) { /* If user regs mapped, they are after send, so set limit. */ u32 ulim = (dd->cfgctxts * dd->ureg_align) + dd->uregbase; - snd_lim = dd->uregbase; + if (!dd->piovl15base) + snd_lim = dd->uregbase; krb32 = (u32 __iomem *)dd->userbase; if (offset >= dd->uregbase && offset < ulim) { map = krb32 + (offset - dd->uregbase) / sizeof(u32); @@ -277,14 +279,14 @@ static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, /* If 4k buffers exist, account for them by bumping * appropriate limit. */ + tot4k = dd->piobcnt4k * dd->align4k; + offs4k = dd->piobufbase >> 32; if (dd->piobcnt4k) { - u32 tot4k = dd->piobcnt4k * dd->align4k; - u32 offs4k = dd->piobufbase >> 32; if (snd_bottom > offs4k) snd_bottom = offs4k; else { /* 4k above 2k. Bump snd_lim, if needed*/ - if (!dd->userbase) + if (!dd->userbase || dd->piovl15base) snd_lim = offs4k + tot4k; } } @@ -298,6 +300,15 @@ static u32 __iomem *qib_remap_ioaddr32(struct qib_devdata *dd, u32 offset, cnt = snd_lim - offset; } + if (!map && offs4k && dd->piovl15base) { + snd_lim = offs4k + tot4k + 2 * dd->align4k; + if (offset >= (offs4k + tot4k) && offset < snd_lim) { + map = (u32 __iomem *)dd->piovl15base + + ((offset - (offs4k + tot4k)) / sizeof(u32)); + cnt = snd_lim - offset; + } + } + mapped: if (cntp) *cntp = cnt; diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 503992d9c5ce..3e9828be5010 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -6119,9 +6119,25 @@ static int qib_init_7322_variables(struct qib_devdata *dd) qib_set_ctxtcnt(dd); if (qib_wc_pat) { - ret = init_chip_wc_pat(dd, NUM_VL15_BUFS * dd->align4k); + resource_size_t vl15off; + /* + * We do not set WC on the VL15 buffers to avoid + * a rare problem with unaligned writes from + * interrupt-flushed store buffers, so we need + * to map those separately here. 
We can't solve + * this for the rarely used mtrr case. + */ + ret = init_chip_wc_pat(dd, 0); if (ret) goto bail; + + /* vl15 buffers start just after the 4k buffers */ + vl15off = dd->physaddr + (dd->piobufbase >> 32) + + dd->piobcnt4k * dd->align4k; + dd->piovl15base = ioremap_nocache(vl15off, + NUM_VL15_BUFS * dd->align4k); + if (!dd->piovl15base) + goto bail; } qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 9b40f345ac3f..25895991dc52 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1499,6 +1499,12 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) return -ENOMEM; } +/* + * Note: Changes to this routine should be mirrored + * for the diagnostics routine qib_remap_ioaddr32(). + * There is also related code for VL15 buffers in qib_init_7322_variables(). + * The teardown code that unmaps is in qib_pcie_ddcleanup() + */ int init_chip_wc_pat(struct qib_devdata *dd, u32 vl15buflen) { u64 __iomem *qib_kregbase = NULL; diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index c926bf4541df..7fa6e5592630 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -179,6 +179,8 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) iounmap(dd->piobase); if (dd->userbase) iounmap(dd->userbase); + if (dd->piovl15base) + iounmap(dd->piovl15base); pci_disable_device(dd->pcidev); pci_release_regions(dd->pcidev); diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index f7eb1ddff5f3..af30232b6831 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -340,9 +340,13 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum, if (i < dd->piobcnt2k) buf = (u32 __iomem *)(dd->pio2kbase + i * dd->palign); - else + else if (i < dd->piobcnt2k + dd->piobcnt4k || !dd->piovl15base) buf = (u32 __iomem *)(dd->pio4kbase + (i - dd->piobcnt2k) * dd->align4k); + else + buf = (u32 __iomem *)(dd->piovl15base + + (i - (dd->piobcnt2k + dd->piobcnt4k)) * + dd->align4k); if (pbufnum) *pbufnum = i; dd->upd_pio_shadow = 0; From b9e03e0489a8616fc415e62128d05ad0159a20a2 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 17 Jun 2010 23:13:54 +0000 Subject: [PATCH 06/12] IB/qib: Mask hardware error during link reset The HCA checks for certain hardware errors which can be falsely triggered when the IB link is reset. The fix is to mask them rather than report them. 
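In sketch form, qib_7322_mini_pcs_reset() (hunk below) now brackets the link reset with a mask/clear/restore sequence for this error:

	qib_write_kreg(dd, kr_hwerrmask,
		       dd->cspec->hwerrmask & ~HWE_MASK(statusValidNoEop));
	/* ... IBLinkEn dropped, tx_rx_reset toggled, IBLinkEn restored ... */
	qib_write_kreg(dd, kr_hwerrclear,
		       SYM_MASK(HwErrClear, statusValidNoEopClear));
	qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);

The qib_7322_regs.h changes rename the affected bits so that statusValidNoEop is a single (non-per-port) field at bit 0xC and bit 0xE carries its correct IBCBusToSPCParityErr name.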
Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_7322_regs.h | 48 +++++++++++------------ drivers/infiniband/hw/qib/qib_iba7322.c | 9 ++++- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_7322_regs.h b/drivers/infiniband/hw/qib/qib_7322_regs.h index a97440ba924c..32dc81ff8d4a 100644 --- a/drivers/infiniband/hw/qib/qib_7322_regs.h +++ b/drivers/infiniband/hw/qib/qib_7322_regs.h @@ -742,15 +742,15 @@ #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_LSB 0xF #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_MSB 0xF #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_1_RMASK 0x1 -#define QIB_7322_HwErrMask_statusValidNoEopMask_1_LSB 0xE -#define QIB_7322_HwErrMask_statusValidNoEopMask_1_MSB 0xE -#define QIB_7322_HwErrMask_statusValidNoEopMask_1_RMASK 0x1 +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_LSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_MSB 0xE +#define QIB_7322_HwErrMask_IBCBusToSPCParityErrMask_1_RMASK 0x1 #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_LSB 0xD #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_MSB 0xD #define QIB_7322_HwErrMask_IBCBusFromSPCParityErrMask_0_RMASK 0x1 -#define QIB_7322_HwErrMask_statusValidNoEopMask_0_LSB 0xC -#define QIB_7322_HwErrMask_statusValidNoEopMask_0_MSB 0xC -#define QIB_7322_HwErrMask_statusValidNoEopMask_0_RMASK 0x1 +#define QIB_7322_HwErrMask_statusValidNoEopMask_LSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_MSB 0xC +#define QIB_7322_HwErrMask_statusValidNoEopMask_RMASK 0x1 #define QIB_7322_HwErrMask_LATriggeredMask_LSB 0xB #define QIB_7322_HwErrMask_LATriggeredMask_MSB 0xB #define QIB_7322_HwErrMask_LATriggeredMask_RMASK 0x1 @@ -796,15 +796,15 @@ #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_LSB 0xF #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_MSB 0xF #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_1_RMASK 0x1 -#define QIB_7322_HwErrStatus_statusValidNoEop_1_LSB 0xE -#define QIB_7322_HwErrStatus_statusValidNoEop_1_MSB 0xE -#define QIB_7322_HwErrStatus_statusValidNoEop_1_RMASK 0x1 +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwErrStatus_IBCBusToSPCParityErr_1_RMASK 0x1 #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_LSB 0xD #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_MSB 0xD #define QIB_7322_HwErrStatus_IBCBusFromSPCParityErr_0_RMASK 0x1 -#define QIB_7322_HwErrStatus_statusValidNoEop_0_LSB 0xC -#define QIB_7322_HwErrStatus_statusValidNoEop_0_MSB 0xC -#define QIB_7322_HwErrStatus_statusValidNoEop_0_RMASK 0x1 +#define QIB_7322_HwErrStatus_statusValidNoEop_LSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_MSB 0xC +#define QIB_7322_HwErrStatus_statusValidNoEop_RMASK 0x1 #define QIB_7322_HwErrStatus_LATriggered_LSB 0xB #define QIB_7322_HwErrStatus_LATriggered_MSB 0xB #define QIB_7322_HwErrStatus_LATriggered_RMASK 0x1 @@ -850,15 +850,15 @@ #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_LSB 0xF #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_MSB 0xF #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_1_RMASK 0x1 -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_1_LSB 0xE -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_1_MSB 0xE -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_1_RMASK 0x1 +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_LSB 0xE +#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_MSB 0xE 
+#define QIB_7322_HwErrClear_IBCBusToSPCParityErrClear_1_RMASK 0x1 #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_LSB 0xD #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_MSB 0xD #define QIB_7322_HwErrClear_IBCBusFromSPCParityErrClear_0_RMASK 0x1 -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_0_LSB 0xC -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_0_MSB 0xC -#define QIB_7322_HwErrClear_IBCBusToSPCparityErrClear_0_RMASK 0x1 +#define QIB_7322_HwErrClear_statusValidNoEopClear_LSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_MSB 0xC +#define QIB_7322_HwErrClear_statusValidNoEopClear_RMASK 0x1 #define QIB_7322_HwErrClear_LATriggeredClear_LSB 0xB #define QIB_7322_HwErrClear_LATriggeredClear_MSB 0xB #define QIB_7322_HwErrClear_LATriggeredClear_RMASK 0x1 @@ -880,15 +880,15 @@ #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_LSB 0xF #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_MSB 0xF #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_1_RMASK 0x1 -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_1_LSB 0xE -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_1_MSB 0xE -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_1_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_LSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_MSB 0xE +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_1_RMASK 0x1 #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_LSB 0xD #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_MSB 0xD #define QIB_7322_HwDiagCtrl_ForceIBCBusFromSPCParityErr_0_RMASK 0x1 -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_0_LSB 0xC -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_0_MSB 0xC -#define QIB_7322_HwDiagCtrl_ForcestatusValidNoEop_0_RMASK 0x1 +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_LSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_MSB 0xC +#define QIB_7322_HwDiagCtrl_ForceIBCBusToSPCParityErr_0_RMASK 0x1 #define QIB_7322_EXTStatus_OFFS 0xC0 #define QIB_7322_EXTStatus_DEF 0x000000000000X000 diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 3e9828be5010..8ee0ac6246e9 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1100,9 +1100,9 @@ static const struct qib_hwerror_msgs qib_7322_hwerror_msgs[] = { HWE_AUTO_P(SDmaMemReadErr, 1), HWE_AUTO_P(SDmaMemReadErr, 0), HWE_AUTO_P(IBCBusFromSPCParityErr, 1), + HWE_AUTO_P(IBCBusToSPCParityErr, 1), HWE_AUTO_P(IBCBusFromSPCParityErr, 0), - HWE_AUTO_P(statusValidNoEop, 1), - HWE_AUTO_P(statusValidNoEop, 0), + HWE_AUTO(statusValidNoEop), HWE_AUTO(LATriggered), { .mask = 0 } }; @@ -4763,6 +4763,8 @@ static void qib_7322_mini_pcs_reset(struct qib_pportdata *ppd) SYM_MASK(IBPCSConfig_0, tx_rx_reset); val = qib_read_kreg_port(ppd, krp_ib_pcsconfig); + qib_write_kreg(dd, kr_hwerrmask, + dd->cspec->hwerrmask & ~HWE_MASK(statusValidNoEop)); qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a & ~SYM_MASK(IBCCtrlA_0, IBLinkEn)); @@ -4772,6 +4774,9 @@ static void qib_7322_mini_pcs_reset(struct qib_pportdata *ppd) qib_write_kreg_port(ppd, krp_ib_pcsconfig, val & ~reset_bits); qib_write_kreg_port(ppd, krp_ibcctrl_a, ppd->cpspec->ibcctrl_a); qib_write_kreg(dd, kr_scratch, 0ULL); + qib_write_kreg(dd, kr_hwerrclear, + SYM_MASK(HwErrClear, statusValidNoEopClear)); + qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); } /* From 5df4223a444057e433e9e4f2e101ee7159f8c19d Mon Sep 17 00:00:00 2001 From: Ralph 
Campbell Date: Thu, 17 Jun 2010 23:13:59 +0000 Subject: [PATCH 07/12] IB/qib: Clear eager buffer memory for each new process The eager buffers are not being cleared before being mmapped into a new user address space. This is a potential security risk and should be fixed. Note that the eager header queue is already being cleared. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 25895991dc52..1d4db4b19d76 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1472,6 +1472,9 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd) dma_addr_t pa = rcd->rcvegrbuf_phys[chunk]; unsigned i; + /* clear for security and sanity on each use */ + memset(rcd->rcvegrbuf[chunk], 0, size); + for (i = 0; e < egrcnt && i < egrperchunk; e++, i++) { dd->f_put_tid(dd, e + egroff + (u64 __iomem *) From 2d757a7ce06abb4afe5b3002d4cdc40e47d7facc Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 17 Jun 2010 23:14:04 +0000 Subject: [PATCH 08/12] IB/qib: Clear 6120 hardware error register The hardware error register needs to be cleared or another interrupt will be generated, thus causing an infinite loop. This is a regression introduced when removing debug output. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba6120.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 1eadadc13da8..a5e29dbb9537 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -1355,8 +1355,7 @@ static int qib_6120_bringup_serdes(struct qib_pportdata *ppd) hwstat = qib_read_kreg64(dd, kr_hwerrstatus); if (hwstat) { /* should just have PLL, clear all set, in an case */ - if (hwstat & ~QLOGIC_IB_HWE_SERDESPLLFAILED) - qib_write_kreg(dd, kr_hwerrclear, hwstat); + qib_write_kreg(dd, kr_hwerrclear, hwstat); qib_write_kreg(dd, kr_errclear, ERR_MASK(HardwareErr)); } From 7c7a416ef863a741c2031b5da1538773f9ab54f0 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 17 Jun 2010 23:14:09 +0000 Subject: [PATCH 09/12] IB/qib: Update 7322 serdes tables Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_iba7322.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 8ee0ac6246e9..5eedf83e2c3b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -543,7 +543,7 @@ struct vendor_txdds_ent { static void write_tx_serdes_param(struct qib_pportdata *, struct txdds_ent *); #define TXDDS_TABLE_SZ 16 /* number of entries per speed in onchip table */ -#define TXDDS_EXTRA_SZ 11 /* number of extra tx settings entries */ +#define TXDDS_EXTRA_SZ 13 /* number of extra tx settings entries */ #define SERDES_CHANS 4 /* yes, it's obvious, but one less magic number */ #define H1_FORCE_VAL 8 @@ -5629,6 +5629,8 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change) if (ppd->port != port || !ppd->link_speed_supported) continue; ppd->cpspec->no_eep = val; + if (seth1) + ppd->cpspec->h1_val = h1; /* now change the IBC and serdes, overriding generic */ init_txdds_table(ppd, 1); any++; @@ -6069,9 +6071,9 @@ static int 
qib_init_7322_variables(struct qib_devdata *dd) * the "cable info" setup here. Can be overridden * in adapter-specific routines. */ - if (!(ppd->dd->flags & QIB_HAS_QSFP)) { - if (!IS_QMH(ppd->dd) && !IS_QME(ppd->dd)) - qib_devinfo(ppd->dd->pcidev, "IB%u:%u: " + if (!(dd->flags & QIB_HAS_QSFP)) { + if (!IS_QMH(dd) && !IS_QME(dd)) + qib_devinfo(dd->pcidev, "IB%u:%u: " "Unknown mezzanine card type\n", dd->unit, ppd->port); cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; @@ -6953,6 +6955,8 @@ static const struct txdds_ent txdds_extra_sdr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 11 }, /* QME7342 backplane settings */ { 0, 0, 0, 11 }, /* QME7342 backplane settings */ + { 0, 0, 0, 3 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 4 }, /* QMH7342 backplane settings */ }; static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { @@ -6968,6 +6972,8 @@ static const struct txdds_ent txdds_extra_ddr[TXDDS_EXTRA_SZ] = { { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 13 }, /* QME7342 backplane settings */ { 0, 0, 0, 13 }, /* QME7342 backplane settings */ + { 0, 0, 0, 9 }, /* QMH7342 backplane settings */ + { 0, 0, 0, 10 }, /* QMH7342 backplane settings */ }; static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { @@ -6983,6 +6989,8 @@ static const struct txdds_ent txdds_extra_qdr[TXDDS_EXTRA_SZ] = { { 0, 1, 12, 6 }, /* QME7342 backplane setting */ { 0, 1, 12, 7 }, /* QME7342 backplane setting */ { 0, 1, 12, 8 }, /* QME7342 backplane setting */ + { 0, 1, 0, 10 }, /* QMH7342 backplane settings */ + { 0, 1, 0, 12 }, /* QMH7342 backplane settings */ }; static const struct txdds_ent *get_atten_table(const struct txdds_ent *txdds, From 950aff53949268eec4b0f2bd49f700f9585698f7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 17 Jun 2010 23:14:15 +0000 Subject: [PATCH 10/12] IB/qib: Completion queue callback needs to be single threaded Workqueues aren't exactly equivalent to tasklets since the callback function may be called from multiple CPUs before the callback returns. This causes completion notification callbacks to have MT bugs since they weren't expecting this behavior. The fix is to use a single threaded work queue. Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 1d4db4b19d76..7831ff835d15 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1059,7 +1059,7 @@ static int __init qlogic_ib_init(void) goto bail_dev; } - qib_cq_wq = create_workqueue("qib_cq"); + qib_cq_wq = create_singlethread_workqueue("qib_cq"); if (!qib_cq_wq) { ret = -ENOMEM; goto bail_wq; From 756a33b8dc3ed5c27685a130339de8a894d528a7 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Thu, 1 Jul 2010 20:25:45 +0000 Subject: [PATCH 11/12] IB/qib: Clean up properly if qib_init() fails If qib_init() fails, the driver fails to free memory, unregister device files, and unregister with the PCIe framework. The driver will unload without error but a subsequent driver load will cause the system to panic. This was found by changing the 7220 code to load the serdes microcode separately and not installing the microcode file. 
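In sketch form, the failure path in qib_init_one() (hunk below) now unwinds the earlier setup steps instead of only quiescing the serdes, undoing only the steps whose setup had succeeded (the j and ret tests):

	qib_stop_timers(dd);
	flush_scheduled_work();
	for (pidx = 0; pidx < dd->num_pports; ++pidx)
		dd->f_quiet_serdes(dd->pport + pidx);
	if (qib_mini_init)
		goto bail;
	if (!j) {
		(void) qibfs_remove(dd);
		qib_device_remove(dd);
	}
	if (!ret)
		qib_unregister_ib_device(dd);
	qib_postinit_cleanup(dd);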
Signed-off-by: Ralph Campbell Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 7831ff835d15..a873dd596e81 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1289,8 +1289,18 @@ static int __devinit qib_init_one(struct pci_dev *pdev, if (qib_mini_init || initfail || ret) { qib_stop_timers(dd); + flush_scheduled_work(); for (pidx = 0; pidx < dd->num_pports; ++pidx) dd->f_quiet_serdes(dd->pport + pidx); + if (qib_mini_init) + goto bail; + if (!j) { + (void) qibfs_remove(dd); + qib_device_remove(dd); + } + if (!ret) + qib_unregister_ib_device(dd); + qib_postinit_cleanup(dd); if (initfail) ret = initfail; goto bail; From 7a52b34b07122ff5f45258d47f260f8a525518f0 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 6 Jun 2010 04:59:16 +0000 Subject: [PATCH 12/12] IPoIB: Fix world-writable child interface control sysfs attributes Sumeet Lahorani reported that the IPoIB child entries are world-writable; however we don't want ordinary users to be able to create and destroy child interfaces, so fix them to be writable only by root. Signed-off-by: Or Gerlitz Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index df3eb8c9fd96..b4b22576f12a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1163,7 +1163,7 @@ static ssize_t create_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(create_child, S_IWUGO, NULL, create_child); +static DEVICE_ATTR(create_child, S_IWUSR, NULL, create_child); static ssize_t delete_child(struct device *dev, struct device_attribute *attr, @@ -1183,7 +1183,7 @@ static ssize_t delete_child(struct device *dev, return ret ? ret : count; } -static DEVICE_ATTR(delete_child, S_IWUGO, NULL, delete_child); +static DEVICE_ATTR(delete_child, S_IWUSR, NULL, delete_child); int ipoib_add_pkey_attr(struct net_device *dev) {