From 09360d5408cb641abff0f32a172a3332f02e8e88 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 30 May 2007 13:14:31 +0300 Subject: [PATCH 1/9] mlx4_core: Fix CQ context layout The reserved6 field should be 64 bits, not just 16 bits. Without this, the structure does not match the hardware layout on 32-bit architectures: the db_rec_addr field ends up at offset 52 instead of offset 56. The bug slipped by because the alignment of __be64 members ends up putting it in the right place on x86-64. Signed-off-by: Eli Cohen Signed-off-by: Roland Dreier --- drivers/net/mlx4/cq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/mlx4/cq.c b/drivers/net/mlx4/cq.c index 437d78ad0912..39253d0c1590 100644 --- a/drivers/net/mlx4/cq.c +++ b/drivers/net/mlx4/cq.c @@ -61,7 +61,7 @@ struct mlx4_cq_context { __be32 solicit_producer_index; __be32 consumer_index; __be32 producer_index; - u8 reserved6[2]; + u32 reserved6[2]; __be64 db_rec_addr; }; From b581401ed0cc83a4483ed39c00a14a60bacecc3a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 7 Jun 2007 11:51:58 -0700 Subject: [PATCH 2/9] mlx4_core: Initialize ctx_list and ctx_lock earlier We may call mlx4_dispatch_event() before mlx4_register_device() is called for a device, because for example a catastrophic error happens immediately after we enable interrupts. Therefore priv->ctx_list and priv->ctx_lock need to be initialized earlier. This bug was actually exposed by the MSI-X bug that returned IRQ numbers to drivers in reverse order, so that the first FW command interrupt looked to mlx4 like a catastrophic error. Signed-off-by: Roland Dreier --- drivers/net/mlx4/intf.c | 3 --- drivers/net/mlx4/main.c | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx4/intf.c b/drivers/net/mlx4/intf.c index 65854f9e9c76..9ae951bf6aa6 100644 --- a/drivers/net/mlx4/intf.c +++ b/drivers/net/mlx4/intf.c @@ -135,9 +135,6 @@ int mlx4_register_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - INIT_LIST_HEAD(&priv->ctx_list); - spin_lock_init(&priv->ctx_lock); - mutex_lock(&intf_mutex); list_add_tail(&priv->dev_list, &dev_list); diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 20b8c0d3ced4..d4172937025b 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -787,6 +787,8 @@ static int __devinit mlx4_init_one(struct pci_dev *pdev, dev = &priv->dev; dev->pdev = pdev; + INIT_LIST_HEAD(&priv->ctx_list); + spin_lock_init(&priv->ctx_lock); /* * Now reset the HCA before we touch the PCI capabilities or From 2c5cb2355843f9958e19a4b243456be92f97c73b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sat, 2 Jun 2007 07:16:02 -0700 Subject: [PATCH 3/9] mlx4_core: Free catastrophic error MSI-X interrupt with correct dev_id We need to pass the same dev_id to free_irq() and request_irq(). When using MSI-X, the MLX4_EQ_CATAS interrupt uses a different dev_id from the other interrupts. Signed-off-by: Roland Dreier --- drivers/net/mlx4/eq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/mlx4/eq.c b/drivers/net/mlx4/eq.c index 0f11adb8eb4a..27a82cecd693 100644 --- a/drivers/net/mlx4/eq.c +++ b/drivers/net/mlx4/eq.c @@ -490,9 +490,11 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) if (eq_table->have_irq) free_irq(dev->pdev->irq, dev); - for (i = 0; i < MLX4_NUM_EQ; ++i) + for (i = 0; i < MLX4_EQ_CATAS; ++i) if (eq_table->eq[i].have_irq) free_irq(eq_table->eq[i].irq, eq_table->eq + i); + if (eq_table->eq[MLX4_EQ_CATAS].have_irq) + free_irq(eq_table->eq[MLX4_EQ_CATAS].irq, dev); } static int __devinit mlx4_map_clr_int(struct mlx4_dev *dev) From 3e1db334dca97df9034ebeec4657329a2b37a811 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Sun, 3 Jun 2007 19:47:10 -0700 Subject: [PATCH 4/9] IB/mthca, mlx4_core: Fix typo in comment s/signifant/significant/ Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/net/mlx4/fw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index 38102520ffb3..f40558d76475 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -772,7 +772,7 @@ int mthca_QUERY_FW(struct mthca_dev *dev, u8 *status) MTHCA_GET(dev->fw_ver, outbox, QUERY_FW_VER_OFFSET); /* - * FW subminor version is at more signifant bits than minor + * FW subminor version is at more significant bits than minor * version, so swap here. */ dev->fw_ver = (dev->fw_ver & 0xffff00000000ull) | diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index cfa5cc072339..ab259b2e11a5 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -477,7 +477,7 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) MLX4_GET(fw_ver, outbox, QUERY_FW_VER_OFFSET); /* - * FW subminor version is at more signifant bits than minor + * FW subminor version is at more significant bits than minor * version, so swap here. */ dev->caps.fw_ver = (fw_ver & 0xffff00000000ull) | From fe40900f408642e772739088d30636e2f3f7d0d8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 7 Jun 2007 23:24:36 -0700 Subject: [PATCH 5/9] mlx4_core: Check firmware command interface revision HCA firmware with incompatible changes to the FW commmand interface is coming soon. Add a check of the interface revision during initialization and bail out if the firmware advertises a revision that the driver doesn't know about. This will avoid strange failures later if the driver goes on using the wrong interface revision. Signed-off-by: Roland Dreier --- drivers/net/mlx4/fw.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index ab259b2e11a5..e7ca118c8dfd 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -37,6 +37,10 @@ #include "fw.h" #include "icm.h" +enum { + MLX4_COMMAND_INTERFACE_REV = 1 +}; + extern void __buggy_use_of_MLX4_GET(void); extern void __buggy_use_of_MLX4_PUT(void); @@ -452,10 +456,12 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) u32 *outbox; int err = 0; u64 fw_ver; + u16 cmd_if_rev; u8 lg; #define QUERY_FW_OUT_SIZE 0x100 #define QUERY_FW_VER_OFFSET 0x00 +#define QUERY_FW_CMD_IF_REV_OFFSET 0x0a #define QUERY_FW_MAX_CMD_OFFSET 0x0f #define QUERY_FW_ERR_START_OFFSET 0x30 #define QUERY_FW_ERR_SIZE_OFFSET 0x38 @@ -484,14 +490,29 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev) ((fw_ver & 0xffff0000ull) >> 16) | ((fw_ver & 0x0000ffffull) << 16); + MLX4_GET(cmd_if_rev, outbox, QUERY_FW_CMD_IF_REV_OFFSET); + if (cmd_if_rev != MLX4_COMMAND_INTERFACE_REV) { + mlx4_err(dev, "Installed FW has unsupported " + "command interface revision %d.\n", + cmd_if_rev); + mlx4_err(dev, "(Installed FW version is %d.%d.%03d)\n", + (int) (dev->caps.fw_ver >> 32), + (int) (dev->caps.fw_ver >> 16) & 0xffff, + (int) dev->caps.fw_ver & 0xffff); + mlx4_err(dev, "This driver version supports only revision %d.\n", + MLX4_COMMAND_INTERFACE_REV); + err = -ENODEV; + goto out; + } + MLX4_GET(lg, outbox, QUERY_FW_MAX_CMD_OFFSET); cmd->max_cmds = 1 << lg; - mlx4_dbg(dev, "FW version %d.%d.%03d, max commands %d\n", + mlx4_dbg(dev, "FW version %d.%d.%03d (cmd intf rev %d), max commands %d\n", (int) (dev->caps.fw_ver >> 32), (int) (dev->caps.fw_ver >> 16) & 0xffff, (int) dev->caps.fw_ver & 0xffff, - cmd->max_cmds); + cmd_if_rev, cmd->max_cmds); MLX4_GET(fw->catas_offset, outbox, QUERY_FW_ERR_START_OFFSET); MLX4_GET(fw->catas_size, outbox, QUERY_FW_ERR_SIZE_OFFSET); From b2d9308ae43a9cfa56cc9682dc6ed63347a8d2bf Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 7 Jun 2007 23:24:38 -0700 Subject: [PATCH 6/9] mlx4_core: Don't set MTT address in dMPT entries with PA set If a dMPT entry has the PA flag (direct physical address) set, then the (unused) MTT base address field has to be set to 0. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/mlx4/mr.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c index b33864dab179..d0808fa3ec82 100644 --- a/drivers/net/mlx4/mr.c +++ b/drivers/net/mlx4/mr.c @@ -324,15 +324,17 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) MLX4_MPT_FLAG_MIO | MLX4_MPT_FLAG_REGION | mr->access); - if (mr->mtt.order < 0) - mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); mpt_entry->key = cpu_to_be32(key_to_hw_index(mr->key)); mpt_entry->pd = cpu_to_be32(mr->pd); mpt_entry->start = cpu_to_be64(mr->iova); mpt_entry->length = cpu_to_be64(mr->size); mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); - mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_seg = 0; + } else + mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt)); err = mlx4_SW2HW_MPT(dev, mailbox, key_to_hw_index(mr->key) & (dev->caps.num_mpts - 1)); From 57f01b53398baebd809e7efd49fc10c10174b46d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 6 Jun 2007 19:35:04 +0300 Subject: [PATCH 7/9] IB/mlx4: Fix zeroing of rnr_retry value in ib_modify_qp() The code in __mlx4_ib_modify_qp() overwrites context->params1 after the RNR retry parameter is ORed in, which results in the RNR retry parameter always being set to 0. Fix this by moving where we OR in the value to later in the function, after the initial assignment of context->params1. Found by the Mellanox firmware group. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index dc137dec2308..cd2297586980 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -762,11 +762,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } - if (attr_mask & IB_QP_RNR_RETRY) { - context->params1 |= cpu_to_be32(attr->rnr_retry << 13); - optpar |= MLX4_QP_OPTPAR_RNR_RETRY; - } - if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) { @@ -802,6 +797,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn); context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28); + + if (attr_mask & IB_QP_RNR_RETRY) { + context->params1 |= cpu_to_be32(attr->rnr_retry << 13); + optpar |= MLX4_QP_OPTPAR_RNR_RETRY; + } + if (attr_mask & IB_QP_RETRY_CNT) { context->params1 |= cpu_to_be32(attr->retry_cnt << 16); optpar |= MLX4_QP_OPTPAR_RETRY_COUNT; From bf2944bd56c7a48cc3962a860dbc4ceee6b1ace8 Mon Sep 17 00:00:00 2001 From: Sean Hefty Date: Tue, 5 Jun 2007 09:57:31 -0700 Subject: [PATCH 8/9] RDMA/cma: Fix initialization of next_port next_port should be between sysctl_local_port_range[0] and [1]. However, it is initially set to a random value with get_random_bytes(). If the value is negative when treated as a signed integer, next_port can end up outside the expected range because of the result of the % operator being negative. Signed-off-by: Sean Hefty Signed-off-by: Roland Dreier --- drivers/infiniband/core/cma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 2eb52b7a71da..32a0e66d2a23 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2773,8 +2773,8 @@ static int cma_init(void) int ret; get_random_bytes(&next_port, sizeof next_port); - next_port = (next_port % (sysctl_local_port_range[1] - - sysctl_local_port_range[0])) + + next_port = ((unsigned int) next_port % + (sysctl_local_port_range[1] - sysctl_local_port_range[0])) + sysctl_local_port_range[0]; cma_wq = create_singlethread_workqueue("rdma_cm"); if (!cma_wq) From a4cd7ed86ff511aebcc97675937039f2321d6987 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 7 Jun 2007 23:24:39 -0700 Subject: [PATCH 9/9] IB/mlx4: Make sure RQ allocation is always valid QPs attached to an SRQ must never have their own RQ, and QPs not attached to SRQs must have an RQ with at least 1 entry. Enforce all of this in set_rq_size(). Based on a patch by Eli Cohen . Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/qp.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index cd2297586980..5c6d05427a0f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -189,18 +189,28 @@ static int send_wqe_overhead(enum ib_qp_type type) } static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - struct mlx4_ib_qp *qp) + int is_user, int has_srq, struct mlx4_ib_qp *qp) { /* Sanity check RQ size before proceeding */ if (cap->max_recv_wr > dev->dev->caps.max_wqes || cap->max_recv_sge > dev->dev->caps.max_rq_sg) return -EINVAL; - qp->rq.max = cap->max_recv_wr ? roundup_pow_of_two(cap->max_recv_wr) : 0; + if (has_srq) { + /* QPs attached to an SRQ should have no RQ */ + if (cap->max_recv_wr) + return -EINVAL; - qp->rq.wqe_shift = ilog2(roundup_pow_of_two(cap->max_recv_sge * - sizeof (struct mlx4_wqe_data_seg))); - qp->rq.max_gs = (1 << qp->rq.wqe_shift) / sizeof (struct mlx4_wqe_data_seg); + qp->rq.max = qp->rq.max_gs = 0; + } else { + /* HW requires >= 1 RQ entry with >= 1 gather entry */ + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) + return -EINVAL; + + qp->rq.max = roundup_pow_of_two(max(1, cap->max_recv_wr)); + qp->rq.max_gs = roundup_pow_of_two(max(1, cap->max_recv_sge)); + qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); + } cap->max_recv_wr = qp->rq.max; cap->max_recv_sge = qp->rq.max_gs; @@ -285,7 +295,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->sq.head = 0; qp->sq.tail = 0; - err = set_rq_size(dev, &init_attr->cap, qp); + err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp); if (err) goto err;