cnic: Improve error recovery on bnx2x devices

When a bnx2x device encounters parity errors, it will not respond to all
SPQ messages.  As a result, the shutdown sequence before reset can take
a long time as the ulp drivers (bnx2i/bnx2fc) have to wait for timeout
of all such messages.

To improve this scenario, when bnx2x returns error on the SPQ, we'll send
an immediate response to the ulp drivers to avoid such lengthy timeouts.

Adjust the return code of relevant functions to return error only if
the message cannot be sent on the SPQ so that we'll generate an error
completion to the ulp drivers.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Michael Chan 2012-01-04 12:12:28 +00:00 committed by David S. Miller
parent a9e0a4f2ca
commit 23021c2105
3 changed files with 74 additions and 8 deletions

View file

@ -1361,7 +1361,7 @@ static int cnic_submit_kwqe_16(struct cnic_dev *dev, u32 cmd, u32 cid,
if (ret == 1)
return 0;
return -EBUSY;
return ret;
}
static void cnic_reply_bnx2x_kcqes(struct cnic_dev *dev, int ulp_type,
@ -1849,7 +1849,7 @@ static int cnic_bnx2x_iscsi_ofld1(struct cnic_dev *dev, struct kwqe *wqes[],
done:
cqes[0] = (struct kcqe *) &kcqe;
cnic_reply_bnx2x_kcqes(dev, CNIC_ULP_ISCSI, cqes, 1);
return ret;
return 0;
}
@ -1947,7 +1947,7 @@ static int cnic_bnx2x_iscsi_destroy(struct cnic_dev *dev, struct kwqe *kwqe)
cqes[0] = (struct kcqe *) &kcqe;
cnic_reply_bnx2x_kcqes(dev, CNIC_ULP_ISCSI, cqes, 1);
return ret;
return 0;
}
static void cnic_init_storm_conn_bufs(struct cnic_dev *dev,
@ -2513,6 +2513,57 @@ static int cnic_bnx2x_fcoe_fw_destroy(struct cnic_dev *dev, struct kwqe *kwqe)
return ret;
}
static void cnic_bnx2x_kwqe_err(struct cnic_dev *dev, struct kwqe *kwqe)
{
struct cnic_local *cp = dev->cnic_priv;
struct kcqe kcqe;
struct kcqe *cqes[1];
u32 cid;
u32 opcode = KWQE_OPCODE(kwqe->kwqe_op_flag);
u32 layer_code = kwqe->kwqe_op_flag & KWQE_LAYER_MASK;
int ulp_type;
cid = kwqe->kwqe_info0;
memset(&kcqe, 0, sizeof(kcqe));
if (layer_code == KWQE_FLAGS_LAYER_MASK_L5_ISCSI) {
ulp_type = CNIC_ULP_ISCSI;
if (opcode == ISCSI_KWQE_OPCODE_UPDATE_CONN)
cid = kwqe->kwqe_info1;
kcqe.kcqe_op_flag = (opcode + 0x10) << KCQE_FLAGS_OPCODE_SHIFT;
kcqe.kcqe_op_flag |= KCQE_FLAGS_LAYER_MASK_L5_ISCSI;
kcqe.kcqe_info1 = ISCSI_KCQE_COMPLETION_STATUS_NIC_ERROR;
kcqe.kcqe_info2 = cid;
cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &kcqe.kcqe_info0);
} else if (layer_code == KWQE_FLAGS_LAYER_MASK_L4) {
struct l4_kcq *l4kcqe = (struct l4_kcq *) &kcqe;
u32 kcqe_op;
ulp_type = CNIC_ULP_L4;
if (opcode == L4_KWQE_OPCODE_VALUE_CONNECT1)
kcqe_op = L4_KCQE_OPCODE_VALUE_CONNECT_COMPLETE;
else if (opcode == L4_KWQE_OPCODE_VALUE_RESET)
kcqe_op = L4_KCQE_OPCODE_VALUE_RESET_COMP;
else if (opcode == L4_KWQE_OPCODE_VALUE_CLOSE)
kcqe_op = L4_KCQE_OPCODE_VALUE_CLOSE_COMP;
else
return;
kcqe.kcqe_op_flag = (kcqe_op << KCQE_FLAGS_OPCODE_SHIFT) |
KCQE_FLAGS_LAYER_MASK_L4;
l4kcqe->status = L4_KCQE_COMPLETION_STATUS_NIC_ERROR;
l4kcqe->cid = cid;
cnic_get_l5_cid(cp, BNX2X_SW_CID(cid), &l4kcqe->conn_id);
} else {
return;
}
cqes[0] = (struct kcqe *) &kcqe;
cnic_reply_bnx2x_kcqes(dev, ulp_type, cqes, 1);
}
static int cnic_submit_bnx2x_iscsi_kwqes(struct cnic_dev *dev,
struct kwqe *wqes[], u32 num_wqes)
{
@ -2570,9 +2621,17 @@ static int cnic_submit_bnx2x_iscsi_kwqes(struct cnic_dev *dev,
opcode);
break;
}
if (ret < 0)
if (ret < 0) {
netdev_err(dev->netdev, "KWQE(0x%x) failed\n",
opcode);
/* Possibly bnx2x parity error, send completion
* to ulp drivers with error code to speed up
* cleanup and reset recovery.
*/
if (ret == -EIO || ret == -EAGAIN)
cnic_bnx2x_kwqe_err(dev, kwqe);
}
i += work;
}
return 0;
@ -3849,6 +3908,9 @@ static void cnic_cm_process_kcqe(struct cnic_dev *dev, struct kcqe *kcqe)
case L4_KCQE_OPCODE_VALUE_RESET_COMP:
case L5CM_RAMROD_CMD_ID_SEARCHER_DELETE:
case L5CM_RAMROD_CMD_ID_TERMINATE_OFFLOAD:
if (l4kcqe->status == L4_KCQE_COMPLETION_STATUS_NIC_ERROR)
set_bit(SK_F_HW_ERR, &csk->flags);
cp->close_conn(csk, opcode);
break;
@ -3976,7 +4038,9 @@ static void cnic_close_bnx2x_conn(struct cnic_sock *csk, u32 opcode)
case L4_KCQE_OPCODE_VALUE_CLOSE_COMP:
case L4_KCQE_OPCODE_VALUE_RESET_COMP:
if (cnic_ready_to_close(csk, opcode)) {
if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
if (test_bit(SK_F_HW_ERR, &csk->flags))
close_complete = 1;
else if (test_bit(SK_F_PG_OFFLD_COMPLETE, &csk->flags))
cmd = L5CM_RAMROD_CMD_ID_SEARCHER_DELETE;
else
close_complete = 1;

View file

@ -85,6 +85,7 @@
/* KCQ (kernel completion queue) completion status */
#define L4_KCQE_COMPLETION_STATUS_SUCCESS (0)
#define L4_KCQE_COMPLETION_STATUS_NIC_ERROR (4)
#define L4_KCQE_COMPLETION_STATUS_TIMEOUT (0x93)
#define L4_KCQE_COMPLETION_STATUS_CTX_ALLOC_FAIL (0x83)

View file

@ -1,6 +1,6 @@
/* cnic_if.h: Broadcom CNIC core network driver.
*
* Copyright (c) 2006-2011 Broadcom Corporation
* Copyright (c) 2006-2012 Broadcom Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -12,8 +12,8 @@
#ifndef CNIC_IF_H
#define CNIC_IF_H
#define CNIC_MODULE_VERSION "2.5.7"
#define CNIC_MODULE_RELDATE "July 20, 2011"
#define CNIC_MODULE_VERSION "2.5.8"
#define CNIC_MODULE_RELDATE "Jan 3, 2012"
#define CNIC_ULP_RDMA 0
#define CNIC_ULP_ISCSI 1
@ -261,6 +261,7 @@ struct cnic_sock {
#define SK_F_CONNECT_START 4
#define SK_F_IPV6 5
#define SK_F_CLOSING 7
#define SK_F_HW_ERR 8
atomic_t ref_count;
u32 state;