IB/iser: Simplify connection management
iSER relies on refcounting to manage iser connections establishment
and teardown.
Following commit 39ff05dbbb
("IB/iser: Enhance disconnection logic
for multi-pathing"), iser connection maintain 3 references:
- iscsi_endpoint (at creation stage)
- cma_id (at connection request stage)
- iscsi_conn (at bind stage)
We can avoid taking explicit refcounts by correctly serializing iser
teardown flows (graceful and non-graceful).
Our approach is to trigger a scheduled work to handle ordered teardown
by gracefully waiting for 2 cleanup stages to complete:
1. Cleanup of live pending tasks indicated by iscsi_conn_stop completion
2. Flush errors processing
Each completed stage will notify a waiting worker thread when it is
done to allow teardwon continuation.
Since iSCSI connection establishment may trigger endpoint disconnect
without a successful endpoint connect, we rely on the iscsi <-> iser
binding (.conn_bind) to learn about the teardown policy we should take
wrt cleanup stages.
Since all cleanup worker threads are scheduled (release_wq) in
.ep_disconnect it is safe to assume that when module_exit is called,
all cleanup workers are already scheduled. Thus proper module unload
shall flush all scheduled works before allowing safe exit, to
guarantee no resources got left behind.
Signed-off-by: Ariel Nahum <arieln@mellanox.com>
Signed-off-by: Sagi Grimberg <sagig@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
This commit is contained in:
parent
d6d211db37
commit
b73c3adabd
3 changed files with 99 additions and 91 deletions
|
@ -99,6 +99,7 @@ MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
|
|||
module_param_named(pi_guard, iser_pi_guard, int, 0644);
|
||||
MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)");
|
||||
|
||||
static struct workqueue_struct *release_wq;
|
||||
struct iser_global ig;
|
||||
|
||||
void
|
||||
|
@ -337,24 +338,6 @@ iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx)
|
|||
return cls_conn;
|
||||
}
|
||||
|
||||
static void
|
||||
iscsi_iser_conn_destroy(struct iscsi_cls_conn *cls_conn)
|
||||
{
|
||||
struct iscsi_conn *conn = cls_conn->dd_data;
|
||||
struct iser_conn *ib_conn = conn->dd_data;
|
||||
|
||||
iscsi_conn_teardown(cls_conn);
|
||||
/*
|
||||
* Userspace will normally call the stop callback and
|
||||
* already have freed the ib_conn, but if it goofed up then
|
||||
* we free it here.
|
||||
*/
|
||||
if (ib_conn) {
|
||||
ib_conn->iscsi_conn = NULL;
|
||||
iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
|
||||
struct iscsi_cls_conn *cls_conn, uint64_t transport_eph,
|
||||
|
@ -392,29 +375,39 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
|
|||
conn->dd_data = ib_conn;
|
||||
ib_conn->iscsi_conn = conn;
|
||||
|
||||
iser_conn_get(ib_conn); /* ref iscsi/ib conn binding */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
|
||||
{
|
||||
struct iscsi_conn *iscsi_conn;
|
||||
struct iser_conn *ib_conn;
|
||||
|
||||
iscsi_conn = cls_conn->dd_data;
|
||||
ib_conn = iscsi_conn->dd_data;
|
||||
reinit_completion(&ib_conn->stop_completion);
|
||||
|
||||
return iscsi_conn_start(cls_conn);
|
||||
}
|
||||
|
||||
static void
|
||||
iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
|
||||
{
|
||||
struct iscsi_conn *conn = cls_conn->dd_data;
|
||||
struct iser_conn *ib_conn = conn->dd_data;
|
||||
|
||||
iser_dbg("stopping iscsi_conn: %p, ib_conn: %p\n", conn, ib_conn);
|
||||
iscsi_conn_stop(cls_conn, flag);
|
||||
|
||||
/*
|
||||
* Userspace may have goofed up and not bound the connection or
|
||||
* might have only partially setup the connection.
|
||||
*/
|
||||
if (ib_conn) {
|
||||
iscsi_conn_stop(cls_conn, flag);
|
||||
/*
|
||||
* There is no unbind event so the stop callback
|
||||
* must release the ref from the bind.
|
||||
*/
|
||||
iser_conn_put(ib_conn, 1); /* deref iscsi/ib conn unbinding */
|
||||
conn->dd_data = NULL;
|
||||
complete(&ib_conn->stop_completion);
|
||||
}
|
||||
conn->dd_data = NULL;
|
||||
}
|
||||
|
||||
static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
|
||||
|
@ -652,19 +645,20 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
|
|||
struct iser_conn *ib_conn;
|
||||
|
||||
ib_conn = ep->dd_data;
|
||||
if (ib_conn->iscsi_conn)
|
||||
/*
|
||||
* Must suspend xmit path if the ep is bound to the
|
||||
* iscsi_conn, so we know we are not accessing the ib_conn
|
||||
* when we free it.
|
||||
*
|
||||
* This may not be bound if the ep poll failed.
|
||||
*/
|
||||
iscsi_suspend_tx(ib_conn->iscsi_conn);
|
||||
|
||||
|
||||
iser_info("ib conn %p state %d\n", ib_conn, ib_conn->state);
|
||||
iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
|
||||
iser_conn_terminate(ib_conn);
|
||||
|
||||
/*
|
||||
* if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop
|
||||
* call and ISER_CONN_DOWN state before freeing the iser resources.
|
||||
* otherwise we are safe to free resources immediately.
|
||||
*/
|
||||
if (ib_conn->iscsi_conn) {
|
||||
INIT_WORK(&ib_conn->release_work, iser_release_work);
|
||||
queue_work(release_wq, &ib_conn->release_work);
|
||||
} else {
|
||||
iser_conn_release(ib_conn);
|
||||
}
|
||||
}
|
||||
|
||||
static umode_t iser_attr_is_visible(int param_type, int param)
|
||||
|
@ -748,13 +742,13 @@ static struct iscsi_transport iscsi_iser_transport = {
|
|||
/* connection management */
|
||||
.create_conn = iscsi_iser_conn_create,
|
||||
.bind_conn = iscsi_iser_conn_bind,
|
||||
.destroy_conn = iscsi_iser_conn_destroy,
|
||||
.destroy_conn = iscsi_conn_teardown,
|
||||
.attr_is_visible = iser_attr_is_visible,
|
||||
.set_param = iscsi_iser_set_param,
|
||||
.get_conn_param = iscsi_conn_get_param,
|
||||
.get_ep_param = iscsi_iser_get_ep_param,
|
||||
.get_session_param = iscsi_session_get_param,
|
||||
.start_conn = iscsi_conn_start,
|
||||
.start_conn = iscsi_iser_conn_start,
|
||||
.stop_conn = iscsi_iser_conn_stop,
|
||||
/* iscsi host params */
|
||||
.get_host_param = iscsi_host_get_param,
|
||||
|
@ -801,6 +795,12 @@ static int __init iser_init(void)
|
|||
mutex_init(&ig.connlist_mutex);
|
||||
INIT_LIST_HEAD(&ig.connlist);
|
||||
|
||||
release_wq = alloc_workqueue("release workqueue", 0, 0);
|
||||
if (!release_wq) {
|
||||
iser_err("failed to allocate release workqueue\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
iscsi_iser_scsi_transport = iscsi_register_transport(
|
||||
&iscsi_iser_transport);
|
||||
if (!iscsi_iser_scsi_transport) {
|
||||
|
@ -819,7 +819,24 @@ static int __init iser_init(void)
|
|||
|
||||
static void __exit iser_exit(void)
|
||||
{
|
||||
struct iser_conn *ib_conn, *n;
|
||||
int connlist_empty;
|
||||
|
||||
iser_dbg("Removing iSER datamover...\n");
|
||||
destroy_workqueue(release_wq);
|
||||
|
||||
mutex_lock(&ig.connlist_mutex);
|
||||
connlist_empty = list_empty(&ig.connlist);
|
||||
mutex_unlock(&ig.connlist_mutex);
|
||||
|
||||
if (!connlist_empty) {
|
||||
iser_err("Error cleanup stage completed but we still have iser "
|
||||
"connections, destroying them anyway.\n");
|
||||
list_for_each_entry_safe(ib_conn, n, &ig.connlist, conn_list) {
|
||||
iser_conn_release(ib_conn);
|
||||
}
|
||||
}
|
||||
|
||||
iscsi_unregister_transport(&iscsi_iser_transport);
|
||||
kmem_cache_destroy(ig.desc_cache);
|
||||
}
|
||||
|
|
|
@ -333,6 +333,8 @@ struct iser_conn {
|
|||
int post_recv_buf_count; /* posted rx count */
|
||||
atomic_t post_send_buf_count; /* posted tx count */
|
||||
char name[ISER_OBJECT_NAME_SIZE];
|
||||
struct work_struct release_work;
|
||||
struct completion stop_completion;
|
||||
struct list_head conn_list; /* entry in ig conn list */
|
||||
|
||||
char *login_buf;
|
||||
|
@ -417,12 +419,12 @@ void iscsi_iser_recv(struct iscsi_conn *conn,
|
|||
|
||||
void iser_conn_init(struct iser_conn *ib_conn);
|
||||
|
||||
void iser_conn_get(struct iser_conn *ib_conn);
|
||||
|
||||
int iser_conn_put(struct iser_conn *ib_conn, int destroy_cma_id_allowed);
|
||||
void iser_conn_release(struct iser_conn *ib_conn);
|
||||
|
||||
void iser_conn_terminate(struct iser_conn *ib_conn);
|
||||
|
||||
void iser_release_work(struct work_struct *work);
|
||||
|
||||
void iser_rcv_completion(struct iser_rx_desc *desc,
|
||||
unsigned long dto_xfer_len,
|
||||
struct iser_conn *ib_conn);
|
||||
|
|
|
@ -581,14 +581,30 @@ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
|
|||
return ret;
|
||||
}
|
||||
|
||||
void iser_release_work(struct work_struct *work)
|
||||
{
|
||||
struct iser_conn *ib_conn;
|
||||
|
||||
ib_conn = container_of(work, struct iser_conn, release_work);
|
||||
|
||||
/* wait for .conn_stop callback */
|
||||
wait_for_completion(&ib_conn->stop_completion);
|
||||
|
||||
/* wait for the qp`s post send and post receive buffers to empty */
|
||||
wait_event_interruptible(ib_conn->wait,
|
||||
ib_conn->state == ISER_CONN_DOWN);
|
||||
|
||||
iser_conn_release(ib_conn);
|
||||
}
|
||||
|
||||
/**
|
||||
* Frees all conn objects and deallocs conn descriptor
|
||||
*/
|
||||
static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
|
||||
void iser_conn_release(struct iser_conn *ib_conn)
|
||||
{
|
||||
struct iser_device *device = ib_conn->device;
|
||||
|
||||
BUG_ON(ib_conn->state != ISER_CONN_DOWN);
|
||||
BUG_ON(ib_conn->state == ISER_CONN_UP);
|
||||
|
||||
mutex_lock(&ig.connlist_mutex);
|
||||
list_del(&ib_conn->conn_list);
|
||||
|
@ -600,27 +616,13 @@ static void iser_conn_release(struct iser_conn *ib_conn, int can_destroy_id)
|
|||
if (device != NULL)
|
||||
iser_device_try_release(device);
|
||||
/* if cma handler context, the caller actually destroy the id */
|
||||
if (ib_conn->cma_id != NULL && can_destroy_id) {
|
||||
if (ib_conn->cma_id != NULL) {
|
||||
rdma_destroy_id(ib_conn->cma_id);
|
||||
ib_conn->cma_id = NULL;
|
||||
}
|
||||
iscsi_destroy_endpoint(ib_conn->ep);
|
||||
}
|
||||
|
||||
void iser_conn_get(struct iser_conn *ib_conn)
|
||||
{
|
||||
atomic_inc(&ib_conn->refcount);
|
||||
}
|
||||
|
||||
int iser_conn_put(struct iser_conn *ib_conn, int can_destroy_id)
|
||||
{
|
||||
if (atomic_dec_and_test(&ib_conn->refcount)) {
|
||||
iser_conn_release(ib_conn, can_destroy_id);
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* triggers start of the disconnect procedures and wait for them to be done
|
||||
*/
|
||||
|
@ -638,24 +640,19 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
|
|||
if (err)
|
||||
iser_err("Failed to disconnect, conn: 0x%p err %d\n",
|
||||
ib_conn,err);
|
||||
|
||||
wait_event_interruptible(ib_conn->wait,
|
||||
ib_conn->state == ISER_CONN_DOWN);
|
||||
|
||||
iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
|
||||
}
|
||||
|
||||
static int iser_connect_error(struct rdma_cm_id *cma_id)
|
||||
static void iser_connect_error(struct rdma_cm_id *cma_id)
|
||||
{
|
||||
struct iser_conn *ib_conn;
|
||||
|
||||
ib_conn = (struct iser_conn *)cma_id->context;
|
||||
|
||||
ib_conn->state = ISER_CONN_DOWN;
|
||||
wake_up_interruptible(&ib_conn->wait);
|
||||
return iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
|
||||
}
|
||||
|
||||
static int iser_addr_handler(struct rdma_cm_id *cma_id)
|
||||
static void iser_addr_handler(struct rdma_cm_id *cma_id)
|
||||
{
|
||||
struct iser_device *device;
|
||||
struct iser_conn *ib_conn;
|
||||
|
@ -664,7 +661,8 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
|
|||
device = iser_device_find_by_ib_device(cma_id);
|
||||
if (!device) {
|
||||
iser_err("device lookup/creation failed\n");
|
||||
return iser_connect_error(cma_id);
|
||||
iser_connect_error(cma_id);
|
||||
return;
|
||||
}
|
||||
|
||||
ib_conn = (struct iser_conn *)cma_id->context;
|
||||
|
@ -686,13 +684,12 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id)
|
|||
ret = rdma_resolve_route(cma_id, 1000);
|
||||
if (ret) {
|
||||
iser_err("resolve route failed: %d\n", ret);
|
||||
return iser_connect_error(cma_id);
|
||||
iser_connect_error(cma_id);
|
||||
return;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int iser_route_handler(struct rdma_cm_id *cma_id)
|
||||
static void iser_route_handler(struct rdma_cm_id *cma_id)
|
||||
{
|
||||
struct rdma_conn_param conn_param;
|
||||
int ret;
|
||||
|
@ -720,9 +717,9 @@ static int iser_route_handler(struct rdma_cm_id *cma_id)
|
|||
goto failure;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return;
|
||||
failure:
|
||||
return iser_connect_error(cma_id);
|
||||
iser_connect_error(cma_id);
|
||||
}
|
||||
|
||||
static void iser_connected_handler(struct rdma_cm_id *cma_id)
|
||||
|
@ -739,10 +736,9 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
|
|||
wake_up_interruptible(&ib_conn->wait);
|
||||
}
|
||||
|
||||
static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
|
||||
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
|
||||
{
|
||||
struct iser_conn *ib_conn;
|
||||
int ret;
|
||||
|
||||
ib_conn = (struct iser_conn *)cma_id->context;
|
||||
|
||||
|
@ -762,24 +758,19 @@ static int iser_disconnected_handler(struct rdma_cm_id *cma_id)
|
|||
ib_conn->state = ISER_CONN_DOWN;
|
||||
wake_up_interruptible(&ib_conn->wait);
|
||||
}
|
||||
|
||||
ret = iser_conn_put(ib_conn, 0); /* deref ib conn's cma id */
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
iser_info("event %d status %d conn %p id %p\n",
|
||||
event->event, event->status, cma_id->context, cma_id);
|
||||
|
||||
switch (event->event) {
|
||||
case RDMA_CM_EVENT_ADDR_RESOLVED:
|
||||
ret = iser_addr_handler(cma_id);
|
||||
iser_addr_handler(cma_id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_ROUTE_RESOLVED:
|
||||
ret = iser_route_handler(cma_id);
|
||||
iser_route_handler(cma_id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_ESTABLISHED:
|
||||
iser_connected_handler(cma_id);
|
||||
|
@ -789,18 +780,18 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
|
|||
case RDMA_CM_EVENT_CONNECT_ERROR:
|
||||
case RDMA_CM_EVENT_UNREACHABLE:
|
||||
case RDMA_CM_EVENT_REJECTED:
|
||||
ret = iser_connect_error(cma_id);
|
||||
iser_connect_error(cma_id);
|
||||
break;
|
||||
case RDMA_CM_EVENT_DISCONNECTED:
|
||||
case RDMA_CM_EVENT_DEVICE_REMOVAL:
|
||||
case RDMA_CM_EVENT_ADDR_CHANGE:
|
||||
ret = iser_disconnected_handler(cma_id);
|
||||
iser_disconnected_handler(cma_id);
|
||||
break;
|
||||
default:
|
||||
iser_err("Unexpected RDMA CM event (%d)\n", event->event);
|
||||
break;
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void iser_conn_init(struct iser_conn *ib_conn)
|
||||
|
@ -809,7 +800,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
|
|||
init_waitqueue_head(&ib_conn->wait);
|
||||
ib_conn->post_recv_buf_count = 0;
|
||||
atomic_set(&ib_conn->post_send_buf_count, 0);
|
||||
atomic_set(&ib_conn->refcount, 1); /* ref ib conn allocation */
|
||||
init_completion(&ib_conn->stop_completion);
|
||||
INIT_LIST_HEAD(&ib_conn->conn_list);
|
||||
spin_lock_init(&ib_conn->lock);
|
||||
}
|
||||
|
@ -837,7 +828,6 @@ int iser_connect(struct iser_conn *ib_conn,
|
|||
|
||||
ib_conn->state = ISER_CONN_PENDING;
|
||||
|
||||
iser_conn_get(ib_conn); /* ref ib conn's cma id */
|
||||
ib_conn->cma_id = rdma_create_id(iser_cma_handler,
|
||||
(void *)ib_conn,
|
||||
RDMA_PS_TCP, IB_QPT_RC);
|
||||
|
@ -874,9 +864,8 @@ int iser_connect(struct iser_conn *ib_conn,
|
|||
ib_conn->cma_id = NULL;
|
||||
addr_failure:
|
||||
ib_conn->state = ISER_CONN_DOWN;
|
||||
iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
|
||||
connect_failure:
|
||||
iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */
|
||||
iser_conn_release(ib_conn);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue