Make sure that userspace does not retrieve stale asynchronous or

completion events after destroying a CQ, QP or SRQ.  We do this by
sweeping the event lists before returning from a destroy calls, and
then return the number of events already reported before the destroy
call.  This allows userspace wait until it has processed all events
for an object returned from the kernel before it frees its context for
the object.

The ABI of the destroy CQ, destroy QP and destroy SRQ commands has to
change to return the event count, so bump the ABI version from 1 to 2.
The userspace libibverbs library has already been updated to handle
both the old and new ABI versions.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
This commit is contained in:
Roland Dreier 2005-09-09 15:55:08 -07:00 committed by Roland Dreier
parent 2e9f7cb786
commit 63aaf64752
4 changed files with 211 additions and 89 deletions

View file

@ -76,20 +76,28 @@ struct ib_uverbs_file {
struct ib_uverbs_event_file comp_file[1];
};
struct ib_uverbs_async_event {
struct ib_uverbs_async_event_desc desc;
struct ib_uverbs_event {
union {
struct ib_uverbs_async_event_desc async;
struct ib_uverbs_comp_event_desc comp;
} desc;
struct list_head list;
struct list_head obj_list;
u32 *counter;
};
struct ib_uverbs_comp_event {
struct ib_uverbs_comp_event_desc desc;
struct list_head list;
struct ib_uevent_object {
struct ib_uobject uobject;
struct list_head event_list;
u32 events_reported;
};
struct ib_uobject_mr {
struct ib_uobject uobj;
struct page *page_list;
struct scatterlist *sg_list;
struct ib_ucq_object {
struct ib_uobject uobject;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
u32 async_events_reported;
};
extern struct semaphore ib_uverbs_idr_mutex;

View file

@ -590,7 +590,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
struct ib_uverbs_create_cq cmd;
struct ib_uverbs_create_cq_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_ucq_object *uobj;
struct ib_cq *cq;
int ret;
@ -611,8 +611,12 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
if (!uobj)
return -ENOMEM;
uobj->user_handle = cmd.user_handle;
uobj->context = file->ucontext;
uobj->uobject.user_handle = cmd.user_handle;
uobj->uobject.context = file->ucontext;
uobj->comp_events_reported = 0;
uobj->async_events_reported = 0;
INIT_LIST_HEAD(&uobj->comp_list);
INIT_LIST_HEAD(&uobj->async_list);
cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe,
file->ucontext, &udata);
@ -622,7 +626,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
}
cq->device = file->device->ib_dev;
cq->uobject = uobj;
cq->uobject = &uobj->uobject;
cq->comp_handler = ib_uverbs_comp_handler;
cq->event_handler = ib_uverbs_cq_event_handler;
cq->cq_context = file;
@ -635,7 +639,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
}
down(&ib_uverbs_idr_mutex);
ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id);
ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->uobject.id);
up(&ib_uverbs_idr_mutex);
if (ret == -EAGAIN)
@ -644,11 +648,11 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
goto err_cq;
spin_lock_irq(&file->ucontext->lock);
list_add_tail(&uobj->list, &file->ucontext->cq_list);
list_add_tail(&uobj->uobject.list, &file->ucontext->cq_list);
spin_unlock_irq(&file->ucontext->lock);
memset(&resp, 0, sizeof resp);
resp.cq_handle = uobj->id;
resp.cq_handle = uobj->uobject.id;
resp.cqe = cq->cqe;
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@ -661,11 +665,11 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
err_list:
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
down(&ib_uverbs_idr_mutex);
idr_remove(&ib_uverbs_cq_idr, uobj->id);
idr_remove(&ib_uverbs_cq_idr, uobj->uobject.id);
up(&ib_uverbs_idr_mutex);
err_cq:
@ -681,20 +685,26 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_destroy_cq cmd;
struct ib_uverbs_destroy_cq_resp resp;
struct ib_cq *cq;
struct ib_uobject *uobj;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *evt, *tmp;
u64 user_handle;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
memset(&resp, 0, sizeof resp);
down(&ib_uverbs_idr_mutex);
cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle);
if (!cq || cq->uobject->context != file->ucontext)
goto out;
uobj = cq->uobject;
user_handle = cq->uobject->user_handle;
uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
ret = ib_destroy_cq(cq);
if (ret)
@ -703,11 +713,32 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle);
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
spin_lock_irq(&file->comp_file[0].lock);
list_for_each_entry_safe(evt, tmp, &uobj->comp_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->comp_file[0].lock);
spin_lock_irq(&file->async_file.lock);
list_for_each_entry_safe(evt, tmp, &uobj->async_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file.lock);
resp.comp_events_reported = uobj->comp_events_reported;
resp.async_events_reported = uobj->async_events_reported;
kfree(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
up(&ib_uverbs_idr_mutex);
@ -721,7 +752,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
struct ib_uverbs_create_qp cmd;
struct ib_uverbs_create_qp_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_uevent_object *uobj;
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
@ -772,8 +803,10 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
attr.cap.max_recv_sge = cmd.max_recv_sge;
attr.cap.max_inline_data = cmd.max_inline_data;
uobj->user_handle = cmd.user_handle;
uobj->context = file->ucontext;
uobj->uobject.user_handle = cmd.user_handle;
uobj->uobject.context = file->ucontext;
uobj->events_reported = 0;
INIT_LIST_HEAD(&uobj->event_list);
qp = pd->device->create_qp(pd, &attr, &udata);
if (IS_ERR(qp)) {
@ -786,7 +819,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
qp->send_cq = attr.send_cq;
qp->recv_cq = attr.recv_cq;
qp->srq = attr.srq;
qp->uobject = uobj;
qp->uobject = &uobj->uobject;
qp->event_handler = attr.event_handler;
qp->qp_context = attr.qp_context;
qp->qp_type = attr.qp_type;
@ -805,17 +838,17 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
goto err_destroy;
}
ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id);
ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->uobject.id);
if (ret == -EAGAIN)
goto retry;
if (ret)
goto err_destroy;
resp.qp_handle = uobj->id;
resp.qp_handle = uobj->uobject.id;
spin_lock_irq(&file->ucontext->lock);
list_add_tail(&uobj->list, &file->ucontext->qp_list);
list_add_tail(&uobj->uobject.list, &file->ucontext->qp_list);
spin_unlock_irq(&file->ucontext->lock);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@ -830,7 +863,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
err_list:
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
err_destroy:
@ -931,20 +964,24 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_destroy_qp cmd;
struct ib_uverbs_destroy_qp_resp resp;
struct ib_qp *qp;
struct ib_uobject *uobj;
struct ib_uevent_object *uobj;
struct ib_uverbs_event *evt, *tmp;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
memset(&resp, 0, sizeof resp);
down(&ib_uverbs_idr_mutex);
qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle);
if (!qp || qp->uobject->context != file->ucontext)
goto out;
uobj = qp->uobject;
uobj = container_of(qp->uobject, struct ib_uevent_object, uobject);
ret = ib_destroy_qp(qp);
if (ret)
@ -953,11 +990,24 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle);
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
spin_lock_irq(&file->async_file.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file.lock);
resp.events_reported = uobj->events_reported;
kfree(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
up(&ib_uverbs_idr_mutex);
@ -1015,7 +1065,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
struct ib_uverbs_create_srq cmd;
struct ib_uverbs_create_srq_resp resp;
struct ib_udata udata;
struct ib_uobject *uobj;
struct ib_uevent_object *uobj;
struct ib_pd *pd;
struct ib_srq *srq;
struct ib_srq_init_attr attr;
@ -1050,8 +1100,10 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
attr.attr.max_sge = cmd.max_sge;
attr.attr.srq_limit = cmd.srq_limit;
uobj->user_handle = cmd.user_handle;
uobj->context = file->ucontext;
uobj->uobject.user_handle = cmd.user_handle;
uobj->uobject.context = file->ucontext;
uobj->events_reported = 0;
INIT_LIST_HEAD(&uobj->event_list);
srq = pd->device->create_srq(pd, &attr, &udata);
if (IS_ERR(srq)) {
@ -1061,7 +1113,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
srq->device = pd->device;
srq->pd = pd;
srq->uobject = uobj;
srq->uobject = &uobj->uobject;
srq->event_handler = attr.event_handler;
srq->srq_context = attr.srq_context;
atomic_inc(&pd->usecnt);
@ -1075,17 +1127,17 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
goto err_destroy;
}
ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->id);
ret = idr_get_new(&ib_uverbs_srq_idr, srq, &uobj->uobject.id);
if (ret == -EAGAIN)
goto retry;
if (ret)
goto err_destroy;
resp.srq_handle = uobj->id;
resp.srq_handle = uobj->uobject.id;
spin_lock_irq(&file->ucontext->lock);
list_add_tail(&uobj->list, &file->ucontext->srq_list);
list_add_tail(&uobj->uobject.list, &file->ucontext->srq_list);
spin_unlock_irq(&file->ucontext->lock);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
@ -1100,7 +1152,7 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
err_list:
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
err_destroy:
@ -1150,8 +1202,10 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_destroy_srq cmd;
struct ib_uverbs_destroy_srq_resp resp;
struct ib_srq *srq;
struct ib_uobject *uobj;
struct ib_uevent_object *uobj;
struct ib_uverbs_event *evt, *tmp;
int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
@ -1159,11 +1213,13 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
down(&ib_uverbs_idr_mutex);
memset(&resp, 0, sizeof resp);
srq = idr_find(&ib_uverbs_srq_idr, cmd.srq_handle);
if (!srq || srq->uobject->context != file->ucontext)
goto out;
uobj = srq->uobject;
uobj = container_of(srq->uobject, struct ib_uevent_object, uobject);
ret = ib_destroy_srq(srq);
if (ret)
@ -1172,11 +1228,24 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
idr_remove(&ib_uverbs_srq_idr, cmd.srq_handle);
spin_lock_irq(&file->ucontext->lock);
list_del(&uobj->list);
list_del(&uobj->uobject.list);
spin_unlock_irq(&file->ucontext->lock);
spin_lock_irq(&file->async_file.lock);
list_for_each_entry_safe(evt, tmp, &uobj->event_list, obj_list) {
list_del(&evt->list);
kfree(evt);
}
spin_unlock_irq(&file->async_file.lock);
resp.events_reported = uobj->events_reported;
kfree(uobj);
if (copy_to_user((void __user *) (unsigned long) cmd.response,
&resp, sizeof resp))
ret = -EFAULT;
out:
up(&ib_uverbs_idr_mutex);

View file

@ -120,7 +120,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
idr_remove(&ib_uverbs_qp_idr, uobj->id);
ib_destroy_qp(qp);
list_del(&uobj->list);
kfree(uobj);
kfree(container_of(uobj, struct ib_uevent_object, uobject));
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
@ -128,7 +128,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
idr_remove(&ib_uverbs_cq_idr, uobj->id);
ib_destroy_cq(cq);
list_del(&uobj->list);
kfree(uobj);
kfree(container_of(uobj, struct ib_ucq_object, uobject));
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
@ -136,7 +136,7 @@ static int ib_dealloc_ucontext(struct ib_ucontext *context)
idr_remove(&ib_uverbs_srq_idr, uobj->id);
ib_destroy_srq(srq);
list_del(&uobj->list);
kfree(uobj);
kfree(container_of(uobj, struct ib_uevent_object, uobject));
}
/* XXX Free MWs */
@ -182,7 +182,7 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
size_t count, loff_t *pos)
{
struct ib_uverbs_event_file *file = filp->private_data;
void *event;
struct ib_uverbs_event *event;
int eventsz;
int ret = 0;
@ -207,21 +207,23 @@ static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf,
return -ENODEV;
}
if (file->is_async) {
event = list_entry(file->event_list.next,
struct ib_uverbs_async_event, list);
event = list_entry(file->event_list.next, struct ib_uverbs_event, list);
if (file->is_async)
eventsz = sizeof (struct ib_uverbs_async_event_desc);
} else {
event = list_entry(file->event_list.next,
struct ib_uverbs_comp_event, list);
else
eventsz = sizeof (struct ib_uverbs_comp_event_desc);
}
if (eventsz > count) {
ret = -EINVAL;
event = NULL;
} else
} else {
list_del(file->event_list.next);
if (event->counter) {
++(*event->counter);
list_del(&event->obj_list);
}
}
spin_unlock_irq(&file->lock);
@ -257,16 +259,13 @@ static unsigned int ib_uverbs_event_poll(struct file *filp,
static void ib_uverbs_event_release(struct ib_uverbs_event_file *file)
{
struct list_head *entry, *tmp;
struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&file->lock);
if (file->fd != -1) {
file->fd = -1;
list_for_each_safe(entry, tmp, &file->event_list)
if (file->is_async)
kfree(list_entry(entry, struct ib_uverbs_async_event, list));
else
kfree(list_entry(entry, struct ib_uverbs_comp_event, list));
list_for_each_entry_safe(entry, tmp, &file->event_list, list)
kfree(entry);
}
spin_unlock_irq(&file->lock);
}
@ -305,17 +304,22 @@ static struct file_operations uverbs_event_fops = {
void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
{
struct ib_uverbs_file *file = cq_context;
struct ib_uverbs_comp_event *entry;
struct ib_ucq_object *uobj;
struct ib_uverbs_event *entry;
unsigned long flags;
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry)
return;
entry->desc.cq_handle = cq->uobject->user_handle;
uobj = container_of(cq->uobject, struct ib_ucq_object, uobject);
entry->desc.comp.cq_handle = cq->uobject->user_handle;
entry->counter = &uobj->comp_events_reported;
spin_lock_irqsave(&file->comp_file[0].lock, flags);
list_add_tail(&entry->list, &file->comp_file[0].event_list);
list_add_tail(&entry->obj_list, &uobj->comp_list);
spin_unlock_irqrestore(&file->comp_file[0].lock, flags);
wake_up_interruptible(&file->comp_file[0].poll_wait);
@ -323,20 +327,25 @@ void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context)
}
static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
__u64 element, __u64 event)
__u64 element, __u64 event,
struct list_head *obj_list,
u32 *counter)
{
struct ib_uverbs_async_event *entry;
struct ib_uverbs_event *entry;
unsigned long flags;
entry = kmalloc(sizeof *entry, GFP_ATOMIC);
if (!entry)
return;
entry->desc.element = element;
entry->desc.event_type = event;
entry->desc.async.element = element;
entry->desc.async.event_type = event;
entry->counter = counter;
spin_lock_irqsave(&file->async_file.lock, flags);
list_add_tail(&entry->list, &file->async_file.event_list);
if (obj_list)
list_add_tail(&entry->obj_list, obj_list);
spin_unlock_irqrestore(&file->async_file.lock, flags);
wake_up_interruptible(&file->async_file.poll_wait);
@ -345,23 +354,39 @@ static void ib_uverbs_async_handler(struct ib_uverbs_file *file,
void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
{
ib_uverbs_async_handler(context_ptr,
event->element.cq->uobject->user_handle,
event->event);
struct ib_ucq_object *uobj;
uobj = container_of(event->element.cq->uobject,
struct ib_ucq_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
}
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{
ib_uverbs_async_handler(context_ptr,
event->element.qp->uobject->user_handle,
event->event);
struct ib_uevent_object *uobj;
uobj = container_of(event->element.qp->uobject,
struct ib_uevent_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
&uobj->events_reported);
}
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
ib_uverbs_async_handler(context_ptr,
event->element.srq->uobject->user_handle,
event->event);
struct ib_uevent_object *uobj;
uobj = container_of(event->element.srq->uobject,
struct ib_uevent_object, uobject);
ib_uverbs_async_handler(context_ptr, uobj->uobject.user_handle,
event->event, &uobj->event_list,
&uobj->events_reported);
}
static void ib_uverbs_event_handler(struct ib_event_handler *handler,
@ -370,7 +395,8 @@ static void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_uverbs_file *file =
container_of(handler, struct ib_uverbs_file, event_handler);
ib_uverbs_async_handler(file, event->element.port_num, event->event);
ib_uverbs_async_handler(file, event->element.port_num, event->event,
NULL, NULL);
}
static int ib_uverbs_event_init(struct ib_uverbs_event_file *file,

View file

@ -42,7 +42,7 @@
* Increment this value if any changes that break userspace ABI
* compatibility are made.
*/
#define IB_USER_VERBS_ABI_VERSION 1
#define IB_USER_VERBS_ABI_VERSION 2
enum {
IB_USER_VERBS_CMD_QUERY_PARAMS,
@ -292,7 +292,14 @@ struct ib_uverbs_create_cq_resp {
};
struct ib_uverbs_destroy_cq {
__u64 response;
__u32 cq_handle;
__u32 reserved;
};
struct ib_uverbs_destroy_cq_resp {
__u32 comp_events_reported;
__u32 async_events_reported;
};
struct ib_uverbs_create_qp {
@ -372,7 +379,13 @@ struct ib_uverbs_modify_qp_resp {
};
struct ib_uverbs_destroy_qp {
__u64 response;
__u32 qp_handle;
__u32 reserved;
};
struct ib_uverbs_destroy_qp_resp {
__u32 events_reported;
};
struct ib_uverbs_attach_mcast {
@ -416,7 +429,13 @@ struct ib_uverbs_modify_srq {
};
struct ib_uverbs_destroy_srq {
__u64 response;
__u32 srq_handle;
__u32 reserved;
};
struct ib_uverbs_destroy_srq_resp {
__u32 events_reported;
};
#endif /* IB_USER_VERBS_H */