From fba822722e3f9d438fca8fd9541d7ddd447d7a48 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 09:44:06 +0200 Subject: [PATCH 1/3] [PATCH 1/2] iosched: fix typo and barrier() On rmmod path, cfq/as waits to make sure all io-contexts was freed. However, it's using complete(), not wait_for_completion(). I think barrier() is not enough in here. To avoid the following case, this patch replaces barrier() with smb_wmb(). cpu0 visibility cpu1 [ioc_gnone=NULL,ioc_count=1] ioc_gnone = &all_gone NULL,ioc_count=1 atomic_read(&ioc_count) NULL,ioc_count=1 wait_for_completion() NULL,ioc_count=0 atomic_sub_and_test() NULL,ioc_count=0 if ( && ioc_gone) [ioc_gone==NULL, so doesn't call complete()] &all_gone,ioc_count=0 Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/as-iosched.c | 5 +++-- block/cfq-iosched.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/block/as-iosched.c b/block/as-iosched.c index 296708ceceb2..e25a5d79ab27 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1844,9 +1844,10 @@ static void __exit as_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_as); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); kmem_cache_destroy(arq_pool); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 67d446de0227..01820b1094e9 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -2439,9 +2439,10 @@ static void __exit cfq_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_cfq); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); cfq_slab_kill(); } From dbecf3ab40b5a6cc4499543778cd9f9682c0abad Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 09:45:18 +0200 Subject: [PATCH 2/3] [PATCH 2/2] cfq: fix cic's rbtree traversal When queue dies, we set cic->key=NULL as dead mark. So, when we traverse a rbtree, we must check whether it's still valid key. if it was invalidated, drop it, then restart the traversal from top. Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 01820b1094e9..246feae16c60 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1472,15 +1472,31 @@ cfq_get_queue(struct cfq_data *cfqd, unsigned int key, struct task_struct *tsk, return cfqq; } +static void +cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) +{ + read_lock(&cfq_exit_lock); + rb_erase(&cic->rb_node, &ioc->cic_root); + read_unlock(&cfq_exit_lock); + kmem_cache_free(cfq_ioc_pool, cic); + atomic_dec(&ioc_count); +} + static struct cfq_io_context * cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) { - struct rb_node *n = ioc->cic_root.rb_node; + struct rb_node *n; struct cfq_io_context *cic; void *key = cfqd; +restart: + n = ioc->cic_root.rb_node; while (n) { cic = rb_entry(n, struct cfq_io_context, rb_node); + if (unlikely(!cic->key)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } if (key < cic->key) n = n->rb_left; @@ -1497,20 +1513,24 @@ static inline void cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct cfq_io_context *cic) { - struct rb_node **p = &ioc->cic_root.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p; + struct rb_node *parent; struct cfq_io_context *__cic; - read_lock(&cfq_exit_lock); - cic->ioc = ioc; cic->key = cfqd; ioc->set_ioprio = cfq_ioc_set_ioprio; - +restart: + parent = NULL; + p = &ioc->cic_root.rb_node; while (*p) { parent = *p; __cic = rb_entry(parent, struct cfq_io_context, rb_node); + if (unlikely(!__cic->key)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } if (cic->key < __cic->key) p = &(*p)->rb_left; @@ -1520,6 +1540,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, BUG(); } + read_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); list_add(&cic->queue_list, &cfqd->cic_list); From be3b075354e170368a0d29558cae492205e80a64 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Tue, 18 Apr 2006 19:18:31 +0200 Subject: [PATCH 3/3] [PATCH] cfq: Further rbtree traversal and cfq_exit_queue() race fix In current code, we are re-reading cic->key after dead cic->key check. So, in theory, it may really re-read *after* cfq_exit_queue() seted NULL. To avoid race, we copy it to stack, then use it. With this change, I guess gcc will assign cic->key to a register or stack, and it wouldn't be re-readed. Signed-off-by: OGAWA Hirofumi Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 246feae16c60..2540dfaa3e38 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1487,20 +1487,22 @@ cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) { struct rb_node *n; struct cfq_io_context *cic; - void *key = cfqd; + void *k, *key = cfqd; restart: n = ioc->cic_root.rb_node; while (n) { cic = rb_entry(n, struct cfq_io_context, rb_node); - if (unlikely(!cic->key)) { + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = cic->key; + if (unlikely(!k)) { cfq_drop_dead_cic(ioc, cic); goto restart; } - if (key < cic->key) + if (key < k) n = n->rb_left; - else if (key > cic->key) + else if (key > k) n = n->rb_right; else return cic; @@ -1516,6 +1518,7 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct rb_node **p; struct rb_node *parent; struct cfq_io_context *__cic; + void *k; cic->ioc = ioc; cic->key = cfqd; @@ -1527,14 +1530,16 @@ cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, while (*p) { parent = *p; __cic = rb_entry(parent, struct cfq_io_context, rb_node); - if (unlikely(!__cic->key)) { + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = __cic->key; + if (unlikely(!k)) { cfq_drop_dead_cic(ioc, cic); goto restart; } - if (cic->key < __cic->key) + if (cic->key < k) p = &(*p)->rb_left; - else if (cic->key > __cic->key) + else if (cic->key > k) p = &(*p)->rb_right; else BUG();