blkio: Take care of cgroup deletion and cfq group reference counting
o One can choose to change the elevator or delete a cgroup. Implement group
  reference counting so that both elevator exit and cgroup deletion can take
  place gracefully.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Nauman Rafique <nauman@google.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
parent 25fb5169d4
commit b1c3576961

3 changed files with 160 additions and 2 deletions
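The core idea of the patch is a single shared refcount per cfq group: creation takes one joint reference owned by "cgroup + elevator", each cfqq linked to the group takes another, and whoever drops the last one frees the group. Below is a minimal userspace sketch of that lifecycle using C11 atomics; group_create()/group_get()/group_put() are invented helper names, not functions from the patch.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for struct cfq_group: one shared object,
 * one atomic reference count. */
struct group {
        atomic_int ref;
};

static struct group *group_create(void)
{
        struct group *g = malloc(sizeof(*g));

        /* The initial reference is jointly owned by the cgroup and the
         * elevator; whichever exit path runs is the one that drops it. */
        atomic_init(&g->ref, 1);
        return g;
}

static void group_get(struct group *g)
{
        atomic_fetch_add(&g->ref, 1);   /* e.g. a cfqq taking a reference */
}

static void group_put(struct group *g)
{
        /* Only the dropper of the last reference frees the object, so the
         * group survives until both the hierarchy and all queues are done
         * with it. */
        if (atomic_fetch_sub(&g->ref, 1) == 1) {
                free(g);
                printf("group freed\n");
        }
}

int main(void)
{
        struct group *g = group_create();

        group_get(g);   /* a queue is linked to the group */
        group_put(g);   /* the queue goes away */
        group_put(g);   /* cgroup deletion (or elevator exit) drops the
                         * joint initial reference: last put frees */
        return 0;
}

With that model in mind, the diff below falls into two halves: the blk-cgroup side learns how to unhash a group from either direction, and the cfq side learns to take and drop the references.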
block/blk-cgroup.c

@@ -13,6 +13,8 @@
 #include <linux/ioprio.h>
 #include "blk-cgroup.h"
 
+extern void cfq_unlink_blkio_group(void *, struct blkio_group *);
+
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
@@ -28,14 +30,43 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 
        spin_lock_irqsave(&blkcg->lock, flags);
        rcu_assign_pointer(blkg->key, key);
+       blkg->blkcg_id = css_id(&blkcg->css);
        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
        spin_unlock_irqrestore(&blkcg->lock, flags);
 }
 
+static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
+{
+       hlist_del_init_rcu(&blkg->blkcg_node);
+       blkg->blkcg_id = 0;
+}
+
+/*
+ * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
+ * indicating that blk_group was unhashed by the time we got to it.
+ */
 int blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
-       /* Implemented later */
-       return 0;
+       struct blkio_cgroup *blkcg;
+       unsigned long flags;
+       struct cgroup_subsys_state *css;
+       int ret = 1;
+
+       rcu_read_lock();
+       css = css_lookup(&blkio_subsys, blkg->blkcg_id);
+       if (!css)
+               goto out;
+
+       blkcg = container_of(css, struct blkio_cgroup, css);
+       spin_lock_irqsave(&blkcg->lock, flags);
+       if (!hlist_unhashed(&blkg->blkcg_node)) {
+               __blkiocg_del_blkio_group(blkg);
+               ret = 0;
+       }
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+out:
+       rcu_read_unlock();
+       return ret;
 }
 
 /* called under rcu_read_lock(). */
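blkiocg_del_blkio_group() is reached from both teardown paths, and its return value tells the caller whether it won the race to unhash the group (0) or lost it (1) and can leave destruction to the other side. Here is a toy pthread model of that handshake; try_del() and teardown() are made-up analogues of the real functions, with a plain bool standing in for hlist membership.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool hashed = true;      /* "still on the cgroup list" */

/* Mirrors the contract of blkiocg_del_blkio_group():
 * 0 = we unhashed it, 1 = someone else got there first. */
static int try_del(void)
{
        int ret = 1;

        pthread_mutex_lock(&lock);
        if (hashed) {
                hashed = false;
                ret = 0;
        }
        pthread_mutex_unlock(&lock);
        return ret;
}

static void *teardown(void *name)
{
        if (!try_del())
                printf("%s won: it must also destroy the group\n",
                       (char *)name);
        else
                printf("%s lost: the other path handles destruction\n",
                       (char *)name);
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, teardown, "elevator exit");
        pthread_create(&b, NULL, teardown, "cgroup deletion");
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}

Exactly one thread prints "won" on every run; the lock around the hashed flag plays the role of blkcg->lock around hlist_unhashed().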
@@ -97,8 +128,39 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 
 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 {
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+       unsigned long flags;
+       struct blkio_group *blkg;
+       void *key;
 
+       rcu_read_lock();
+remove_entry:
+       spin_lock_irqsave(&blkcg->lock, flags);
+
+       if (hlist_empty(&blkcg->blkg_list)) {
+               spin_unlock_irqrestore(&blkcg->lock, flags);
+               goto done;
+       }
+
+       blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
+                               blkcg_node);
+       key = rcu_dereference(blkg->key);
+       __blkiocg_del_blkio_group(blkg);
+
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       /*
+        * This blkio_group is being unlinked as associated cgroup is going
+        * away. Let all the IO controlling policies know about this event.
+        *
+        * Currently this is a static call to one io controlling policy. Once
+        * we have more policies in place, we need some dynamic registration
+        * of callback function.
+        */
+       cfq_unlink_blkio_group(key, blkg);
+
+       goto remove_entry;
+done:
        free_css_id(&blkio_subsys, &blkcg->css);
+       rcu_read_unlock();
        kfree(blkcg);
 }
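Note that blkiocg_destroy() deliberately detaches one blkio_group per lock hold and invokes the policy callback with blkcg->lock dropped, because cfq_unlink_blkio_group() takes the queue lock itself. A userspace skeleton of that drain pattern follows; destroy_all() and notify_policy() are illustrative stand-ins, not patch functions.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;
        int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *head;

static void notify_policy(struct node *n)
{
        /* stands in for cfq_unlink_blkio_group(); takes other locks */
        printf("unlinking group %d\n", n->id);
        free(n);
}

static void destroy_all(void)
{
        struct node *n;

        for (;;) {
                pthread_mutex_lock(&list_lock);
                n = head;
                if (!n) {
                        pthread_mutex_unlock(&list_lock);
                        break;  /* the "done:" label */
                }
                head = n->next;                 /* unhash under the lock... */
                pthread_mutex_unlock(&list_lock);
                notify_policy(n);               /* ...notify outside it */
        }
}

int main(void)
{
        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->id = i;
                n->next = head;
                head = n;
        }
        destroy_all();
        return 0;
}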
block/blk-cgroup.h

@@ -26,6 +26,7 @@ struct blkio_group {
        /* An rcu protected unique identifier for the group */
        void *key;
        struct hlist_node blkcg_node;
+       unsigned short blkcg_id;
 };
 
 #define BLKIO_WEIGHT_MIN        100
block/cfq-iosched.c

@@ -192,6 +192,7 @@ struct cfq_group {
        struct blkio_group blkg;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
        struct hlist_node cfqd_node;
+       atomic_t ref;
 #endif
 };
@@ -924,6 +925,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
+       /*
+        * Take the initial reference that will be released on destroy.
+        * This can be thought of as a joint reference by cgroup and
+        * elevator which will be dropped by either elevator exit
+        * or cgroup deletion path depending on who is exiting first.
+        */
+       atomic_set(&cfqg->ref, 1);
+
        /* Add group onto cgroup list */
        blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd);
@@ -960,7 +969,77 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
                cfqg = &cfqq->cfqd->root_group;
 
        cfqq->cfqg = cfqg;
+       /* cfqq reference on cfqg */
+       atomic_inc(&cfqq->cfqg->ref);
+}
+
+static void cfq_put_cfqg(struct cfq_group *cfqg)
+{
+       struct cfq_rb_root *st;
+       int i, j;
+
+       BUG_ON(atomic_read(&cfqg->ref) <= 0);
+       if (!atomic_dec_and_test(&cfqg->ref))
+               return;
+       for_each_cfqg_st(cfqg, i, j, st)
+               BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+       kfree(cfqg);
+}
+
+static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+       /* Something wrong if we are trying to remove same group twice */
+       BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
+
+       hlist_del_init(&cfqg->cfqd_node);
+
+       /*
+        * Put the reference taken at the time of creation so that when all
+        * queues are gone, group can be destroyed.
+        */
+       cfq_put_cfqg(cfqg);
+}
+
+static void cfq_release_cfq_groups(struct cfq_data *cfqd)
+{
+       struct hlist_node *pos, *n;
+       struct cfq_group *cfqg;
+
+       hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
+               /*
+                * If cgroup removal path got to blk_group first and removed
+                * it from cgroup list, then it will take care of destroying
+                * cfqg also.
+                */
+               if (!blkiocg_del_blkio_group(&cfqg->blkg))
+                       cfq_destroy_cfqg(cfqd, cfqg);
+       }
+}
+
+/*
+ * Blk cgroup controller notification saying that blkio_group object is being
+ * delinked as associated cgroup object is going away. That also means that
+ * no new IO will come in this group. So get rid of this group as soon as
+ * any pending IO in the group is finished.
+ *
+ * This function is called under rcu_read_lock(). key is the rcu protected
+ * pointer. That means "key" is a valid cfq_data pointer as long as we are
+ * under rcu read lock.
+ *
+ * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
+ * it should not be NULL as even if elevator was exiting, cgroup deletion
+ * path got to it first.
+ */
+void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+{
+       unsigned long flags;
+       struct cfq_data *cfqd = key;
+
+       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+       cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
+       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
 #else /* GROUP_IOSCHED */
 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
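One detail worth calling out: cfq_release_cfq_groups() uses hlist_for_each_entry_safe() because cfq_destroy_cfqg() unlinks, and possibly frees, the very node the iterator is standing on. The same rule in a plain singly-linked-list sketch; release_all() is a made-up analogue.

#include <stdio.h>
#include <stdlib.h>

struct node {
        struct node *next;
        int id;
};

static void release_all(struct node **phead)
{
        struct node *n = *phead, *next;

        while (n) {
                next = n->next;         /* save before n may be freed */
                printf("destroying group %d\n", n->id);
                free(n);                /* cfq_destroy_cfqg() analogue */
                n = next;
        }
        *phead = NULL;
}

int main(void)
{
        struct node *head = NULL;

        for (int i = 0; i < 3; i++) {
                struct node *n = malloc(sizeof(*n));

                n->id = i;
                n->next = head;
                head = n;
        }
        release_all(&head);
        return 0;
}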
@@ -971,6 +1050,9 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
 }
 
+static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
+static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
+
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -2172,11 +2254,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
  * task holds one reference to the queue, dropped when task exits. each rq
  * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
+ * Each cfq queue took a reference on the parent group. Drop it now.
  * queue lock must be held here.
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
+       struct cfq_group *cfqg;
 
        BUG_ON(atomic_read(&cfqq->ref) <= 0);
@@ -2186,6 +2270,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        cfq_log_cfqq(cfqd, cfqq, "put_queue");
        BUG_ON(rb_first(&cfqq->sort_list));
        BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
+       cfqg = cfqq->cfqg;
 
        if (unlikely(cfqd->active_queue == cfqq)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
@@ -2194,6 +2279,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
+       cfq_put_cfqg(cfqg);
 }
 
 /*
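The ordering in cfq_put_queue() matters: cfqq->cfqg is saved into a local before kmem_cache_free() returns the queue's memory, and only then is the group reference dropped. A compact sketch of that save-then-free-then-put shape, with hypothetical queue/group types and a group_put() analogue of cfq_put_cfqg().

#include <stdatomic.h>
#include <stdlib.h>

struct group {
        atomic_int ref;
};

static void group_put(struct group *g)
{
        if (atomic_fetch_sub(&g->ref, 1) == 1)
                free(g);
}

struct queue {
        struct group *grp;
};

static void queue_put_last(struct queue *q)
{
        struct group *grp = q->grp;     /* save: q is freed next */

        free(q);                /* kmem_cache_free(cfq_pool, cfqq) analogue */
        group_put(grp);         /* may free the group itself */
}

int main(void)
{
        struct group *g = malloc(sizeof(*g));
        struct queue *q = malloc(sizeof(*q));

        atomic_init(&g->ref, 1);
        q->grp = g;
        queue_put_last(q);      /* reading q->grp after free(q) would be
                                   a use-after-free: hence the local */
        return 0;
}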
@@ -3369,11 +3455,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
        }
 
        cfq_put_async_queues(cfqd);
+       cfq_release_cfq_groups(cfqd);
+       blkiocg_del_blkio_group(&cfqd->root_group.blkg);
 
        spin_unlock_irq(q->queue_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
+       /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
+       synchronize_rcu();
        kfree(cfqd);
 }
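The synchronize_rcu() before kfree(cfqd) exists because blkg->key is an RCU-published pointer to cfqd, and the cgroup deletion path may still be dereferencing it under rcu_read_lock(). A minimal userspace sketch of the same publish/read/retire discipline, assuming liburcu (compile with -lurcu); the names here are illustrative only.

#include <urcu.h>
#include <stdio.h>
#include <stdlib.h>

struct data {
        int queues;
};

static struct data *key;        /* analogue of blkg->key */

static void reader(void)
{
        rcu_read_lock();
        struct data *d = rcu_dereference(key);

        if (d)
                printf("key still valid: %d queues\n", d->queues);
        rcu_read_unlock();
}

int main(void)
{
        struct data *d = malloc(sizeof(*d));

        d->queues = 1;
        rcu_register_thread();
        rcu_assign_pointer(key, d);     /* publish */

        reader();                       /* reads under rcu_read_lock() */

        /* teardown: unpublish, wait out all readers, then free */
        rcu_assign_pointer(key, NULL);
        synchronize_rcu();              /* no reader can still hold d */
        free(d);
        rcu_unregister_thread();
        return 0;
}

Skipping the synchronize_rcu() step would let free() race with a reader that already fetched the pointer, which is exactly the cfqd use-after-free the patch guards against.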
@@ -3401,6 +3491,11 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
+       /*
+        * Take a reference to root group which we never drop. This is just
+        * to make sure that cfq_put_cfqg() does not try to kfree root group.
+        */
+       atomic_set(&cfqg->ref, 1);
        blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd);
 #endif
        /*
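The root group needs this special case because it is embedded in the cfq_data allocation rather than allocated on its own, so its refcount must never reach zero. A small sketch of why the permanent pin is needed; struct group and struct sched_data are invented stand-ins.

#include <stdatomic.h>
#include <stdlib.h>

struct group {
        atomic_int ref;
};

struct sched_data {
        struct group root_group;        /* embedded, not allocated alone */
};

static void group_put(struct group *g)
{
        /* would corrupt the heap if ever reached for root_group,
         * since &root_group was never returned by malloc() */
        if (atomic_fetch_sub(&g->ref, 1) == 1)
                free(g);
}

int main(void)
{
        struct sched_data *sd = malloc(sizeof(*sd));

        /* pinned: this reference is never dropped, so every balanced
         * get/put pair leaves the count at 1 and free() is unreachable */
        atomic_init(&sd->root_group.ref, 1);

        atomic_fetch_add(&sd->root_group.ref, 1);       /* a queue links */
        group_put(&sd->root_group);                     /* 2 -> 1, no free */

        free(sd);       /* the whole allocation goes at elevator exit */
        return 0;
}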