blkcg: make sure blkg_lookup() returns %NULL if @q is bypassing
Currently, blkg_lookup() doesn't check @q bypass state. This patch
updates blk_queue_bypass_start() to do synchronize_rcu() before
returning and updates blkg_lookup() to check blk_queue_bypass() and
return %NULL if bypassing. This ensures blkg_lookup() returns %NULL
if @q is bypassing.

This is to guarantee that nobody is accessing policy data while @q is
bypassing, which is necessary to allow replacing blkio_cgroup->pd[] in
place on policy [de]activation.

v2: Added more comments explaining bypass guarantees as suggested by
    Vivek.

v3: Added more comments explaining why there's no synchronize_rcu() in
    blk_cleanup_queue() as suggested by Vivek.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
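The guarantee the patch relies on is the standard RCU flag-then-grace-period pattern: the writer marks the queue as bypassing, waits out a grace period with synchronize_rcu(), and from then on every lookup performed under rcu_read_lock() observes the bypass state and returns %NULL before touching policy data. Below is a minimal userspace sketch of that ordering, assuming liburcu and C11 atomics; fake_queue, queue_bypass_start() and group_lookup() are illustrative stand-ins, not the kernel's structures or APIs.

/*
 * Userspace sketch of the bypass guarantee (liburcu + C11 atomics).
 * fake_queue, queue_bypass_start() and group_lookup() are illustrative
 * stand-ins for request_queue, blk_queue_bypass_start() and blkg_lookup().
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>
#include <urcu.h>	/* rcu_read_lock(), synchronize_rcu(), ... */

struct fake_queue {
	atomic_int bypass_depth;	/* > 0 means the queue is bypassing */
	void *policy_data;		/* stands in for per-queue blkg/pd[] */
};

/* Writer side, mirroring blk_queue_bypass_start(). */
static void queue_bypass_start(struct fake_queue *q)
{
	atomic_fetch_add(&q->bypass_depth, 1);	/* enter bypass mode */

	/*
	 * Wait for readers already inside rcu_read_lock().  After this
	 * returns, every new reader sees bypass_depth > 0 and never
	 * dereferences policy_data, so it can be replaced in place.
	 */
	synchronize_rcu();
}

/* Reader side, mirroring blkg_lookup() returning %NULL while bypassing. */
static void *group_lookup(struct fake_queue *q)
{
	void *pd = NULL;

	rcu_read_lock();
	if (!atomic_load(&q->bypass_depth))
		pd = q->policy_data;	/* safe: not bypassing */
	rcu_read_unlock();
	return pd;
}

int main(void)
{
	static int dummy_pd;
	struct fake_queue q = { .policy_data = &dummy_pd };

	rcu_register_thread();	/* liburcu readers must register */

	printf("before bypass: %p\n", group_lookup(&q));
	queue_bypass_start(&q);
	printf("after bypass:  %p\n", group_lookup(&q));	/* NULL now */

	rcu_unregister_thread();
	return 0;
}

Built with something like cc sketch.c -lurcu, the lookup performed after queue_bypass_start() returns NULL. Note that blk_cleanup_queue() deliberately skips the synchronize_rcu() step, as the new comment in that function explains, because nothing can still be traversing a queue whose refcount has dropped to zero.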
parent da8b066262
commit 80fd99792b

2 changed files with 46 additions and 19 deletions
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -137,6 +137,38 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 	return blkg;
 }
 
+static struct blkio_group *__blkg_lookup(struct blkio_cgroup *blkcg,
+					 struct request_queue *q)
+{
+	struct blkio_group *blkg;
+	struct hlist_node *n;
+
+	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
+		if (blkg->q == q)
+			return blkg;
+	return NULL;
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+				struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(blk_queue_bypass(q)))
+		return NULL;
+	return __blkg_lookup(blkcg, q);
+}
+EXPORT_SYMBOL_GPL(blkg_lookup);
+
 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 				       struct request_queue *q,
 				       bool for_root)
@@ -150,13 +182,11 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	/*
 	 * This could be the first entry point of blkcg implementation and
 	 * we shouldn't allow anything to go through for a bypassing queue.
-	 * The following can be removed if blkg lookup is guaranteed to
-	 * fail on a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)) && !for_root)
 		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
 
-	blkg = blkg_lookup(blkcg, q);
+	blkg = __blkg_lookup(blkcg, q);
 	if (blkg)
 		return blkg;
 
@@ -185,20 +215,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
-/* called under rcu_read_lock(). */
-struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
-				struct request_queue *q)
-{
-	struct blkio_group *blkg;
-	struct hlist_node *n;
-
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (blkg->q == q)
-			return blkg;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(blkg_lookup);
-
 static void blkg_destroy(struct blkio_group *blkg)
 {
 	struct request_queue *q = blkg->q;
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -416,7 +416,8 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
  * In bypass mode, only the dispatch FIFO queue of @q is used.  This
  * function makes @q enter bypass mode and drains all requests which were
  * throttled or issued before.  On return, it's guaranteed that no request
- * is being throttled or has ELVPRIV set.
+ * is being throttled or has ELVPRIV set and blk_queue_bypass() %true
+ * inside queue or RCU read lock.
  */
 void blk_queue_bypass_start(struct request_queue *q)
 {
@@ -426,6 +427,8 @@ void blk_queue_bypass_start(struct request_queue *q)
 	spin_unlock_irq(q->queue_lock);
 
 	blk_drain_queue(q, false);
+	/* ensure blk_queue_bypass() is %true inside RCU read lock */
+	synchronize_rcu();
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_start);
 
@@ -462,7 +465,15 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	spin_lock_irq(lock);
 
-	/* dead queue is permanently in bypass mode till released */
+	/*
+	 * Dead queue is permanently in bypass mode till released.  Note
+	 * that, unlike blk_queue_bypass_start(), we aren't performing
+	 * synchronize_rcu() after entering bypass mode to avoid the delay
+	 * as some drivers create and destroy a lot of queues while
+	 * probing.  This is still safe because blk_release_queue() will be
+	 * called only after the queue refcnt drops to zero and nothing,
+	 * RCU or not, would be traversing the queue by then.
+	 */
 	q->bypass_depth++;
 	queue_flag_set(QUEUE_FLAG_BYPASS, q);
 