From 59d13bf5f57ded658c872fa22276f75ab8f12841 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 9 May 2014 13:41:15 -0600 Subject: [PATCH 1/5] blk-mq: use sparser tag layout for lower queue depth For best performance, spreading tags over multiple cachelines makes the tagging more efficient on multicore systems. But since we have 8 * sizeof(unsigned long) tags per cacheline, we don't always get a nice spread. Attempt to spread the tags over at least 4 cachelines, using fewer number of bits per unsigned long if we have to. This improves tagging performance in setups with 32-128 tags. For higher depths, the spread is the same as before (BITS_PER_LONG tags per cacheline). Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 45 ++++++++++++++++++++++++++++++++------------- block/blk-mq-tag.h | 7 ++++--- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 467f3a20b355..6c78c08865e3 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -44,7 +44,7 @@ static int __bt_get_word(struct blk_mq_bitmap *bm, unsigned int last_tag) { int tag, org_last_tag, end; - org_last_tag = last_tag = TAG_TO_BIT(last_tag); + org_last_tag = last_tag; end = bm->depth; do { restart: @@ -84,12 +84,12 @@ static int __bt_get(struct blk_mq_bitmap_tags *bt, unsigned int *tag_cache) int index, i, tag; last_tag = org_last_tag = *tag_cache; - index = TAG_TO_INDEX(last_tag); + index = TAG_TO_INDEX(bt, last_tag); for (i = 0; i < bt->map_nr; i++) { - tag = __bt_get_word(&bt->map[index], last_tag); + tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag)); if (tag != -1) { - tag += index * BITS_PER_LONG; + tag += (index << bt->bits_per_word); goto done; } @@ -233,10 +233,10 @@ static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) { - const int index = TAG_TO_INDEX(tag); + const int index = TAG_TO_INDEX(bt, tag); struct bt_wait_state *bs; - 
clear_bit(TAG_TO_BIT(tag), &bt->map[index].word); + clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word); bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { @@ -292,7 +292,7 @@ static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, bit++; } while (1); - off += BITS_PER_LONG; + off += (1 << bt->bits_per_word); } } @@ -333,14 +333,31 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, { int i; + bt->bits_per_word = ilog2(BITS_PER_LONG); + /* * Depth can be zero for reserved tags, that's not a failure * condition. */ if (depth) { - int nr, i, map_depth; + unsigned int nr, i, map_depth, tags_per_word; - nr = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG; + tags_per_word = (1 << bt->bits_per_word); + + /* + * If the tag space is small, shrink the number of tags + * per word so we spread over a few cachelines, at least. + * If less than 4 tags, just forget about it, it's not + * going to work optimally anyway. + */ + if (depth >= 4) { + while (tags_per_word * 4 > depth) { + bt->bits_per_word--; + tags_per_word = (1 << bt->bits_per_word); + } + } + + nr = ALIGN(depth, tags_per_word) / tags_per_word; bt->map = kzalloc_node(nr * sizeof(struct blk_mq_bitmap), GFP_KERNEL, node); if (!bt->map) @@ -349,8 +366,8 @@ static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, bt->map_nr = nr; map_depth = depth; for (i = 0; i < nr; i++) { - bt->map[i].depth = min(map_depth, BITS_PER_LONG); - map_depth -= BITS_PER_LONG; + bt->map[i].depth = min(map_depth, tags_per_word); + map_depth -= tags_per_word; } } @@ -443,8 +460,10 @@ ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) if (!tags) return 0; - page += sprintf(page, "nr_tags=%u, reserved_tags=%u\n", - tags->nr_tags, tags->nr_reserved_tags); + page += sprintf(page, "nr_tags=%u, reserved_tags=%u, " + "bits_per_word=%u\n", + tags->nr_tags, tags->nr_reserved_tags, + tags->bitmap_tags.bits_per_word); free = bt_unused_tags(&tags->bitmap_tags); res = 
bt_unused_tags(&tags->breserved_tags); diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 06d4a2f0f7a0..7aa9f0665489 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h @@ -11,8 +11,8 @@ struct bt_wait_state { wait_queue_head_t wait; } ____cacheline_aligned_in_smp; -#define TAG_TO_INDEX(tag) ((tag) / BITS_PER_LONG) -#define TAG_TO_BIT(tag) ((tag) & (BITS_PER_LONG - 1)) +#define TAG_TO_INDEX(bt, tag) ((tag) >> (bt)->bits_per_word) +#define TAG_TO_BIT(bt, tag) ((tag) & ((1 << (bt)->bits_per_word) - 1)) struct blk_mq_bitmap { unsigned long word; @@ -22,9 +22,10 @@ struct blk_mq_bitmap { struct blk_mq_bitmap_tags { unsigned int depth; unsigned int wake_cnt; + unsigned int bits_per_word; - struct blk_mq_bitmap *map; unsigned int map_nr; + struct blk_mq_bitmap *map; unsigned int wake_index; struct bt_wait_state *bs; From 0289b2e110b7824b2f76d194ad6f8f0844e270ad Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 11 May 2014 01:01:48 +0800 Subject: [PATCH 2/5] blk-mq: bitmap tag: use clear_bit_unlock in bt_clear_tag() The unlock memory barrier needs to order access to req in free path and clearing tag bit, otherwise either request free path may see an allocated request, or initialized request in allocate path might be modified by the ongoing free path. 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 6c78c08865e3..a81b138e89fe 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -236,7 +236,11 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) const int index = TAG_TO_INDEX(bt, tag); struct bt_wait_state *bs; - clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word); + /* + * The unlock memory barrier need to order access to req in free + * path and clearing tag bit + */ + clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { From 60f2df8a29df5f2db2c87fd23122a1cebdf2011a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 11 May 2014 01:01:49 +0800 Subject: [PATCH 3/5] blk-mq: bitmap tag: remove barrier in bt_clear_tag() The barrier isn't necessary because both atomic_dec_and_test() and wake_up() imply a barrier. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 1 - 1 file changed, 1 deletion(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index a81b138e89fe..5a83d8e587f7 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -244,7 +244,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) bs = bt_wake_ptr(bt); if (bs && atomic_dec_and_test(&bs->wait_cnt)) { - smp_mb__after_clear_bit(); atomic_set(&bs->wait_cnt, bt->wake_cnt); bt_index_inc(&bt->wake_index); wake_up(&bs->wait); From 9d3d21aeb4f194cd7ac205abe68b14b47ae736a8 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 10 May 2014 15:43:14 -0600 Subject: [PATCH 4/5] blk-mq: bitmap tag: select random tag between 0 and (depth - 1) The selected tag should be selected at random between 0 and (depth - 1) with probability 1/depth, instead of between 0 and (depth - 2) with probability 1/(depth - 1). 
Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 5a83d8e587f7..f196e60178f4 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -449,10 +449,7 @@ void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag) { unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; - if (depth > 1) - *tag = prandom_u32() % (depth - 1); - else - *tag = 0; + *tag = prandom_u32() % depth; } ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) From 1f236ab22ce3bc5d4f975aa116966c0ea7ec2013 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 11 May 2014 01:01:51 +0800 Subject: [PATCH 5/5] blk-mq: bitmap tag: cleanup blk_mq_init_tags Neither nr_cache nor nr_tags is needed for bitmap tag anymore. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-tag.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index f196e60178f4..8d526a3e02f6 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -417,7 +417,6 @@ static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, unsigned int reserved_tags, int node) { - unsigned int nr_tags, nr_cache; struct blk_mq_tags *tags; if (total_tags > BLK_MQ_TAG_MAX) { @@ -429,9 +428,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, if (!tags) return NULL; - nr_tags = total_tags - reserved_tags; - nr_cache = nr_tags / num_online_cpus(); - tags->nr_tags = total_tags; tags->nr_reserved_tags = reserved_tags;