Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into this series. This contains:

  - A set of NVMe fixes, pulled from Christoph. This includes a set of
    fixes for the fiber channel bits from James Smart, rdma queue depth
    fix from Marta, controller removal fixes from Ming, and some more
    APST quirk updates from Andy.

  - A blk-mq debugfs fix from Bart, fixing a problem with the untangling
    of the sysfs and debugfs blk-mq bits that was added in this series.

  - Error code fix in add_partition() from Dan.

  - A small series of fixes for the new blk-throttle code from Shaohua"

* 'for-linus' of git://git.kernel.dk/linux-block: (21 commits)
  blk-mq: Only register debugfs attributes for blk-mq queues
  nvme: Quirk APST on Intel 600P/P3100 devices
  nvme: only setup block integrity if supported by the driver
  nvme: replace is_flags field in nvme_ctrl_ops with a flags field
  nvme-pci: consistencly use ctrl->device for logging
  partitions/msdos: FreeBSD UFS2 file systems are not recognized
  block: fix an error code in add_partition()
  blk-throttle: force user to configure all settings for io.low
  blk-throttle: respect 0 bps/iops settings for io.low
  blk-throttle: output some debug info in trace
  blk-throttle: add hierarchy support for latency target and idle time
  nvme_fc: remove extra controller reference taken on reconnect
  nvme_fc: correct nvme status set on abort
  nvme_fc: set logging level on resets/deletes
  nvme_fc: revise comment on teardown
  nvme_fc: Support ctrl_loss_tmo
  nvme_fc: get rid of local reconnect_delay
  blk-mq: remove blk_mq_abort_requeue_list()
  nvme: avoid to use blk_mq_abort_requeue_list()
  nvme: use blk_mq_start_hw_queues() in nvme_kill_queues()
  ...
commit 1b8f2ffc79
12 changed files with 248 additions and 211 deletions
@@ -628,25 +628,6 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q,
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
void blk_mq_abort_requeue_list(struct request_queue *q)
{
unsigned long flags;
LIST_HEAD(rq_list);
spin_lock_irqsave(&q->requeue_lock, flags);
list_splice_init(&q->requeue_list, &rq_list);
spin_unlock_irqrestore(&q->requeue_lock, flags);
while (!list_empty(&rq_list)) {
struct request *rq;
rq = list_first_entry(&rq_list, struct request, queuelist);
list_del_init(&rq->queuelist);
blk_mq_end_request(rq, -EIO);
}
}
EXPORT_SYMBOL(blk_mq_abort_requeue_list);
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
if (tag < tags->nr_tags) {
@@ -887,10 +887,10 @@ int blk_register_queue(struct gendisk *disk)
goto unlock;
}
if (q->mq_ops)
if (q->mq_ops) {
__blk_mq_register_dev(dev, q);
blk_mq_debugfs_register(q);
blk_mq_debugfs_register(q);
}
kobject_uevent(&q->kobj, KOBJ_ADD);
@@ -22,11 +22,11 @@ static int throtl_quantum = 32;
#define DFL_THROTL_SLICE_HD (HZ / 10)
#define DFL_THROTL_SLICE_SSD (HZ / 50)
#define MAX_THROTL_SLICE (HZ)
#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
/* default latency target is 0, eg, guarantee IO latency by default */
#define DFL_LATENCY_TARGET (0)
#define MIN_THROTL_BPS (320 * 1024)
#define MIN_THROTL_IOPS (10)
#define DFL_LATENCY_TARGET (-1L)
#define DFL_IDLE_THRESHOLD (0)
#define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
@@ -157,6 +157,7 @@ struct throtl_grp {
unsigned long last_check_time;
unsigned long latency_target; /* us */
unsigned long latency_target_conf; /* us */
/* When did we start a new slice */
unsigned long slice_start[2];
unsigned long slice_end[2];
@@ -165,6 +166,7 @@ struct throtl_grp {
unsigned long checked_last_finish_time; /* ns / 1024 */
unsigned long avg_idletime; /* ns / 1024 */
unsigned long idletime_threshold; /* us */
unsigned long idletime_threshold_conf; /* us */
unsigned int bio_cnt; /* total bios */
unsigned int bad_bio_cnt; /* bios exceeding latency threshold */
@@ -201,8 +203,6 @@ struct throtl_data
unsigned int limit_index;
bool limit_valid[LIMIT_CNT];
unsigned long dft_idletime_threshold; /* us */
unsigned long low_upgrade_time;
unsigned long low_downgrade_time;
@@ -294,8 +294,14 @@ static uint64_t tg_bps_limit(struct throtl_grp *tg, int rw)
td = tg->td;
ret = tg->bps[rw][td->limit_index];
if (ret == 0 && td->limit_index == LIMIT_LOW)
return tg->bps[rw][LIMIT_MAX];
if (ret == 0 && td->limit_index == LIMIT_LOW) {
/* intermediate node or iops isn't 0 */
if (!list_empty(&blkg->blkcg->css.children) ||
tg->iops[rw][td->limit_index])
return U64_MAX;
else
return MIN_THROTL_BPS;
}
if (td->limit_index == LIMIT_MAX && tg->bps[rw][LIMIT_LOW] &&
tg->bps[rw][LIMIT_LOW] != tg->bps[rw][LIMIT_MAX]) {
@@ -315,10 +321,17 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
if (cgroup_subsys_on_dfl(io_cgrp_subsys) && !blkg->parent)
return UINT_MAX;
td = tg->td;
ret = tg->iops[rw][td->limit_index];
if (ret == 0 && tg->td->limit_index == LIMIT_LOW)
return tg->iops[rw][LIMIT_MAX];
if (ret == 0 && tg->td->limit_index == LIMIT_LOW) {
/* intermediate node or bps isn't 0 */
if (!list_empty(&blkg->blkcg->css.children) ||
tg->bps[rw][td->limit_index])
return UINT_MAX;
else
return MIN_THROTL_IOPS;
}
if (td->limit_index == LIMIT_MAX && tg->iops[rw][LIMIT_LOW] &&
tg->iops[rw][LIMIT_LOW] != tg->iops[rw][LIMIT_MAX]) {
@@ -482,6 +495,9 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
/* LIMIT_LOW will have default value 0 */
tg->latency_target = DFL_LATENCY_TARGET;
tg->latency_target_conf = DFL_LATENCY_TARGET;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
return &tg->pd;
}
@@ -510,8 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
tg->td = td;
tg->idletime_threshold = td->dft_idletime_threshold;
}
/*
@@ -1349,7 +1363,7 @@ static int tg_print_conf_uint(struct seq_file *sf, void *v)
return 0;
}
static void tg_conf_updated(struct throtl_grp *tg)
static void tg_conf_updated(struct throtl_grp *tg, bool global)
{
struct throtl_service_queue *sq = &tg->service_queue;
struct cgroup_subsys_state *pos_css;
@@ -1367,8 +1381,26 @@ static void tg_conf_updated(struct throtl_grp *tg)
* restrictions in the whole hierarchy and allows them to bypass
* blk-throttle.
*/
blkg_for_each_descendant_pre(blkg, pos_css, tg_to_blkg(tg))
tg_update_has_rules(blkg_to_tg(blkg));
blkg_for_each_descendant_pre(blkg, pos_css,
global ? tg->td->queue->root_blkg : tg_to_blkg(tg)) {
struct throtl_grp *this_tg = blkg_to_tg(blkg);
struct throtl_grp *parent_tg;
tg_update_has_rules(this_tg);
/* ignore root/second level */
if (!cgroup_subsys_on_dfl(io_cgrp_subsys) || !blkg->parent ||
!blkg->parent->parent)
continue;
parent_tg = blkg_to_tg(blkg->parent);
/*
* make sure all children has lower idle time threshold and
* higher latency target
*/
this_tg->idletime_threshold = min(this_tg->idletime_threshold,
parent_tg->idletime_threshold);
this_tg->latency_target = max(this_tg->latency_target,
parent_tg->latency_target);
}
/*
* We're already holding queue_lock and know @tg is valid. Let's
@@ -1413,7 +1445,7 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of,
else
*(unsigned int *)((void *)tg + of_cft(of)->private) = v;
tg_conf_updated(tg);
tg_conf_updated(tg, false);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
@@ -1497,34 +1529,34 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
tg->iops_conf[READ][off] == iops_dft &&
tg->iops_conf[WRITE][off] == iops_dft &&
(off != LIMIT_LOW ||
(tg->idletime_threshold == tg->td->dft_idletime_threshold &&
tg->latency_target == DFL_LATENCY_TARGET)))
(tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
tg->latency_target_conf == DFL_LATENCY_TARGET)))
return 0;
if (tg->bps_conf[READ][off] != bps_dft)
if (tg->bps_conf[READ][off] != U64_MAX)
snprintf(bufs[0], sizeof(bufs[0]), "%llu",
tg->bps_conf[READ][off]);
if (tg->bps_conf[WRITE][off] != bps_dft)
if (tg->bps_conf[WRITE][off] != U64_MAX)
snprintf(bufs[1], sizeof(bufs[1]), "%llu",
tg->bps_conf[WRITE][off]);
if (tg->iops_conf[READ][off] != iops_dft)
if (tg->iops_conf[READ][off] != UINT_MAX)
snprintf(bufs[2], sizeof(bufs[2]), "%u",
tg->iops_conf[READ][off]);
if (tg->iops_conf[WRITE][off] != iops_dft)
if (tg->iops_conf[WRITE][off] != UINT_MAX)
snprintf(bufs[3], sizeof(bufs[3]), "%u",
tg->iops_conf[WRITE][off]);
if (off == LIMIT_LOW) {
if (tg->idletime_threshold == ULONG_MAX)
if (tg->idletime_threshold_conf == ULONG_MAX)
strcpy(idle_time, " idle=max");
else
snprintf(idle_time, sizeof(idle_time), " idle=%lu",
tg->idletime_threshold);
tg->idletime_threshold_conf);
if (tg->latency_target == ULONG_MAX)
if (tg->latency_target_conf == ULONG_MAX)
strcpy(latency_time, " latency=max");
else
snprintf(latency_time, sizeof(latency_time),
" latency=%lu", tg->latency_target);
" latency=%lu", tg->latency_target_conf);
}
seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
@@ -1563,8 +1595,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
v[2] = tg->iops_conf[READ][index];
v[3] = tg->iops_conf[WRITE][index];
idle_time = tg->idletime_threshold;
latency_time = tg->latency_target;
idle_time = tg->idletime_threshold_conf;
latency_time = tg->latency_target_conf;
while (true) {
char tok[27]; /* wiops=18446744073709551616 */
char *p;
@@ -1623,17 +1655,33 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
tg->iops_conf[READ][LIMIT_MAX]);
tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
tg->iops_conf[WRITE][LIMIT_MAX]);
tg->idletime_threshold_conf = idle_time;
tg->latency_target_conf = latency_time;
if (index == LIMIT_LOW) {
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_valid[LIMIT_LOW])
tg->td->limit_index = LIMIT_LOW;
tg->idletime_threshold = (idle_time == ULONG_MAX) ?
ULONG_MAX : idle_time;
tg->latency_target = (latency_time == ULONG_MAX) ?
ULONG_MAX : latency_time;
/* force user to configure all settings for low limit */
if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
tg->latency_target_conf == DFL_LATENCY_TARGET) {
tg->bps[READ][LIMIT_LOW] = 0;
tg->bps[WRITE][LIMIT_LOW] = 0;
tg->iops[READ][LIMIT_LOW] = 0;
tg->iops[WRITE][LIMIT_LOW] = 0;
tg->idletime_threshold = DFL_IDLE_THRESHOLD;
tg->latency_target = DFL_LATENCY_TARGET;
} else if (index == LIMIT_LOW) {
tg->idletime_threshold = tg->idletime_threshold_conf;
tg->latency_target = tg->latency_target_conf;
}
tg_conf_updated(tg);
blk_throtl_update_limit_valid(tg->td);
if (tg->td->limit_valid[LIMIT_LOW]) {
if (index == LIMIT_LOW)
tg->td->limit_index = LIMIT_LOW;
} else
tg->td->limit_index = LIMIT_MAX;
tg_conf_updated(tg, index == LIMIT_LOW &&
tg->td->limit_valid[LIMIT_LOW]);
ret = 0;
out_finish:
blkg_conf_finish(&ctx);
@@ -1722,17 +1770,25 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
/*
* cgroup is idle if:
* - single idle is too long, longer than a fixed value (in case user
* configure a too big threshold) or 4 times of slice
* configure a too big threshold) or 4 times of idletime threshold
* - average think time is more than threshold
* - IO latency is largely below threshold
*/
unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
unsigned long time;
bool ret;
time = min_t(unsigned long, MAX_IDLE_TIME, time);
return (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
tg->avg_idletime > tg->idletime_threshold ||
(tg->latency_target && tg->bio_cnt &&
time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
ret = tg->latency_target == DFL_LATENCY_TARGET ||
tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
(ktime_get_ns() >> 10) - tg->last_finish_time > time ||
tg->avg_idletime > tg->idletime_threshold ||
(tg->latency_target && tg->bio_cnt &&
tg->bad_bio_cnt * 5 < tg->bio_cnt);
throtl_log(&tg->service_queue,
"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
tg->avg_idletime, tg->idletime_threshold, tg->bad_bio_cnt,
tg->bio_cnt, ret, tg->td->scale);
return ret;
}
static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
@@ -1828,6 +1884,7 @@ static void throtl_upgrade_state(struct throtl_data *td)
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
throtl_log(&td->service_queue, "upgrade to max");
td->limit_index = LIMIT_MAX;
td->low_upgrade_time = jiffies;
td->scale = 0;
@@ -1850,6 +1907,7 @@ static void throtl_downgrade_state(struct throtl_data *td, int new)
{
td->scale /= 2;
throtl_log(&td->service_queue, "downgrade, scale %d", td->scale);
if (td->scale) {
td->low_upgrade_time = jiffies - td->scale * td->throtl_slice;
return;
@@ -2023,6 +2081,11 @@ static void throtl_update_latency_buckets(struct throtl_data *td)
td->avg_buckets[i].valid = true;
last_latency = td->avg_buckets[i].latency;
}
for (i = 0; i < LATENCY_BUCKET_SIZE; i++)
throtl_log(&td->service_queue,
"Latency bucket %d: latency=%ld, valid=%d", i,
td->avg_buckets[i].latency, td->avg_buckets[i].valid);
}
#else
static inline void throtl_update_latency_buckets(struct throtl_data *td)
@@ -2354,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
void blk_throtl_register_queue(struct request_queue *q)
{
struct throtl_data *td;
struct cgroup_subsys_state *pos_css;
struct blkcg_gq *blkg;
td = q->td;
BUG_ON(!td);
if (blk_queue_nonrot(q)) {
if (blk_queue_nonrot(q))
td->throtl_slice = DFL_THROTL_SLICE_SSD;
td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
} else {
else
td->throtl_slice = DFL_THROTL_SLICE_HD;
td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
}
#ifndef CONFIG_BLK_DEV_THROTTLING_LOW
/* if no low limit, use previous default */
td->throtl_slice = DFL_THROTL_SLICE_HD;
@@ -2375,18 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
td->track_bio_latency = !q->mq_ops && !q->request_fn;
if (!td->track_bio_latency)
blk_stat_enable_accounting(q);
/*
* some tg are created before queue is fully initialized, eg, nonrot
* isn't initialized yet
*/
rcu_read_lock();
blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
struct throtl_grp *tg = blkg_to_tg(blkg);
tg->idletime_threshold = td->dft_idletime_threshold;
}
rcu_read_unlock();
}
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
@@ -320,8 +320,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
if (info) {
struct partition_meta_info *pinfo = alloc_part_info(disk);
if (!pinfo)
if (!pinfo) {
err = -ENOMEM;
goto out_free_stats;
}
memcpy(pinfo, info, sizeof(*info));
p->info = pinfo;
}
@@ -300,6 +300,8 @@ static void parse_bsd(struct parsed_partitions *state,
continue;
bsd_start = le32_to_cpu(p->p_offset);
bsd_size = le32_to_cpu(p->p_size);
if (memcmp(flavour, "bsd\0", 4) == 0)
bsd_start += offset;
if (offset == bsd_start && size == bsd_size)
/* full parent partition, we have it already */
continue;
@@ -925,6 +925,29 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
u16 bs)
{
struct nvme_ns *ns = disk->private_data;
u16 old_ms = ns->ms;
u8 pi_type = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
pi_type = id->dps & NVME_NS_DPS_PI_MASK;
if (blk_get_integrity(disk) &&
(ns->pi_type != pi_type || ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
}
static void nvme_init_integrity(struct nvme_ns *ns)
{
struct blk_integrity integrity;
@@ -951,6 +974,10 @@ static void nvme_init_integrity(struct nvme_ns *ns)
blk_queue_max_integrity_segments(ns->queue, 1);
}
#else
static void nvme_prep_integrity(struct gendisk *disk, struct nvme_id_ns *id,
u16 bs)
{
}
static void nvme_init_integrity(struct nvme_ns *ns)
{
}
@@ -997,37 +1024,22 @@ static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id)
static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
u8 lbaf, pi_type;
u16 old_ms;
unsigned short bs;
old_ms = ns->ms;
lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
ns->lba_shift = id->lbaf[lbaf].ds;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
u16 bs;
/*
* If identify namespace failed, use default 512 byte block size so
* block layer can use before failing read/write for 0 capacity.
*/
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
bs = 1 << ns->lba_shift;
/* XXX: PI implementation requires metadata equal t10 pi tuple size */
pi_type = ns->ms == sizeof(struct t10_pi_tuple) ?
id->dps & NVME_NS_DPS_PI_MASK : 0;
blk_mq_freeze_queue(disk->queue);
if (blk_get_integrity(disk) && (ns->pi_type != pi_type ||
ns->ms != old_ms ||
bs != queue_logical_block_size(disk->queue) ||
(ns->ms && ns->ext)))
blk_integrity_unregister(disk);
ns->pi_type = pi_type;
if (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
nvme_prep_integrity(disk, id, bs);
blk_queue_logical_block_size(ns->queue, bs);
if (ns->ms && !blk_get_integrity(disk) && !ns->ext)
nvme_init_integrity(ns);
if (ns->ms && !(ns->ms == 8 && ns->pi_type) && !blk_get_integrity(disk))
@@ -1605,7 +1617,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
}
memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd));
if (ctrl->ops->is_fabrics) {
if (ctrl->ops->flags & NVME_F_FABRICS) {
ctrl->icdoff = le16_to_cpu(id->icdoff);
ctrl->ioccsz = le32_to_cpu(id->ioccsz);
ctrl->iorcsz = le32_to_cpu(id->iorcsz);
@@ -2098,7 +2110,6 @@ static void nvme_ns_remove(struct nvme_ns *ns)
if (ns->ndev)
nvme_nvm_unregister_sysfs(ns);
del_gendisk(ns->disk);
blk_mq_abort_requeue_list(ns->queue);
blk_cleanup_queue(ns->queue);
}
@@ -2436,8 +2447,16 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
continue;
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
blk_mq_abort_requeue_list(ns->queue);
blk_mq_start_stopped_hw_queues(ns->queue, true);
/*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
* when the final removal happens.
*/
blk_mq_start_hw_queues(ns->queue);
/* draining requests in requeue list */
blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
}
@@ -45,8 +45,6 @@ enum nvme_fc_queue_flags {
#define NVMEFC_QUEUE_DELAY 3 /* ms units */
#define NVME_FC_MAX_CONNECT_ATTEMPTS 1
struct nvme_fc_queue {
struct nvme_fc_ctrl *ctrl;
struct device *dev;
@@ -165,8 +163,6 @@ struct nvme_fc_ctrl {
struct work_struct delete_work;
struct work_struct reset_work;
struct delayed_work connect_work;
int reconnect_delay;
int connect_attempts;
struct kref ref;
u32 flags;
@@ -1376,9 +1372,9 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
if (!complete_rq) {
if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
status = cpu_to_le16(NVME_SC_ABORT_REQ);
status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
if (blk_queue_dying(rq->q))
status |= cpu_to_le16(NVME_SC_DNR);
status |= cpu_to_le16(NVME_SC_DNR << 1);
}
nvme_end_request(rq, status, result);
} else
@@ -1751,7 +1747,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport association error detected: %s\n",
ctrl->cnum, errmsg);
dev_info(ctrl->ctrl.device,
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: resetting controller\n", ctrl->cnum);
/* stop the queues on error, cleanup is in reset thread */
@@ -2195,9 +2191,6 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
if (!opts->nr_io_queues)
return 0;
dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
opts->nr_io_queues);
nvme_fc_init_io_queues(ctrl);
memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
@@ -2268,9 +2261,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
if (ctrl->queue_count == 1)
return 0;
dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n",
opts->nr_io_queues);
nvme_fc_init_io_queues(ctrl);
ret = blk_mq_reinit_tagset(&ctrl->tag_set);
@@ -2306,7 +2296,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
int ret;
bool changed;
ctrl->connect_attempts++;
++ctrl->ctrl.opts->nr_reconnects;
/*
* Create the admin queue
@@ -2403,9 +2393,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
WARN_ON_ONCE(!changed);
ctrl->connect_attempts = 0;
kref_get(&ctrl->ctrl.kref);
ctrl->ctrl.opts->nr_reconnects = 0;
if (ctrl->queue_count > 1) {
nvme_start_queues(&ctrl->ctrl);
@@ -2536,26 +2524,32 @@ nvme_fc_delete_ctrl_work(struct work_struct *work)
/*
* tear down the controller
* This will result in the last reference on the nvme ctrl to
* expire, calling the transport nvme_fc_nvme_ctrl_freed() callback.
* From there, the transport will tear down it's logical queues and
* association.
* After the last reference on the nvme ctrl is removed,
* the transport nvme_fc_nvme_ctrl_freed() callback will be
* invoked. From there, the transport will tear down it's
* logical queues and association.
*/
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
}
static bool
__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return true;
if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
return true;
return false;
}
static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
return -EBUSY;
if (!queue_work(nvme_fc_wq, &ctrl->delete_work))
return -EBUSY;
return 0;
return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
}
/*
@@ -2580,6 +2574,35 @@ nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
return ret;
}
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
/* If we are resetting/deleting then do nothing */
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
ctrl->ctrl.state == NVME_CTRL_LIVE);
return;
}
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, status);
if (nvmf_should_reconnect(&ctrl->ctrl)) {
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->ctrl.opts->reconnect_delay * HZ);
} else {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->ctrl.opts->nr_reconnects);
WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
}
}
static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
@@ -2591,34 +2614,9 @@ nvme_fc_reset_ctrl_work(struct work_struct *work)
nvme_fc_delete_association(ctrl);
ret = nvme_fc_create_association(ctrl);
if (ret) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
ctrl->cnum, ret);
if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->connect_attempts);
if (!nvme_change_ctrl_state(&ctrl->ctrl,
NVME_CTRL_DELETING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to change state "
"to DELETING\n", ctrl->cnum);
return;
}
WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
return;
}
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->reconnect_delay * HZ);
} else
if (ret)
nvme_fc_reconnect_or_delete(ctrl, ret);
else
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reset complete\n", ctrl->cnum);
}
@@ -2632,7 +2630,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
dev_warn(ctrl->ctrl.device,
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum);
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
@@ -2649,7 +2647,7 @@ nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.name = "fc",
.module = THIS_MODULE,
.is_fabrics = true,
.flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
@@ -2671,34 +2669,9 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
struct nvme_fc_ctrl, connect_work);
ret = nvme_fc_create_association(ctrl);
if (ret) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt failed (%d)\n",
ctrl->cnum, ret);
if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) {
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Max reconnect attempts (%d) "
"reached. Removing controller\n",
ctrl->cnum, ctrl->connect_attempts);
if (!nvme_change_ctrl_state(&ctrl->ctrl,
NVME_CTRL_DELETING)) {
dev_err(ctrl->ctrl.device,
"NVME-FC{%d}: failed to change state "
"to DELETING\n", ctrl->cnum);
return;
}
WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work));
return;
}
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
ctrl->cnum, ctrl->reconnect_delay);
queue_delayed_work(nvme_fc_wq, &ctrl->connect_work,
ctrl->reconnect_delay * HZ);
} else
if (ret)
nvme_fc_reconnect_or_delete(ctrl, ret);
else
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: controller reconnect complete\n",
ctrl->cnum);
@@ -2755,7 +2728,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work);
INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
ctrl->reconnect_delay = opts->reconnect_delay;
spin_lock_init(&ctrl->lock);
/* io queue count */
@@ -2819,7 +2791,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->ctrl.opts = NULL;
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
nvme_put_ctrl(&ctrl->ctrl);
/* as we're past the point where we transition to the ref
* counting teardown path, if we return a bad pointer here,
@@ -2835,6 +2806,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
return ERR_PTR(ret);
}
kref_get(&ctrl->ctrl.kref);
dev_info(ctrl->ctrl.device,
"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
ctrl->cnum, ctrl->ctrl.opts->subsysnqn);
@@ -2971,7 +2944,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
static struct nvmf_transport_ops nvme_fc_transport = {
.name = "fc",
.required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
.allowed_opts = NVMF_OPT_RECONNECT_DELAY,
.allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
.create_ctrl = nvme_fc_create_ctrl,
};
@@ -208,7 +208,9 @@ struct nvme_ns {
struct nvme_ctrl_ops {
const char *name;
struct module *module;
bool is_fabrics;
unsigned int flags;
#define NVME_F_FABRICS (1 << 0)
#define NVME_F_METADATA_SUPPORTED (1 << 1)
int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
@@ -263,7 +263,7 @@ static void nvme_dbbuf_set(struct nvme_dev *dev)
c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);
if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
dev_warn(dev->dev, "unable to set dbbuf\n");
dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
/* Free memory and continue on */
nvme_dbbuf_dma_free(dev);
}
@@ -1394,11 +1394,11 @@ static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
&pci_status);
if (result == PCIBIOS_SUCCESSFUL)
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
csts, pci_status);
else
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
}
@@ -1740,8 +1740,8 @@ static int nvme_pci_enable(struct nvme_dev *dev)
*/
if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
dev->q_depth = 2;
dev_warn(dev->dev, "detected Apple NVMe controller, set "
"queue depth=%u to work around controller resets\n",
dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
"set queue depth=%u to work around controller resets\n",
dev->q_depth);
}
@@ -1759,7 +1759,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
if (dev->cmbsz) {
if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
&dev_attr_cmb.attr, NULL))
dev_warn(dev->dev,
dev_warn(dev->ctrl.device,
"failed to add sysfs attribute for CMB\n");
}
}
@@ -2047,6 +2047,7 @@ static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
.flags = NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvme_pci_reg_read32,
.reg_write32 = nvme_pci_reg_write32,
.reg_read64 = nvme_pci_reg_read64,
@@ -2293,6 +2294,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0x0a54),
.driver_data = NVME_QUIRK_STRIPE_SIZE |
NVME_QUIRK_DEALLOCATE_ZEROES, },
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
@@ -1038,6 +1038,19 @@ static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
nvme_rdma_wr_error(cq, wc, "SEND");
}
static inline int nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
{
int sig_limit;
/*
* We signal completion every queue depth/2 and also handle the
* degenerated case of a device with queue_depth=1, where we
* would need to signal every message.
*/
sig_limit = max(queue->queue_size / 2, 1);
return (++queue->sig_count % sig_limit) == 0;
}
static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
struct ib_send_wr *first, bool flush)
@@ -1065,9 +1078,6 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* Would have been way to obvious to handle this in hardware or
* at least the RDMA stack..
*
* This messy and racy code sniplet is copy and pasted from the iSER
* initiator, and the magic '32' comes from there as well.
*
* Always signal the flushes. The magic request used for the flush
* sequencer is not allocated in our driver's tagset and it's
* triggered to be freed by blk_cleanup_queue(). So we need to
@@ -1075,7 +1085,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
* embedded in request's payload, is not freed when __ib_process_cq()
* calls wr_cqe->done().
*/
if ((++queue->sig_count % 32) == 0 || flush)
if (nvme_rdma_queue_sig_limit(queue) || flush)
wr.send_flags |= IB_SEND_SIGNALED;
if (first)
@@ -1782,7 +1792,7 @@ static int nvme_rdma_reset_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
.is_fabrics = true,
.flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
@@ -558,7 +558,7 @@ static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
.name = "loop",
.module = THIS_MODULE,
.is_fabrics = true,
.flags = NVME_F_FABRICS,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
@@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
bool kick_requeue_list);
void blk_mq_kick_requeue_list(struct request_queue *q);
void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
void blk_mq_abort_requeue_list(struct request_queue *q);
void blk_mq_complete_request(struct request *rq);
bool blk_mq_queue_stopped(struct request_queue *q);