From 076098e51bd502402a7c166b5c2bb59a196e84e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 16:08:57 -0400 Subject: [PATCH 01/21] bio_map_user_iov(): switch to iov_iter_get_pages()/iov_iter_advance() ... and to hell with iov_for_each() nonsense Signed-off-by: Al Viro --- block/bio.c | 55 ++++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 32 deletions(-) diff --git a/block/bio.c b/block/bio.c index 101c2a9b5481..5de54dedab66 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1328,7 +1328,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, struct page **pages; struct bio *bio; int cur_page = 0; - int ret, offset; + int ret; struct iov_iter i; struct iovec iov; struct bio_vec *bvec; @@ -1365,43 +1365,32 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (!pages) goto out; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int local_nr_pages = end - start; - const int page_limit = cur_page + local_nr_pages; + i = *iter; + while (iov_iter_count(&i)) { + ssize_t bytes; + size_t offs, added = 0; + int npages; - ret = get_user_pages_fast(uaddr, local_nr_pages, - (iter->type & WRITE) != WRITE, - &pages[cur_page]); - if (unlikely(ret < local_nr_pages)) { - for (j = cur_page; j < page_limit; j++) { - if (!pages[j]) - break; - put_page(pages[j]); - } - ret = -EFAULT; + bytes = iov_iter_get_pages(&i, pages + cur_page, LONG_MAX, + nr_pages - cur_page, &offs); + if (unlikely(bytes <= 0)) { + ret = bytes ? bytes : -EFAULT; goto out_unmap; } - offset = offset_in_page(uaddr); - for (j = cur_page; j < page_limit; j++) { - unsigned int bytes = PAGE_SIZE - offset; + npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); + + for (j = cur_page; j < cur_page + npages; j++) { + unsigned int n = PAGE_SIZE - offs; unsigned short prev_bi_vcnt = bio->bi_vcnt; - if (len <= 0) - break; - - if (bytes > len) - bytes = len; + if (n > bytes) + n = bytes; /* * sorry... */ - if (bio_add_pc_page(q, bio, pages[j], bytes, offset) < - bytes) + if (bio_add_pc_page(q, bio, pages[j], n, offs) < n) break; /* @@ -1411,16 +1400,18 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (bio->bi_vcnt == prev_bi_vcnt) put_page(pages[j]); - len -= bytes; - offset = 0; + added += n; + bytes -= n; + offs = 0; } + iov_iter_advance(&i, added); - cur_page = j; /* * release the pages we didn't map into the bio, if any */ - while (j < page_limit) + while (j < cur_page + npages) put_page(pages[j++]); + cur_page = j; } kfree(pages); From 629e42bcc3d0bc04b4e0e40ef3f831507a4693bd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 16:13:10 -0400 Subject: [PATCH 02/21] ... and with iov_iter_get_pages_alloc() it becomes even simpler Signed-off-by: Al Viro --- block/bio.c | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5de54dedab66..5dbc5e90d716 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1325,9 +1325,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, { int j; int nr_pages = 0; - struct page **pages; struct bio *bio; - int cur_page = 0; int ret; struct iov_iter i; struct iovec iov; @@ -1360,19 +1358,14 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (!bio) return ERR_PTR(-ENOMEM); - ret = -ENOMEM; - pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); - if (!pages) - goto out; - i = *iter; while (iov_iter_count(&i)) { + struct page **pages; ssize_t bytes; size_t offs, added = 0; int npages; - bytes = iov_iter_get_pages(&i, pages + cur_page, LONG_MAX, - nr_pages - cur_page, &offs); + bytes = iov_iter_get_pages_alloc(&i, &pages, LONG_MAX, &offs); if (unlikely(bytes <= 0)) { ret = bytes ? bytes : -EFAULT; goto out_unmap; @@ -1380,7 +1373,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - for (j = cur_page; j < cur_page + npages; j++) { + for (j = 0; j < npages; j++) { unsigned int n = PAGE_SIZE - offs; unsigned short prev_bi_vcnt = bio->bi_vcnt; @@ -1409,13 +1402,11 @@ struct bio *bio_map_user_iov(struct request_queue *q, /* * release the pages we didn't map into the bio, if any */ - while (j < cur_page + npages) + while (j < npages) put_page(pages[j++]); - cur_page = j; + kvfree(pages); } - kfree(pages); - bio_set_flag(bio, BIO_USER_MAPPED); /* @@ -1431,8 +1422,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, bio_for_each_segment_all(bvec, bio, j) { put_page(bvec->bv_page); } - out: - kfree(pages); bio_put(bio); return ERR_PTR(ret); } From e2e115d18b76467274d8f818f8828ba168f9c80b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 16:16:06 -0400 Subject: [PATCH 03/21] don't rely upon subsequent bio_add_pc_page() calls failing ... they might actually succeed in some cases (when we are at the queue-imposed segments limit, the next page is not mergable with the last one we'd got in, but the first page covered by the next iovec *is* mergable). Make sure that once it's failed, we are done with that bio. Signed-off-by: Al Viro --- block/bio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/bio.c b/block/bio.c index 5dbc5e90d716..fe40948d62af 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1380,10 +1380,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (n > bytes) n = bytes; - /* - * sorry... - */ - if (bio_add_pc_page(q, bio, pages[j], n, offs) < n) + if (!bio_add_pc_page(q, bio, pages[j], n, offs)) break; /* @@ -1405,6 +1402,9 @@ struct bio *bio_map_user_iov(struct request_queue *q, while (j < npages) put_page(pages[j++]); kvfree(pages); + /* couldn't stuff something into bio? */ + if (bytes) + break; } bio_set_flag(bio, BIO_USER_MAPPED); From 98f0bc99055da5797fae0c35d3d18261d59df9ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 16:23:18 -0400 Subject: [PATCH 04/21] bio_map_user_iov(): move alignment check into the main loop Signed-off-by: Al Viro --- block/bio.c | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/block/bio.c b/block/bio.c index fe40948d62af..d851f68727f1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1344,11 +1344,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, return ERR_PTR(-EINVAL); nr_pages += end - start; - /* - * buffer must be aligned to at least logical block size for now - */ - if (uaddr & queue_dma_alignment(q)) - return ERR_PTR(-EINVAL); } if (!nr_pages) @@ -1373,29 +1368,34 @@ struct bio *bio_map_user_iov(struct request_queue *q, npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); - for (j = 0; j < npages; j++) { - unsigned int n = PAGE_SIZE - offs; - unsigned short prev_bi_vcnt = bio->bi_vcnt; + if (unlikely(offs & queue_dma_alignment(q))) { + ret = -EINVAL; + j = 0; + } else { + for (j = 0; j < npages; j++) { + struct page *page = pages[j]; + unsigned int n = PAGE_SIZE - offs; + unsigned short prev_bi_vcnt = bio->bi_vcnt; - if (n > bytes) - n = bytes; + if (n > bytes) + n = bytes; - if (!bio_add_pc_page(q, bio, pages[j], n, offs)) - break; + if (!bio_add_pc_page(q, bio, page, n, offs)) + break; - /* - * check if vector was merged with previous - * drop page reference if needed - */ - if (bio->bi_vcnt == prev_bi_vcnt) - put_page(pages[j]); + /* + * check if vector was merged with previous + * drop page reference if needed + */ + if (bio->bi_vcnt == prev_bi_vcnt) + put_page(page); - added += n; - bytes -= n; - offs = 0; + added += n; + bytes -= n; + offs = 0; + } + iov_iter_advance(&i, added); } - iov_iter_advance(&i, added); - /* * release the pages we didn't map into the bio, if any */ From b282cc766958af161249b7b04d50e3eae12a2a1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Sep 2017 16:24:59 -0400 Subject: [PATCH 05/21] bio_map_user_iov(): get rid of the iov_for_each() Use iov_iter_npages() Signed-off-by: Al Viro --- block/bio.c | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/block/bio.c b/block/bio.c index d851f68727f1..d1ca7eecc8aa 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1324,32 +1324,15 @@ struct bio *bio_map_user_iov(struct request_queue *q, gfp_t gfp_mask) { int j; - int nr_pages = 0; struct bio *bio; int ret; struct iov_iter i; - struct iovec iov; struct bio_vec *bvec; - iov_for_each(iov, i, *iter) { - unsigned long uaddr = (unsigned long) iov.iov_base; - unsigned long len = iov.iov_len; - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - } - - if (!nr_pages) + if (!iov_iter_count(iter)) return ERR_PTR(-EINVAL); - bio = bio_kmalloc(gfp_mask, nr_pages); + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); if (!bio) return ERR_PTR(-ENOMEM); From e81cef5d3001501350b4e596b4bd6dfd26187afa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 09:25:39 -0400 Subject: [PATCH 06/21] blk_rq_map_user_iov(): move iov_iter_advance() down ... into bio_{map,copy}_user_iov() Signed-off-by: Al Viro --- block/bio.c | 6 ++++-- block/blk-map.c | 1 - include/linux/bio.h | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/block/bio.c b/block/bio.c index d1ca7eecc8aa..cd1282db03cb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1195,7 +1195,7 @@ int bio_uncopy_user(struct bio *bio) */ struct bio *bio_copy_user_iov(struct request_queue *q, struct rq_map_data *map_data, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { struct bio_map_data *bmd; @@ -1298,6 +1298,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (ret) goto cleanup; } + iov_iter_advance(iter, bio->bi_iter.bi_size); bio->bi_private = bmd; return bio; @@ -1320,7 +1321,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, * device. Returns an error pointer in case of error. */ struct bio *bio_map_user_iov(struct request_queue *q, - const struct iov_iter *iter, + struct iov_iter *iter, gfp_t gfp_mask) { int j; @@ -1399,6 +1400,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, * reference to it */ bio_get(bio); + iov_iter_advance(iter, bio->bi_iter.bi_size); return bio; out_unmap: diff --git a/block/blk-map.c b/block/blk-map.c index 2547016aa7aa..891eea11f68e 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -69,7 +69,6 @@ static int __blk_rq_map_user_iov(struct request *rq, if (map_data && map_data->null_mapped) bio_set_flag(bio, BIO_NULL_MAPPED); - iov_iter_advance(iter, bio->bi_iter.bi_size); if (map_data) map_data->offset += bio->bi_iter.bi_size; diff --git a/include/linux/bio.h b/include/linux/bio.h index 275c91c99516..6050c0caa4e1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -462,7 +462,7 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, - const struct iov_iter *, gfp_t); + struct iov_iter *, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -494,7 +494,7 @@ extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, - const struct iov_iter *, + struct iov_iter *, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); From 2884d0be878eb5cbbc6d983c6054feef3b9aa86d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 12:09:21 -0400 Subject: [PATCH 07/21] move more stuff down into bio_copy_user_iov() Signed-off-by: Al Viro --- block/bio.c | 5 +++++ block/blk-map.c | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/block/bio.c b/block/bio.c index cd1282db03cb..02457c2d4379 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1289,6 +1289,9 @@ struct bio *bio_copy_user_iov(struct request_queue *q, if (ret) goto cleanup; + if (map_data) + map_data->offset += bio->bi_iter.bi_size; + /* * success */ @@ -1301,6 +1304,8 @@ struct bio *bio_copy_user_iov(struct request_queue *q, iov_iter_advance(iter, bio->bi_iter.bi_size); bio->bi_private = bmd; + if (map_data && map_data->null_mapped) + bio_set_flag(bio, BIO_NULL_MAPPED); return bio; cleanup: if (!map_data) diff --git a/block/blk-map.c b/block/blk-map.c index 891eea11f68e..c872d62b62fb 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -66,12 +66,6 @@ static int __blk_rq_map_user_iov(struct request *rq, bio->bi_opf &= ~REQ_OP_MASK; bio->bi_opf |= req_op(rq); - if (map_data && map_data->null_mapped) - bio_set_flag(bio, BIO_NULL_MAPPED); - - if (map_data) - map_data->offset += bio->bi_iter.bi_size; - orig_bio = bio; /* From 98a09d6106660ec6e69ff2a6fa14039bd504412b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 12:14:36 -0400 Subject: [PATCH 08/21] bio_copy_from_iter(): get rid of copying iov_iter we want the one passed to it advanced, anyway Signed-off-by: Al Viro --- block/bio.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/block/bio.c b/block/bio.c index 02457c2d4379..f5002b44c963 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1088,7 +1088,7 @@ static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, * Copy all pages from iov_iter to bio. * Returns 0 on success, or error on failure. */ -static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) +static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) { int i; struct bio_vec *bvec; @@ -1099,9 +1099,9 @@ static int bio_copy_from_iter(struct bio *bio, struct iov_iter iter) ret = copy_page_from_iter(bvec->bv_page, bvec->bv_offset, bvec->bv_len, - &iter); + iter); - if (!iov_iter_count(&iter)) + if (!iov_iter_count(iter)) break; if (ret < bvec->bv_len) @@ -1297,11 +1297,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q, */ if (((iter->type & WRITE) && (!map_data || !map_data->null_mapped)) || (map_data && map_data->from_user)) { - ret = bio_copy_from_iter(bio, *iter); + ret = bio_copy_from_iter(bio, iter); if (ret) goto cleanup; + } else { + iov_iter_advance(iter, bio->bi_iter.bi_size); } - iov_iter_advance(iter, bio->bi_iter.bi_size); bio->bi_private = bmd; if (map_data && map_data->null_mapped) From 0a0f151364f5bf836ad1d4de6113adb103a6628c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 12:30:17 -0400 Subject: [PATCH 09/21] bio_map_user_iov(): get rid of copying iov_iter we do want *iter advanced Signed-off-by: Al Viro --- block/bio.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/block/bio.c b/block/bio.c index f5002b44c963..28f66e2edc53 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1333,7 +1333,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, int j; struct bio *bio; int ret; - struct iov_iter i; struct bio_vec *bvec; if (!iov_iter_count(iter)) @@ -1343,14 +1342,13 @@ struct bio *bio_map_user_iov(struct request_queue *q, if (!bio) return ERR_PTR(-ENOMEM); - i = *iter; - while (iov_iter_count(&i)) { + while (iov_iter_count(iter)) { struct page **pages; ssize_t bytes; size_t offs, added = 0; int npages; - bytes = iov_iter_get_pages_alloc(&i, &pages, LONG_MAX, &offs); + bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); if (unlikely(bytes <= 0)) { ret = bytes ? bytes : -EFAULT; goto out_unmap; @@ -1384,7 +1382,7 @@ struct bio *bio_map_user_iov(struct request_queue *q, bytes -= n; offs = 0; } - iov_iter_advance(&i, added); + iov_iter_advance(iter, added); } /* * release the pages we didn't map into the bio, if any @@ -1406,7 +1404,6 @@ struct bio *bio_map_user_iov(struct request_queue *q, * reference to it */ bio_get(bio); - iov_iter_advance(iter, bio->bi_iter.bi_size); return bio; out_unmap: From d16d44ebb016792285ec1b9566dbd9d022ce70f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 13:09:18 -0400 Subject: [PATCH 10/21] bio_copy_user_iov(): saner bio size calculation it's a bounce buffer; we don't *care* how badly is the real source/destination fragmented, all that matters is the total size. Signed-off-by: Al Viro --- block/bio.c | 30 ++++++------------------------ 1 file changed, 6 insertions(+), 24 deletions(-) diff --git a/block/bio.c b/block/bio.c index 28f66e2edc53..e87f70cd528e 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1201,33 +1201,11 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct bio_map_data *bmd; struct page *page; struct bio *bio; - int i, ret; - int nr_pages = 0; + int i = 0, ret; + int nr_pages; unsigned int len = iter->count; unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - for (i = 0; i < iter->nr_segs; i++) { - unsigned long uaddr; - unsigned long end; - unsigned long start; - - uaddr = (unsigned long) iter->iov[i].iov_base; - end = (uaddr + iter->iov[i].iov_len + PAGE_SIZE - 1) - >> PAGE_SHIFT; - start = uaddr >> PAGE_SHIFT; - - /* - * Overflow, abort - */ - if (end < start) - return ERR_PTR(-EINVAL); - - nr_pages += end - start; - } - - if (offset) - nr_pages++; - bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); @@ -1242,6 +1220,10 @@ struct bio *bio_copy_user_iov(struct request_queue *q, bmd->iter = *iter; bmd->iter.iov = bmd->iov; + nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); + if (nr_pages > BIO_MAX_PAGES) + nr_pages = BIO_MAX_PAGES; + ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); if (!bio) From 0e5b935d43f385ab23d2e38e7134b1abb0e7907e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 13:14:35 -0400 Subject: [PATCH 11/21] bio_alloc_map_data(): do bmd->iter setup right there just need to copy it iter instead of iter->nr_segs Signed-off-by: Al Viro --- block/bio.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/block/bio.c b/block/bio.c index e87f70cd528e..ad34cdb99ad2 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1070,14 +1070,21 @@ struct bio_map_data { struct iovec iov[]; }; -static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, +static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, gfp_t gfp_mask) { - if (iov_count > UIO_MAXIOV) + struct bio_map_data *bmd; + if (data->nr_segs > UIO_MAXIOV) return NULL; - return kmalloc(sizeof(struct bio_map_data) + - sizeof(struct iovec) * iov_count, gfp_mask); + bmd = kmalloc(sizeof(struct bio_map_data) + + sizeof(struct iovec) * data->nr_segs, gfp_mask); + if (!bmd) + return NULL; + memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); + bmd->iter = *data; + bmd->iter.iov = bmd->iov; + return bmd; } /** @@ -1206,7 +1213,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, unsigned int len = iter->count; unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; - bmd = bio_alloc_map_data(iter->nr_segs, gfp_mask); + bmd = bio_alloc_map_data(iter, gfp_mask); if (!bmd) return ERR_PTR(-ENOMEM); @@ -1216,9 +1223,6 @@ struct bio *bio_copy_user_iov(struct request_queue *q, * shortlived one. */ bmd->is_our_pages = map_data ? 0 : 1; - memcpy(bmd->iov, iter->iov, sizeof(struct iovec) * iter->nr_segs); - bmd->iter = *iter; - bmd->iter.iov = bmd->iov; nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); if (nr_pages > BIO_MAX_PAGES) From faea13297ea739f94913d56d7b865134b4fc8726 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 14:03:29 -0400 Subject: [PATCH 12/21] kill iov_shorten() Signed-off-by: Al Viro --- fs/read_write.c | 21 --------------------- include/linux/uio.h | 2 -- 2 files changed, 23 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index f0d4b16873e8..b8bf2cb3298a 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -634,27 +634,6 @@ SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, return ret; } -/* - * Reduce an iovec's length in-place. Return the resulting number of segments - */ -unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) -{ - unsigned long seg = 0; - size_t len = 0; - - while (seg < nr_segs) { - seg++; - if (len + iov->iov_len >= to) { - iov->iov_len = to - len; - break; - } - len += iov->iov_len; - iov++; - } - return seg; -} -EXPORT_SYMBOL(iov_shorten); - static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, loff_t *ppos, int type, rwf_t flags) { diff --git a/include/linux/uio.h b/include/linux/uio.h index 8a642cda641c..5885daeae721 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -80,8 +80,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) ((iov = iov_iter_iovec(&(iter))), 1); \ iov_iter_advance(&(iter), (iov).iov_len)) -unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); - size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); From 6570f0dd6004b5b925b19666e9c60da432d197fd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 17:43:00 -0400 Subject: [PATCH 13/21] orangefs: remove detritus from struct orangefs_kiocb_s Signed-off-by: Al Viro --- fs/orangefs/orangefs-kernel.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index ea0ce507a6ab..937a5dd2cb14 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -274,12 +274,6 @@ struct orangefs_kiocb_s { /* orangefs kernel operation type */ struct orangefs_kernel_op_s *op; - /* The user space buffers from/to which I/O is being staged */ - struct iovec *iov; - - /* number of elements in the iovector */ - unsigned long nr_segs; - /* set to indicate the type of the operation */ int rw; From 83683dc6e017dabfd8c2c0e9301b94b8a2d01233 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 17:46:21 -0400 Subject: [PATCH 14/21] xen: don't open-code iov_iter_kvec() Signed-off-by: Al Viro --- drivers/xen/pvcalls-back.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index b209cd44bb8d..ddc671170a5f 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -134,20 +134,16 @@ static void pvcalls_conn_back_read(void *opaque) masked_cons = pvcalls_mask(cons, array_size); memset(&msg, 0, sizeof(msg)); - msg.msg_iter.type = ITER_KVEC|WRITE; - msg.msg_iter.count = wanted; if (masked_prod < masked_cons) { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = wanted; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 1; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 1, wanted); } else { vec[0].iov_base = data->in + masked_prod; vec[0].iov_len = array_size - masked_prod; vec[1].iov_base = data->in; vec[1].iov_len = wanted - vec[0].iov_len; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 2; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|WRITE, vec, 2, wanted); } atomic_set(&map->read, 0); @@ -196,20 +192,16 @@ static void pvcalls_conn_back_write(struct sock_mapping *map) memset(&msg, 0, sizeof(msg)); msg.msg_flags |= MSG_DONTWAIT; - msg.msg_iter.type = ITER_KVEC|READ; - msg.msg_iter.count = size; if (pvcalls_mask(prod, array_size) > pvcalls_mask(cons, array_size)) { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = size; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 1; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 1, size); } else { vec[0].iov_base = data->out + pvcalls_mask(cons, array_size); vec[0].iov_len = array_size - pvcalls_mask(cons, array_size); vec[1].iov_base = data->out; vec[1].iov_len = size - vec[0].iov_len; - msg.msg_iter.kvec = vec; - msg.msg_iter.nr_segs = 2; + iov_iter_kvec(&msg.msg_iter, ITER_KVEC|READ, vec, 2, size); } atomic_set(&map->write, 0); From 6d1ff4d6f37dc3c14d9fc08dbbbb920a12c21f84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Feb 2017 00:37:49 -0500 Subject: [PATCH 15/21] lnet_return_rx_credits_locked: don't abuse list_entry Signed-off-by: Al Viro --- drivers/staging/lustre/lnet/lnet/lib-move.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/lnet/lnet/lib-move.c b/drivers/staging/lustre/lnet/lnet/lib-move.c index bc0779c02d97..2b96c02bed50 100644 --- a/drivers/staging/lustre/lnet/lnet/lib-move.c +++ b/drivers/staging/lustre/lnet/lnet/lib-move.c @@ -889,7 +889,7 @@ lnet_return_rx_credits_locked(struct lnet_msg *msg) */ LASSERT(msg->msg_kiov); - rb = list_entry(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); + rb = container_of(msg->msg_kiov, struct lnet_rtrbuf, rb_kiov[0]); rbp = rb->rb_pool; msg->msg_kiov = NULL; From 09cf698a594276139b7dfafb232af3fe4fbc4438 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Feb 2017 01:44:03 -0500 Subject: [PATCH 16/21] new primitive: iov_iter_for_each_range() For kvec and bvec: feeds segments to given callback as long as it returns 0. For iovec and pipe: fails. Signed-off-by: Al Viro --- include/linux/uio.h | 4 ++++ lib/iov_iter.c | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/uio.h b/include/linux/uio.h index 5885daeae721..e67e12adb136 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -244,4 +244,8 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, + int (*f)(struct kvec *vec, void *context), + void *context); + #endif diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1c1c06ddc20a..970212670b6a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1446,3 +1446,25 @@ int import_single_range(int rw, void __user *buf, size_t len, return 0; } EXPORT_SYMBOL(import_single_range); + +int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, + int (*f)(struct kvec *vec, void *context), + void *context) +{ + struct kvec w; + int err = -EINVAL; + if (!bytes) + return 0; + + iterate_all_kinds(i, bytes, v, -EINVAL, ({ + w.iov_base = kmap(v.bv_page) + v.bv_offset; + w.iov_len = v.bv_len; + err = f(&w, context); + kunmap(v.bv_page); + err;}), ({ + w = v; + err = f(&w, context);}) + ) + return err; +} +EXPORT_SYMBOL(iov_iter_for_each_range); From 11d49e9d089ccec81be87c2386dfdd010d7f7f6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 18:36:44 -0400 Subject: [PATCH 17/21] fix a page leak in vhost_scsi_iov_to_sgl() error recovery we are advancing sg as we go, so the pages we need to drop in case of error are *before* the current sg. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- drivers/vhost/scsi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 046f6d280af5..e47c5bc3ddca 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -688,6 +688,7 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, struct scatterlist *sg, int sg_count) { size_t off = iter->iov_offset; + struct scatterlist *p = sg; int i, ret; for (i = 0; i < iter->nr_segs; i++) { @@ -696,8 +697,8 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write); if (ret < 0) { - for (i = 0; i < sg_count; i++) { - struct page *page = sg_page(&sg[i]); + while (p < sg) { + struct page *page = sg_page(p++); if (page) put_page(page); } From 2f240c4ae8869907d4c8161ee015bedb366512e2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 22:07:59 -0400 Subject: [PATCH 18/21] vhost/scsi: switch to iov_iter_get_pages() Signed-off-by: Al Viro --- drivers/vhost/scsi.c | 68 +++++++++++++------------------------------- 1 file changed, 20 insertions(+), 48 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index e47c5bc3ddca..e5b18320f1a3 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -210,12 +210,6 @@ static struct workqueue_struct *vhost_scsi_workqueue; static DEFINE_MUTEX(vhost_scsi_mutex); static LIST_HEAD(vhost_scsi_list); -static int iov_num_pages(void __user *iov_base, size_t iov_len) -{ - return (PAGE_ALIGN((unsigned long)iov_base + iov_len) - - ((unsigned long)iov_base & PAGE_MASK)) >> PAGE_SHIFT; -} - static void vhost_scsi_done_inflight(struct kref *kref) { struct vhost_scsi_inflight *inflight; @@ -618,48 +612,31 @@ vhost_scsi_get_tag(struct vhost_virtqueue *vq, struct vhost_scsi_tpg *tpg, */ static int vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd, - void __user *ptr, - size_t len, + struct iov_iter *iter, struct scatterlist *sgl, bool write) { - unsigned int npages = 0, offset, nbytes; - unsigned int pages_nr = iov_num_pages(ptr, len); - struct scatterlist *sg = sgl; struct page **pages = cmd->tvc_upages; - int ret, i; + struct scatterlist *sg = sgl; + ssize_t bytes; + size_t offset; + unsigned int npages = 0; - if (pages_nr > VHOST_SCSI_PREALLOC_UPAGES) { - pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than" - " preallocated VHOST_SCSI_PREALLOC_UPAGES: %u\n", - pages_nr, VHOST_SCSI_PREALLOC_UPAGES); - return -ENOBUFS; - } - - ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages); + bytes = iov_iter_get_pages(iter, pages, LONG_MAX, + VHOST_SCSI_PREALLOC_UPAGES, &offset); /* No pages were pinned */ - if (ret < 0) - goto out; - /* Less pages pinned than wanted */ - if (ret != pages_nr) { - for (i = 0; i < ret; i++) - put_page(pages[i]); - ret = -EFAULT; - goto out; - } + if (bytes <= 0) + return bytes < 0 ? bytes : -EFAULT; - while (len > 0) { - offset = (uintptr_t)ptr & ~PAGE_MASK; - nbytes = min_t(unsigned int, PAGE_SIZE - offset, len); - sg_set_page(sg, pages[npages], nbytes, offset); - ptr += nbytes; - len -= nbytes; - sg++; - npages++; - } + iov_iter_advance(iter, bytes); -out: - return ret; + while (bytes) { + unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes); + sg_set_page(sg++, pages[npages++], n, offset); + bytes -= n; + offset = 0; + } + return npages; } static int @@ -687,15 +664,11 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, struct iov_iter *iter, struct scatterlist *sg, int sg_count) { - size_t off = iter->iov_offset; struct scatterlist *p = sg; - int i, ret; + int ret; - for (i = 0; i < iter->nr_segs; i++) { - void __user *base = iter->iov[i].iov_base + off; - size_t len = iter->iov[i].iov_len - off; - - ret = vhost_scsi_map_to_sgl(cmd, base, len, sg, write); + while (iov_iter_count(iter)) { + ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write); if (ret < 0) { while (p < sg) { struct page *page = sg_page(p++); @@ -705,7 +678,6 @@ vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write, return ret; } sg += ret; - off = 0; } return 0; } From 33ec9c5cae2526ee2ef4f8224ddd2d7779b64e89 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 22:54:53 -0400 Subject: [PATCH 19/21] lustre: switch struct ksock_conn to iov_iter Merge ksnc_rx_{no_wanted,iov,kiov,niov,nkiov} into a single iov_iter (ksnc_rx_to). Signed-off-by: Al Viro --- .../lustre/lnet/klnds/socklnd/socklnd.c | 4 +- .../lustre/lnet/klnds/socklnd/socklnd.h | 6 +- .../lustre/lnet/klnds/socklnd/socklnd_cb.c | 123 +++++------------- .../lustre/lnet/klnds/socklnd/socklnd_lib.c | 40 +++--- 4 files changed, 52 insertions(+), 121 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c index fbbd8a5489e9..b390813d74be 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.c @@ -1688,10 +1688,10 @@ ksocknal_destroy_conn(struct ksock_conn *conn) case SOCKNAL_RX_LNET_PAYLOAD: last_rcv = conn->ksnc_rx_deadline - cfs_time_seconds(*ksocknal_tunables.ksnd_timeout); - CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %d, left: %d, last alive is %ld secs ago\n", + CERROR("Completing partial receive from %s[%d], ip %pI4h:%d, with error, wanted: %zd, left: %d, last alive is %ld secs ago\n", libcfs_id2str(conn->ksnc_peer->ksnp_id), conn->ksnc_type, &conn->ksnc_ipaddr, conn->ksnc_port, - conn->ksnc_rx_nob_wanted, conn->ksnc_rx_nob_left, + iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left, cfs_duration_sec(cfs_time_sub(cfs_time_current(), last_rcv))); lnet_finalize(conn->ksnc_peer->ksnp_ni, diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index e6428c4b7aec..694d45b45976 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -357,11 +357,7 @@ struct ksock_conn { __u8 ksnc_rx_scheduled; /* being progressed */ __u8 ksnc_rx_state; /* what is being read */ int ksnc_rx_nob_left; /* # bytes to next hdr/body */ - int ksnc_rx_nob_wanted;/* bytes actually wanted */ - int ksnc_rx_niov; /* # iovec frags */ - struct kvec *ksnc_rx_iov; /* the iovec frags */ - int ksnc_rx_nkiov; /* # page frags */ - struct bio_vec *ksnc_rx_kiov; /* the page frags */ + struct iov_iter ksnc_rx_to; /* copy destination */ union ksock_rxiovspace ksnc_rx_iov_space; /* space for frag descriptors */ __u32 ksnc_rx_csum; /* partial checksum for incoming * data diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index 6b38d5a8fe92..c79aeb463a5c 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -251,14 +251,11 @@ ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) static int ksocknal_recv_iov(struct ksock_conn *conn) { - struct kvec *iov = conn->ksnc_rx_iov; int nob; int rc; - LASSERT(conn->ksnc_rx_niov > 0); - /* - * Never touch conn->ksnc_rx_iov or change connection + * Never touch conn->ksnc_rx_to or change connection * status inside ksocknal_lib_recv_iov */ rc = ksocknal_lib_recv_iov(conn); @@ -275,22 +272,11 @@ ksocknal_recv_iov(struct ksock_conn *conn) mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; - conn->ksnc_rx_nob_wanted -= nob; conn->ksnc_rx_nob_left -= nob; - do { - LASSERT(conn->ksnc_rx_niov > 0); - - if (nob < (int)iov->iov_len) { - iov->iov_len -= nob; - iov->iov_base += nob; - return -EAGAIN; - } - - nob -= iov->iov_len; - conn->ksnc_rx_iov = ++iov; - conn->ksnc_rx_niov--; - } while (nob); + iov_iter_advance(&conn->ksnc_rx_to, nob); + if (iov_iter_count(&conn->ksnc_rx_to)) + return -EAGAIN; return rc; } @@ -298,14 +284,11 @@ ksocknal_recv_iov(struct ksock_conn *conn) static int ksocknal_recv_kiov(struct ksock_conn *conn) { - struct bio_vec *kiov = conn->ksnc_rx_kiov; int nob; int rc; - LASSERT(conn->ksnc_rx_nkiov > 0); - /* - * Never touch conn->ksnc_rx_kiov or change connection + * Never touch conn->ksnc_rx_to or change connection * status inside ksocknal_lib_recv_iov */ rc = ksocknal_lib_recv_kiov(conn); @@ -322,22 +305,10 @@ ksocknal_recv_kiov(struct ksock_conn *conn) mb(); /* order with setting rx_started */ conn->ksnc_rx_started = 1; - conn->ksnc_rx_nob_wanted -= nob; conn->ksnc_rx_nob_left -= nob; - - do { - LASSERT(conn->ksnc_rx_nkiov > 0); - - if (nob < (int)kiov->bv_len) { - kiov->bv_offset += nob; - kiov->bv_len -= nob; - return -EAGAIN; - } - - nob -= kiov->bv_len; - conn->ksnc_rx_kiov = ++kiov; - conn->ksnc_rx_nkiov--; - } while (nob); + iov_iter_advance(&conn->ksnc_rx_to, nob); + if (iov_iter_count(&conn->ksnc_rx_to)) + return -EAGAIN; return 1; } @@ -347,7 +318,7 @@ ksocknal_receive(struct ksock_conn *conn) { /* * Return 1 on success, 0 on EOF, < 0 on error. - * Caller checks ksnc_rx_nob_wanted to determine + * Caller checks ksnc_rx_to to determine * progress/completion. */ int rc; @@ -364,7 +335,7 @@ ksocknal_receive(struct ksock_conn *conn) } for (;;) { - if (conn->ksnc_rx_niov) + if (conn->ksnc_rx_to.type & ITER_KVEC) rc = ksocknal_recv_iov(conn); else rc = ksocknal_recv_kiov(conn); @@ -382,7 +353,7 @@ ksocknal_receive(struct ksock_conn *conn) /* Completed a fragment */ - if (!conn->ksnc_rx_nob_wanted) { + if (!iov_iter_count(&conn->ksnc_rx_to)) { rc = 1; break; } @@ -1050,6 +1021,7 @@ int ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) { static char ksocknal_slop_buffer[4096]; + struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; int nob; unsigned int niov; @@ -1070,32 +1042,26 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) case KSOCK_PROTO_V2: case KSOCK_PROTO_V3: conn->ksnc_rx_state = SOCKNAL_RX_KSM_HEADER; - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg; - - conn->ksnc_rx_nob_wanted = offsetof(struct ksock_msg, ksm_u); + kvec->iov_base = &conn->ksnc_msg; + kvec->iov_len = offsetof(struct ksock_msg, ksm_u); conn->ksnc_rx_nob_left = offsetof(struct ksock_msg, ksm_u); - conn->ksnc_rx_iov[0].iov_len = offsetof(struct ksock_msg, ksm_u); + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, offsetof(struct ksock_msg, ksm_u)); break; case KSOCK_PROTO_V1: /* Receiving bare struct lnet_hdr */ conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(struct lnet_hdr); + kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; + kvec->iov_len = sizeof(struct lnet_hdr); conn->ksnc_rx_nob_left = sizeof(struct lnet_hdr); - - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(struct lnet_hdr); + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, sizeof(struct lnet_hdr)); break; default: LBUG(); } - conn->ksnc_rx_niov = 1; - - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; conn->ksnc_rx_csum = ~0; return 1; } @@ -1106,15 +1072,14 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) */ conn->ksnc_rx_state = SOCKNAL_RX_SLOP; conn->ksnc_rx_nob_left = nob_to_skip; - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; skipped = 0; niov = 0; do { nob = min_t(int, nob_to_skip, sizeof(ksocknal_slop_buffer)); - conn->ksnc_rx_iov[niov].iov_base = ksocknal_slop_buffer; - conn->ksnc_rx_iov[niov].iov_len = nob; + kvec[niov].iov_base = ksocknal_slop_buffer; + kvec[niov].iov_len = nob; niov++; skipped += nob; nob_to_skip -= nob; @@ -1122,16 +1087,14 @@ ksocknal_new_packet(struct ksock_conn *conn, int nob_to_skip) } while (nob_to_skip && /* mustn't overflow conn's rx iov */ niov < sizeof(conn->ksnc_rx_iov_space) / sizeof(struct iovec)); - conn->ksnc_rx_niov = niov; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_nob_wanted = skipped; + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, niov, skipped); return 0; } static int ksocknal_process_receive(struct ksock_conn *conn) { + struct kvec *kvec = (struct kvec *)&conn->ksnc_rx_iov_space; struct lnet_hdr *lhdr; struct lnet_process_id *id; int rc; @@ -1145,7 +1108,7 @@ ksocknal_process_receive(struct ksock_conn *conn) conn->ksnc_rx_state == SOCKNAL_RX_LNET_HEADER || conn->ksnc_rx_state == SOCKNAL_RX_SLOP); again: - if (conn->ksnc_rx_nob_wanted) { + if (iov_iter_count(&conn->ksnc_rx_to)) { rc = ksocknal_receive(conn); if (rc <= 0) { @@ -1170,7 +1133,7 @@ ksocknal_process_receive(struct ksock_conn *conn) return (!rc ? -ESHUTDOWN : rc); } - if (conn->ksnc_rx_nob_wanted) { + if (iov_iter_count(&conn->ksnc_rx_to)) { /* short read */ return -EAGAIN; } @@ -1233,16 +1196,13 @@ ksocknal_process_receive(struct ksock_conn *conn) } conn->ksnc_rx_state = SOCKNAL_RX_LNET_HEADER; - conn->ksnc_rx_nob_wanted = sizeof(struct ksock_lnet_msg); conn->ksnc_rx_nob_left = sizeof(struct ksock_lnet_msg); - conn->ksnc_rx_iov = (struct kvec *)&conn->ksnc_rx_iov_space; - conn->ksnc_rx_iov[0].iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; - conn->ksnc_rx_iov[0].iov_len = sizeof(struct ksock_lnet_msg); + kvec->iov_base = &conn->ksnc_msg.ksm_u.lnetmsg; + kvec->iov_len = sizeof(struct ksock_lnet_msg); - conn->ksnc_rx_niov = 1; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_nkiov = 0; + iov_iter_kvec(&conn->ksnc_rx_to, READ|ITER_KVEC, kvec, + 1, sizeof(struct ksock_lnet_msg)); goto again; /* read lnet header now */ @@ -1344,26 +1304,9 @@ ksocknal_recv(struct lnet_ni *ni, void *private, struct lnet_msg *msg, LASSERT(to->nr_segs <= LNET_MAX_IOV); conn->ksnc_cookie = msg; - conn->ksnc_rx_nob_wanted = iov_iter_count(to); conn->ksnc_rx_nob_left = rlen; - if (to->type & ITER_KVEC) { - conn->ksnc_rx_nkiov = 0; - conn->ksnc_rx_kiov = NULL; - conn->ksnc_rx_iov = conn->ksnc_rx_iov_space.iov; - conn->ksnc_rx_niov = - lnet_extract_iov(LNET_MAX_IOV, conn->ksnc_rx_iov, - to->nr_segs, to->kvec, - to->iov_offset, iov_iter_count(to)); - } else { - conn->ksnc_rx_niov = 0; - conn->ksnc_rx_iov = NULL; - conn->ksnc_rx_kiov = conn->ksnc_rx_iov_space.kiov; - conn->ksnc_rx_nkiov = - lnet_extract_kiov(LNET_MAX_IOV, conn->ksnc_rx_kiov, - to->nr_segs, to->bvec, - to->iov_offset, iov_iter_count(to)); - } + conn->ksnc_rx_to = *to; LASSERT(conn->ksnc_rx_scheduled); @@ -2328,12 +2271,12 @@ ksocknal_find_timed_out_conn(struct ksock_peer *peer) conn->ksnc_rx_deadline)) { /* Timed out incomplete incoming message */ ksocknal_conn_addref(conn); - CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %d left %d\n", + CNETERR("Timeout receiving from %s (%pI4h:%d), state %d wanted %zd left %d\n", libcfs_id2str(peer->ksnp_id), &conn->ksnc_ipaddr, conn->ksnc_port, conn->ksnc_rx_state, - conn->ksnc_rx_nob_wanted, + iov_iter_count(&conn->ksnc_rx_to), conn->ksnc_rx_nob_left); return conn; } diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 9c328dc6537b..0ec032373497 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -164,26 +164,19 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn) int ksocknal_lib_recv_iov(struct ksock_conn *conn) { - unsigned int niov = conn->ksnc_rx_niov; - struct kvec *iov = conn->ksnc_rx_iov; + unsigned int niov = conn->ksnc_rx_to.nr_segs; + const struct iovec *iov = conn->ksnc_rx_to.iov; struct msghdr msg = { .msg_flags = 0 }; - int nob; int i; int rc; int fragnob; int sum; __u32 saved_csum; + int off = conn->ksnc_rx_to.iov_offset; - LASSERT(niov > 0); - - for (nob = i = 0; i < niov; i++) - nob += iov[i].iov_len; - - LASSERT(nob <= conn->ksnc_rx_nob_wanted); - - iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, iov, niov, nob); + msg.msg_iter = conn->ksnc_rx_to; rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); saved_csum = 0; @@ -197,13 +190,14 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn) for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT(i < niov); - fragnob = iov[i].iov_len; + fragnob = iov[i].iov_len - off; if (fragnob > sum) fragnob = sum; conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - iov[i].iov_base, + iov[i].iov_base + off, fragnob); + off = 0; } conn->ksnc_msg.ksm_csum = saved_csum; } @@ -214,32 +208,29 @@ ksocknal_lib_recv_iov(struct ksock_conn *conn) int ksocknal_lib_recv_kiov(struct ksock_conn *conn) { - unsigned int niov = conn->ksnc_rx_nkiov; - struct bio_vec *kiov = conn->ksnc_rx_kiov; + unsigned int niov = conn->ksnc_rx_to.nr_segs; + const struct bio_vec *kiov = conn->ksnc_rx_to.bvec; + int off = conn->ksnc_rx_to.iov_offset; struct msghdr msg = { .msg_flags = 0 }; - int nob; int i; int rc; void *base; int sum; int fragnob; - for (nob = i = 0; i < niov; i++) - nob += kiov[i].bv_len; - - LASSERT(nob <= conn->ksnc_rx_nob_wanted); - - iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, kiov, niov, nob); + msg.msg_iter = conn->ksnc_rx_to; rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); + if (rc <= 0) + return rc; if (conn->ksnc_msg.ksm_csum) { for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { LASSERT(i < niov); - base = kmap(kiov[i].bv_page) + kiov[i].bv_offset; - fragnob = kiov[i].bv_len; + base = kmap(kiov[i].bv_page) + kiov[i].bv_offset + off; + fragnob = kiov[i].bv_len - off; if (fragnob > sum) fragnob = sum; @@ -247,6 +238,7 @@ ksocknal_lib_recv_kiov(struct ksock_conn *conn) base, fragnob); kunmap(kiov[i].bv_page); + off = 0; } } return rc; From 8ececffa12f5555171075ce52e1226f570836b26 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Sep 2017 13:58:39 -0400 Subject: [PATCH 20/21] switch ksocknal_lib_recv_...() to use of iov_iter_for_each_range() ... and fold kvec and bio_vec variants in one Signed-off-by: Al Viro --- .../lustre/lnet/klnds/socklnd/socklnd.h | 3 +- .../lustre/lnet/klnds/socklnd/socklnd_cb.c | 44 +--------- .../lustre/lnet/klnds/socklnd/socklnd_lib.c | 87 +++++-------------- 3 files changed, 25 insertions(+), 109 deletions(-) diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h index 694d45b45976..a18a57c4de81 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd.h @@ -696,8 +696,7 @@ int ksocknal_lib_setup_sock(struct socket *so); int ksocknal_lib_send_iov(struct ksock_conn *conn, struct ksock_tx *tx); int ksocknal_lib_send_kiov(struct ksock_conn *conn, struct ksock_tx *tx); void ksocknal_lib_eager_ack(struct ksock_conn *conn); -int ksocknal_lib_recv_iov(struct ksock_conn *conn); -int ksocknal_lib_recv_kiov(struct ksock_conn *conn); +int ksocknal_lib_recv(struct ksock_conn *conn); int ksocknal_lib_get_conn_tunables(struct ksock_conn *conn, int *txmem, int *rxmem, int *nagle); diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c index c79aeb463a5c..b12c3b0f39e1 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_cb.c @@ -249,16 +249,16 @@ ksocknal_transmit(struct ksock_conn *conn, struct ksock_tx *tx) } static int -ksocknal_recv_iov(struct ksock_conn *conn) +ksocknal_recv_iter(struct ksock_conn *conn) { int nob; int rc; /* * Never touch conn->ksnc_rx_to or change connection - * status inside ksocknal_lib_recv_iov + * status inside ksocknal_lib_recv */ - rc = ksocknal_lib_recv_iov(conn); + rc = ksocknal_lib_recv(conn); if (rc <= 0) return rc; @@ -274,38 +274,6 @@ ksocknal_recv_iov(struct ksock_conn *conn) conn->ksnc_rx_nob_left -= nob; - iov_iter_advance(&conn->ksnc_rx_to, nob); - if (iov_iter_count(&conn->ksnc_rx_to)) - return -EAGAIN; - - return rc; -} - -static int -ksocknal_recv_kiov(struct ksock_conn *conn) -{ - int nob; - int rc; - - /* - * Never touch conn->ksnc_rx_to or change connection - * status inside ksocknal_lib_recv_iov - */ - rc = ksocknal_lib_recv_kiov(conn); - - if (rc <= 0) - return rc; - - /* received something... */ - nob = rc; - - conn->ksnc_peer->ksnp_last_alive = cfs_time_current(); - conn->ksnc_rx_deadline = - cfs_time_shift(*ksocknal_tunables.ksnd_timeout); - mb(); /* order with setting rx_started */ - conn->ksnc_rx_started = 1; - - conn->ksnc_rx_nob_left -= nob; iov_iter_advance(&conn->ksnc_rx_to, nob); if (iov_iter_count(&conn->ksnc_rx_to)) return -EAGAIN; @@ -335,11 +303,7 @@ ksocknal_receive(struct ksock_conn *conn) } for (;;) { - if (conn->ksnc_rx_to.type & ITER_KVEC) - rc = ksocknal_recv_iov(conn); - else - rc = ksocknal_recv_kiov(conn); - + rc = ksocknal_recv_iter(conn); if (rc <= 0) { /* error/EOF or partial receive */ if (rc == -EAGAIN) { diff --git a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c index 0ec032373497..2e3e2dd70baa 100644 --- a/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c +++ b/drivers/staging/lustre/lnet/klnds/socklnd/socklnd_lib.c @@ -161,86 +161,39 @@ ksocknal_lib_eager_ack(struct ksock_conn *conn) sizeof(opt)); } -int -ksocknal_lib_recv_iov(struct ksock_conn *conn) +static int lustre_csum(struct kvec *v, void *context) { - unsigned int niov = conn->ksnc_rx_to.nr_segs; - const struct iovec *iov = conn->ksnc_rx_to.iov; - struct msghdr msg = { - .msg_flags = 0 - }; - int i; - int rc; - int fragnob; - int sum; - __u32 saved_csum; - int off = conn->ksnc_rx_to.iov_offset; - - msg.msg_iter = conn->ksnc_rx_to; - rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); - - saved_csum = 0; - if (conn->ksnc_proto == &ksocknal_protocol_v2x) { - saved_csum = conn->ksnc_msg.ksm_csum; - conn->ksnc_msg.ksm_csum = 0; - } - - if (saved_csum) { - /* accumulate checksum */ - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT(i < niov); - - fragnob = iov[i].iov_len - off; - if (fragnob > sum) - fragnob = sum; - - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - iov[i].iov_base + off, - fragnob); - off = 0; - } - conn->ksnc_msg.ksm_csum = saved_csum; - } - - return rc; + struct ksock_conn *conn = context; + conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, + v->iov_base, v->iov_len); + return 0; } int -ksocknal_lib_recv_kiov(struct ksock_conn *conn) +ksocknal_lib_recv(struct ksock_conn *conn) { - unsigned int niov = conn->ksnc_rx_to.nr_segs; - const struct bio_vec *kiov = conn->ksnc_rx_to.bvec; - int off = conn->ksnc_rx_to.iov_offset; - struct msghdr msg = { - .msg_flags = 0 - }; - int i; + struct msghdr msg = { .msg_iter = conn->ksnc_rx_to }; + __u32 saved_csum; int rc; - void *base; - int sum; - int fragnob; - msg.msg_iter = conn->ksnc_rx_to; rc = sock_recvmsg(conn->ksnc_sock, &msg, MSG_DONTWAIT); if (rc <= 0) return rc; - if (conn->ksnc_msg.ksm_csum) { - for (i = 0, sum = rc; sum > 0; i++, sum -= fragnob) { - LASSERT(i < niov); + saved_csum = conn->ksnc_msg.ksm_csum; + if (!saved_csum) + return rc; - base = kmap(kiov[i].bv_page) + kiov[i].bv_offset + off; - fragnob = kiov[i].bv_len - off; - if (fragnob > sum) - fragnob = sum; + /* header is included only in V2 - V3 checksums only the bulk data */ + if (!(conn->ksnc_rx_to.type & ITER_BVEC) && + conn->ksnc_proto != &ksocknal_protocol_v2x) + return rc; + + /* accumulate checksum */ + conn->ksnc_msg.ksm_csum = 0; + iov_iter_for_each_range(&conn->ksnc_rx_to, rc, lustre_csum, conn); + conn->ksnc_msg.ksm_csum = saved_csum; - conn->ksnc_rx_csum = crc32_le(conn->ksnc_rx_csum, - base, fragnob); - - kunmap(kiov[i].bv_page); - off = 0; - } - } return rc; } From cfe057f7db1ff026c8db75469a3f9ba9736e1975 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Sep 2017 21:17:09 +0100 Subject: [PATCH 21/21] iomap_dio_actor(): fix iov_iter bugs 1) Ignoring return value from iov_iter_zero() is wrong for iovec-backed case as well as for pipes - it can fail. 2) Failure to fault destination pages in 25Mb into a 50Mb iovec should not act as if nothing in the area had been read, nevermind that the first 25Mb might have *already* been read by that point. Signed-off-by: Al Viro --- fs/iomap.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..9c41008833ac 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -848,6 +848,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, struct bio *bio; bool need_zeroout = false; int nr_pages, ret; + size_t copied = 0; if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL; @@ -859,7 +860,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, /*FALLTHRU*/ case IOMAP_UNWRITTEN: if (!(dio->flags & IOMAP_DIO_WRITE)) { - iov_iter_zero(length, dio->submit.iter); + length = iov_iter_zero(length, dio->submit.iter); dio->size += length; return length; } @@ -896,8 +897,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, } do { - if (dio->error) + size_t n; + if (dio->error) { + iov_iter_revert(dio->submit.iter, copied); return 0; + } bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); @@ -910,20 +914,24 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, ret = bio_iov_iter_get_pages(bio, &iter); if (unlikely(ret)) { bio_put(bio); - return ret; + return copied ? copied : ret; } + n = bio->bi_iter.bi_size; if (dio->flags & IOMAP_DIO_WRITE) { bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_IDLE); - task_io_account_write(bio->bi_iter.bi_size); + task_io_account_write(n); } else { bio_set_op_attrs(bio, REQ_OP_READ, 0); if (dio->flags & IOMAP_DIO_DIRTY) bio_set_pages_dirty(bio); } - dio->size += bio->bi_iter.bi_size; - pos += bio->bi_iter.bi_size; + iov_iter_advance(dio->submit.iter, n); + + dio->size += n; + pos += n; + copied += n; nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); @@ -939,9 +947,7 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, if (pad) iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); } - - iov_iter_advance(dio->submit.iter, length); - return length; + return copied; } ssize_t