UPSTREAM: zram: idle writeback fixes and cleanup
This patch includes some fixes and cleanup for idle-page writeback. 1. writeback_limit interface Now writeback_limit interface is rather confusing. For example, once writeback limit budget is exhausted, admin can see 0 from /sys/block/zramX/writeback_limit which is same semantic with disable writeback_limit at this moment. IOW, admin cannot tell whether zero came from disable writeback limit or exhausted writeback limit. To make the interface clear, let's separate enable of writeback limit to another knob - /sys/block/zram0/writeback_limit_enable * before: while true : # to re-enable writeback limit once previous one is used up echo 0 > /sys/block/zram0/writeback_limit echo $((200<<20)) > /sys/block/zram0/writeback_limit .. .. # used up the writeback limit budget * new # To enable writeback limit, from the beginning, admin should # enable it. echo $((200<<20)) > /sys/block/zram0/writeback_limit echo 1 > /sys/block/zram0/writeback_limit_enable while true : echo $((200<<20)) > /sys/block/zram0/writeback_limit .. .. # used up the writeback limit budget It's much more straightforward. 2. fix condition check idle/huge writeback mode check The mode in writeback_store is not bit operation any more so no need to use bit operations. Furthermore, current condition check is broken in that it does writeback every page regardless of huge/idle. 3. clean up idle_store No need to use goto.
[minchan@kernel.org: missed spin_lock_init] Link: http://lkml.kernel.org/r/20190103001601.GA255139@google.com Link: http://lkml.kernel.org/r/20181224033529.19450-1-minchan@kernel.org Signed-off-by: Minchan Kim <minchan@kernel.org> Suggested-by: John Dias <joaodias@google.com> Cc: Sergey Senozhatsky <sergey.senozhatsky.work@gmail.com> Cc: John Dias <joaodias@google.com> Cc: Srinivas Paladugu <srnvs@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> (cherry picked from commit 1d69a3f8ae77e3dbfdc1356225cce5ea9c366aec) Bug: 117683045 Change-Id: Ibdedcfd5fd886d6771f1300efb1caa039488a0ca Signed-off-by: Srinivas Paladugu <srnvs@google.com>
This commit is contained in:
parent
e610df0c90
commit
5f5d7b0063
4 changed files with 125 additions and 55 deletions
|
@ -122,11 +122,18 @@ Description:
|
||||||
statistics (bd_count, bd_reads, bd_writes) in a format
|
statistics (bd_count, bd_reads, bd_writes) in a format
|
||||||
similar to block layer statistics file format.
|
similar to block layer statistics file format.
|
||||||
|
|
||||||
|
What: /sys/block/zram<id>/writeback_limit_enable
|
||||||
|
Date: November 2018
|
||||||
|
Contact: Minchan Kim <minchan@kernel.org>
|
||||||
|
Description:
|
||||||
|
The writeback_limit_enable file is read-write and specifies
|
||||||
|
enable of writeback_limit feature. "1" means enable the feature.
|
||||||
|
No limit "0" is the initial state.
|
||||||
|
|
||||||
What: /sys/block/zram<id>/writeback_limit
|
What: /sys/block/zram<id>/writeback_limit
|
||||||
Date: November 2018
|
Date: November 2018
|
||||||
Contact: Minchan Kim <minchan@kernel.org>
|
Contact: Minchan Kim <minchan@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The writeback_limit file is read-write and specifies the maximum
|
The writeback_limit file is read-write and specifies the maximum
|
||||||
amount of writeback ZRAM can do. The limit could be changed
|
amount of writeback ZRAM can do. The limit could be changed
|
||||||
in run time and "0" means disable the limit.
|
in run time.
|
||||||
No limit is the initial state.
|
|
||||||
|
|
|
@ -166,6 +166,7 @@ mem_limit WO specifies the maximum amount of memory ZRAM can use
|
||||||
to store the compressed data
|
to store the compressed data
|
||||||
writeback_limit WO specifies the maximum amount of write IO zram can
|
writeback_limit WO specifies the maximum amount of write IO zram can
|
||||||
write out to backing device as 4KB unit
|
write out to backing device as 4KB unit
|
||||||
|
writeback_limit_enable RW show and set writeback_limit feature
|
||||||
max_comp_streams RW the number of possible concurrent compress operations
|
max_comp_streams RW the number of possible concurrent compress operations
|
||||||
comp_algorithm RW show and change the compression algorithm
|
comp_algorithm RW show and change the compression algorithm
|
||||||
compact WO trigger memory compaction
|
compact WO trigger memory compaction
|
||||||
|
@ -280,32 +281,51 @@ With the command, zram writeback idle pages from memory to the storage.
|
||||||
If there are lots of write IO with flash device, potentially, it has
|
If there are lots of write IO with flash device, potentially, it has
|
||||||
flash wearout problem so that admin needs to design write limitation
|
flash wearout problem so that admin needs to design write limitation
|
||||||
to guarantee storage health for entire product life.
|
to guarantee storage health for entire product life.
|
||||||
To overcome the concern, zram supports "writeback_limit".
|
|
||||||
The "writeback_limit"'s default value is 0 so that it doesn't limit
|
To overcome the concern, zram supports "writeback_limit" feature.
|
||||||
any writeback. If admin want to measure writeback count in a certain
|
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
|
||||||
period, he could know it via /sys/block/zram0/bd_stat's 3rd column.
|
any writeback. IOW, if admin want to apply writeback budget, he should
|
||||||
|
enable writeback_limit_enable via
|
||||||
|
|
||||||
|
$ echo 1 > /sys/block/zramX/writeback_limit_enable
|
||||||
|
|
||||||
|
Once writeback_limit_enable is set, zram doesn't allow any writeback
|
||||||
|
until admin set the budget via /sys/block/zramX/writeback_limit.
|
||||||
|
|
||||||
|
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
|
||||||
|
assigned via /sys/block/zramX/writeback_limit is meaningless.)
|
||||||
|
|
||||||
If admin want to limit writeback as per-day 400M, he could do it
|
If admin want to limit writeback as per-day 400M, he could do it
|
||||||
like below.
|
like below.
|
||||||
|
|
||||||
MB_SHIFT=20
|
$ MB_SHIFT=20
|
||||||
4K_SHIFT=12
|
$ 4K_SHIFT=12
|
||||||
echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
||||||
/sys/block/zram0/writeback_limit.
|
/sys/block/zram0/writeback_limit.
|
||||||
|
$ echo 1 > /sys/block/zram0/writeback_limit_enable
|
||||||
|
|
||||||
If admin want to allow further write again, he could do it like below
|
If admin want to allow further write again once the budget is exhausted,
|
||||||
|
he could do it like below
|
||||||
|
|
||||||
echo 0 > /sys/block/zram0/writeback_limit
|
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
||||||
|
/sys/block/zram0/writeback_limit
|
||||||
|
|
||||||
If admin want to see remaining writeback budget since he set,
|
If admin want to see remaining writeback budget since he set,
|
||||||
|
|
||||||
cat /sys/block/zram0/writeback_limit
|
$ cat /sys/block/zramX/writeback_limit
|
||||||
|
|
||||||
|
If admin want to disable writeback limit, he could do
|
||||||
|
|
||||||
|
$ echo 0 > /sys/block/zramX/writeback_limit_enable
|
||||||
|
|
||||||
The writeback_limit count will reset whenever you reset zram(e.g.,
|
The writeback_limit count will reset whenever you reset zram(e.g.,
|
||||||
system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
|
system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
|
||||||
writeback happened until you reset the zram to allocate extra writeback
|
writeback happened until you reset the zram to allocate extra writeback
|
||||||
budget in next setting is user's job.
|
budget in next setting is user's job.
|
||||||
|
|
||||||
|
If admin want to measure writeback count in a certain period, he could
|
||||||
|
know it via /sys/block/zram0/bd_stat's 3rd column.
|
||||||
|
|
||||||
= memory tracking
|
= memory tracking
|
||||||
|
|
||||||
With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
|
With CONFIG_ZRAM_MEMORY_TRACKING, user can know information of the
|
||||||
|
|
|
@ -316,11 +316,9 @@ static ssize_t idle_store(struct device *dev,
|
||||||
* See the comment in writeback_store.
|
* See the comment in writeback_store.
|
||||||
*/
|
*/
|
||||||
zram_slot_lock(zram, index);
|
zram_slot_lock(zram, index);
|
||||||
if (!zram_allocated(zram, index) ||
|
if (zram_allocated(zram, index) &&
|
||||||
zram_test_flag(zram, index, ZRAM_UNDER_WB))
|
!zram_test_flag(zram, index, ZRAM_UNDER_WB))
|
||||||
goto next;
|
|
||||||
zram_set_flag(zram, index, ZRAM_IDLE);
|
zram_set_flag(zram, index, ZRAM_IDLE);
|
||||||
next:
|
|
||||||
zram_slot_unlock(zram, index);
|
zram_slot_unlock(zram, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -330,6 +328,41 @@ static ssize_t idle_store(struct device *dev,
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||||
|
static ssize_t writeback_limit_enable_store(struct device *dev,
|
||||||
|
struct device_attribute *attr, const char *buf, size_t len)
|
||||||
|
{
|
||||||
|
struct zram *zram = dev_to_zram(dev);
|
||||||
|
u64 val;
|
||||||
|
ssize_t ret = -EINVAL;
|
||||||
|
|
||||||
|
if (kstrtoull(buf, 10, &val))
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
down_read(&zram->init_lock);
|
||||||
|
spin_lock(&zram->wb_limit_lock);
|
||||||
|
zram->wb_limit_enable = val;
|
||||||
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
|
up_read(&zram->init_lock);
|
||||||
|
ret = len;
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t writeback_limit_enable_show(struct device *dev,
|
||||||
|
struct device_attribute *attr, char *buf)
|
||||||
|
{
|
||||||
|
bool val;
|
||||||
|
struct zram *zram = dev_to_zram(dev);
|
||||||
|
|
||||||
|
down_read(&zram->init_lock);
|
||||||
|
spin_lock(&zram->wb_limit_lock);
|
||||||
|
val = zram->wb_limit_enable;
|
||||||
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
|
up_read(&zram->init_lock);
|
||||||
|
|
||||||
|
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||||
|
}
|
||||||
|
|
||||||
static ssize_t writeback_limit_store(struct device *dev,
|
static ssize_t writeback_limit_store(struct device *dev,
|
||||||
struct device_attribute *attr, const char *buf, size_t len)
|
struct device_attribute *attr, const char *buf, size_t len)
|
||||||
{
|
{
|
||||||
|
@ -341,9 +374,9 @@ static ssize_t writeback_limit_store(struct device *dev,
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
down_read(&zram->init_lock);
|
down_read(&zram->init_lock);
|
||||||
atomic64_set(&zram->stats.bd_wb_limit, val);
|
spin_lock(&zram->wb_limit_lock);
|
||||||
if (val == 0)
|
zram->bd_wb_limit = val;
|
||||||
zram->stop_writeback = false;
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
up_read(&zram->init_lock);
|
up_read(&zram->init_lock);
|
||||||
ret = len;
|
ret = len;
|
||||||
|
|
||||||
|
@ -357,7 +390,9 @@ static ssize_t writeback_limit_show(struct device *dev,
|
||||||
struct zram *zram = dev_to_zram(dev);
|
struct zram *zram = dev_to_zram(dev);
|
||||||
|
|
||||||
down_read(&zram->init_lock);
|
down_read(&zram->init_lock);
|
||||||
val = atomic64_read(&zram->stats.bd_wb_limit);
|
spin_lock(&zram->wb_limit_lock);
|
||||||
|
val = zram->bd_wb_limit;
|
||||||
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
up_read(&zram->init_lock);
|
up_read(&zram->init_lock);
|
||||||
|
|
||||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||||
|
@ -588,8 +623,8 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define HUGE_WRITEBACK 0x1
|
#define HUGE_WRITEBACK 1
|
||||||
#define IDLE_WRITEBACK 0x2
|
#define IDLE_WRITEBACK 2
|
||||||
|
|
||||||
static ssize_t writeback_store(struct device *dev,
|
static ssize_t writeback_store(struct device *dev,
|
||||||
struct device_attribute *attr, const char *buf, size_t len)
|
struct device_attribute *attr, const char *buf, size_t len)
|
||||||
|
@ -602,7 +637,7 @@ static ssize_t writeback_store(struct device *dev,
|
||||||
struct page *page;
|
struct page *page;
|
||||||
ssize_t ret, sz;
|
ssize_t ret, sz;
|
||||||
char mode_buf[8];
|
char mode_buf[8];
|
||||||
unsigned long mode = -1UL;
|
int mode = -1;
|
||||||
unsigned long blk_idx = 0;
|
unsigned long blk_idx = 0;
|
||||||
|
|
||||||
sz = strscpy(mode_buf, buf, sizeof(mode_buf));
|
sz = strscpy(mode_buf, buf, sizeof(mode_buf));
|
||||||
|
@ -618,7 +653,7 @@ static ssize_t writeback_store(struct device *dev,
|
||||||
else if (!strcmp(mode_buf, "huge"))
|
else if (!strcmp(mode_buf, "huge"))
|
||||||
mode = HUGE_WRITEBACK;
|
mode = HUGE_WRITEBACK;
|
||||||
|
|
||||||
if (mode == -1UL)
|
if (mode == -1)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
down_read(&zram->init_lock);
|
down_read(&zram->init_lock);
|
||||||
|
@ -645,10 +680,13 @@ static ssize_t writeback_store(struct device *dev,
|
||||||
bvec.bv_len = PAGE_SIZE;
|
bvec.bv_len = PAGE_SIZE;
|
||||||
bvec.bv_offset = 0;
|
bvec.bv_offset = 0;
|
||||||
|
|
||||||
if (zram->stop_writeback) {
|
spin_lock(&zram->wb_limit_lock);
|
||||||
|
if (zram->wb_limit_enable && !zram->bd_wb_limit) {
|
||||||
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
ret = -EIO;
|
ret = -EIO;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
|
|
||||||
if (!blk_idx) {
|
if (!blk_idx) {
|
||||||
blk_idx = alloc_block_bdev(zram);
|
blk_idx = alloc_block_bdev(zram);
|
||||||
|
@ -667,10 +705,11 @@ static ssize_t writeback_store(struct device *dev,
|
||||||
zram_test_flag(zram, index, ZRAM_UNDER_WB))
|
zram_test_flag(zram, index, ZRAM_UNDER_WB))
|
||||||
goto next;
|
goto next;
|
||||||
|
|
||||||
if ((mode & IDLE_WRITEBACK &&
|
if (mode == IDLE_WRITEBACK &&
|
||||||
!zram_test_flag(zram, index, ZRAM_IDLE)) &&
|
!zram_test_flag(zram, index, ZRAM_IDLE))
|
||||||
(mode & HUGE_WRITEBACK &&
|
goto next;
|
||||||
!zram_test_flag(zram, index, ZRAM_HUGE)))
|
if (mode == HUGE_WRITEBACK &&
|
||||||
|
!zram_test_flag(zram, index, ZRAM_HUGE))
|
||||||
goto next;
|
goto next;
|
||||||
/*
|
/*
|
||||||
* Clearing ZRAM_UNDER_WB is duty of caller.
|
* Clearing ZRAM_UNDER_WB is duty of caller.
|
||||||
|
@ -732,11 +771,10 @@ static ssize_t writeback_store(struct device *dev,
|
||||||
zram_set_element(zram, index, blk_idx);
|
zram_set_element(zram, index, blk_idx);
|
||||||
blk_idx = 0;
|
blk_idx = 0;
|
||||||
atomic64_inc(&zram->stats.pages_stored);
|
atomic64_inc(&zram->stats.pages_stored);
|
||||||
if (atomic64_add_unless(&zram->stats.bd_wb_limit,
|
spin_lock(&zram->wb_limit_lock);
|
||||||
-1 << (PAGE_SHIFT - 12), 0)) {
|
if (zram->wb_limit_enable && zram->bd_wb_limit > 0)
|
||||||
if (atomic64_read(&zram->stats.bd_wb_limit) == 0)
|
zram->bd_wb_limit -= 1UL << (PAGE_SHIFT - 12);
|
||||||
zram->stop_writeback = true;
|
spin_unlock(&zram->wb_limit_lock);
|
||||||
}
|
|
||||||
next:
|
next:
|
||||||
zram_slot_unlock(zram, index);
|
zram_slot_unlock(zram, index);
|
||||||
}
|
}
|
||||||
|
@ -1812,6 +1850,7 @@ static DEVICE_ATTR_RW(comp_algorithm);
|
||||||
static DEVICE_ATTR_RW(backing_dev);
|
static DEVICE_ATTR_RW(backing_dev);
|
||||||
static DEVICE_ATTR_WO(writeback);
|
static DEVICE_ATTR_WO(writeback);
|
||||||
static DEVICE_ATTR_RW(writeback_limit);
|
static DEVICE_ATTR_RW(writeback_limit);
|
||||||
|
static DEVICE_ATTR_RW(writeback_limit_enable);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static struct attribute *zram_disk_attrs[] = {
|
static struct attribute *zram_disk_attrs[] = {
|
||||||
|
@ -1828,6 +1867,7 @@ static struct attribute *zram_disk_attrs[] = {
|
||||||
&dev_attr_backing_dev.attr,
|
&dev_attr_backing_dev.attr,
|
||||||
&dev_attr_writeback.attr,
|
&dev_attr_writeback.attr,
|
||||||
&dev_attr_writeback_limit.attr,
|
&dev_attr_writeback_limit.attr,
|
||||||
|
&dev_attr_writeback_limit_enable.attr,
|
||||||
#endif
|
#endif
|
||||||
&dev_attr_io_stat.attr,
|
&dev_attr_io_stat.attr,
|
||||||
&dev_attr_mm_stat.attr,
|
&dev_attr_mm_stat.attr,
|
||||||
|
@ -1867,7 +1907,9 @@ static int zram_add(void)
|
||||||
device_id = ret;
|
device_id = ret;
|
||||||
|
|
||||||
init_rwsem(&zram->init_lock);
|
init_rwsem(&zram->init_lock);
|
||||||
|
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||||
|
spin_lock_init(&zram->wb_limit_lock);
|
||||||
|
#endif
|
||||||
queue = blk_alloc_queue(GFP_KERNEL);
|
queue = blk_alloc_queue(GFP_KERNEL);
|
||||||
if (!queue) {
|
if (!queue) {
|
||||||
pr_err("Error allocating disk queue for device %d\n",
|
pr_err("Error allocating disk queue for device %d\n",
|
||||||
|
|
|
@ -86,7 +86,6 @@ struct zram_stats {
|
||||||
atomic64_t bd_count; /* no. of pages in backing device */
|
atomic64_t bd_count; /* no. of pages in backing device */
|
||||||
atomic64_t bd_reads; /* no. of reads from backing device */
|
atomic64_t bd_reads; /* no. of reads from backing device */
|
||||||
atomic64_t bd_writes; /* no. of writes from backing device */
|
atomic64_t bd_writes; /* no. of writes from backing device */
|
||||||
atomic64_t bd_wb_limit; /* writeback limit of backing device */
|
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -114,8 +113,10 @@ struct zram {
|
||||||
*/
|
*/
|
||||||
bool claim; /* Protected by bdev->bd_mutex */
|
bool claim; /* Protected by bdev->bd_mutex */
|
||||||
struct file *backing_dev;
|
struct file *backing_dev;
|
||||||
bool stop_writeback;
|
|
||||||
#ifdef CONFIG_ZRAM_WRITEBACK
|
#ifdef CONFIG_ZRAM_WRITEBACK
|
||||||
|
spinlock_t wb_limit_lock;
|
||||||
|
bool wb_limit_enable;
|
||||||
|
u64 bd_wb_limit;
|
||||||
struct block_device *bdev;
|
struct block_device *bdev;
|
||||||
unsigned int old_block_size;
|
unsigned int old_block_size;
|
||||||
unsigned long *bitmap;
|
unsigned long *bitmap;
|
||||||
|
|
Loading…
Reference in a new issue