[PATCH] separate bdi congestion functions from queue congestion functions
Separate out the concept of "queue congestion" from "backing-dev congestion". Congestion is a backing-dev concept, not a queue concept. The blk_* congestion functions are retained, as wrappers around the core backing-dev congestion functions. This proper layering is needed so that NFS can cleanly use the congestion functions, and so that CONFIG_BLOCK=n actually links. Cc: "Thomas Maier" <balagi@justmail.de> Cc: "Jens Axboe" <jens.axboe@oracle.com> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: David Howells <dhowells@redhat.com> Cc: Peter Osterlund <petero2@telia.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
79e2de4bc5
commit
3fcfab16c5
17 changed files with 126 additions and 104 deletions
arch/i386/lib
block
drivers/md
fs
include/linux
mm
|
@ -9,6 +9,7 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/mmx.h>
|
||||
|
||||
|
@ -741,7 +742,7 @@ unsigned long __copy_to_user_ll(void __user *to, const void *from,
|
|||
|
||||
if (retval == -ENOMEM && is_init(current)) {
|
||||
up_read(&current->mm->mmap_sem);
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
goto survive;
|
||||
}
|
||||
|
||||
|
|
|
@ -56,11 +56,6 @@ static kmem_cache_t *requestq_cachep;
|
|||
*/
|
||||
static kmem_cache_t *iocontext_cachep;
|
||||
|
||||
static wait_queue_head_t congestion_wqh[2] = {
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
|
||||
};
|
||||
|
||||
/*
|
||||
* Controlling structure to kblockd
|
||||
*/
|
||||
|
@ -112,37 +107,6 @@ static void blk_queue_congestion_threshold(struct request_queue *q)
|
|||
q->nr_congestion_off = nr;
|
||||
}
|
||||
|
||||
/*
|
||||
* A queue has just exited congestion. Note this in the global counter of
|
||||
* congested queues, and wake up anyone who was waiting for requests to be
|
||||
* put back.
|
||||
*/
|
||||
void blk_clear_queue_congested(request_queue_t *q, int rw)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
|
||||
clear_bit(bit, &q->backing_dev_info.state);
|
||||
smp_mb__after_clear_bit();
|
||||
if (waitqueue_active(wqh))
|
||||
wake_up(wqh);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_clear_queue_congested);
|
||||
|
||||
/*
|
||||
* A queue has just entered congestion. Flag that in the queue's VM-visible
|
||||
* state flags and increment the global counter of congested queues.
|
||||
*/
|
||||
void blk_set_queue_congested(request_queue_t *q, int rw)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
|
||||
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
|
||||
set_bit(bit, &q->backing_dev_info.state);
|
||||
}
|
||||
EXPORT_SYMBOL(blk_set_queue_congested);
|
||||
|
||||
/**
|
||||
* blk_get_backing_dev_info - get the address of a queue's backing_dev_info
|
||||
* @bdev: device
|
||||
|
@ -2755,41 +2719,6 @@ void blk_end_sync_rq(struct request *rq, int error)
|
|||
}
|
||||
EXPORT_SYMBOL(blk_end_sync_rq);
|
||||
|
||||
/**
|
||||
* blk_congestion_wait - wait for a queue to become uncongested
|
||||
* @rw: READ or WRITE
|
||||
* @timeout: timeout in jiffies
|
||||
*
|
||||
* Waits for up to @timeout jiffies for a queue (any queue) to exit congestion.
|
||||
* If no queues are congested then just wait for the next request to be
|
||||
* returned.
|
||||
*/
|
||||
long blk_congestion_wait(int rw, long timeout)
|
||||
{
|
||||
long ret;
|
||||
DEFINE_WAIT(wait);
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
|
||||
ret = io_schedule_timeout(timeout);
|
||||
finish_wait(wqh, &wait);
|
||||
return ret;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(blk_congestion_wait);
|
||||
|
||||
/**
|
||||
* blk_congestion_end - wake up sleepers on a congestion queue
|
||||
* @rw: READ or WRITE
|
||||
*/
|
||||
void blk_congestion_end(int rw)
|
||||
{
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
if (waitqueue_active(wqh))
|
||||
wake_up(wqh);
|
||||
}
|
||||
|
||||
/*
|
||||
* Has to be called with the request spinlock acquired
|
||||
*/
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include <linux/slab.h>
|
||||
#include <linux/crypto.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <asm/atomic.h>
|
||||
#include <linux/scatterlist.h>
|
||||
#include <asm/page.h>
|
||||
|
@ -602,7 +603,7 @@ static void process_write(struct crypt_io *io)
|
|||
|
||||
/* out of memory -> run queues */
|
||||
if (remaining)
|
||||
blk_congestion_wait(bio_data_dir(clone), HZ/100);
|
||||
congestion_wait(bio_data_dir(clone), HZ/100);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <linux/smp_lock.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/blkdev.h>
|
||||
|
||||
int fat_generic_ioctl(struct inode *inode, struct file *filp,
|
||||
|
@ -118,7 +119,7 @@ static int fat_file_release(struct inode *inode, struct file *filp)
|
|||
if ((filp->f_mode & FMODE_WRITE) &&
|
||||
MSDOS_SB(inode->i_sb)->options.flush) {
|
||||
fat_flush_inodes(inode->i_sb, inode, NULL);
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,8 @@
|
|||
#include <linux/nfs_fs.h>
|
||||
#include <linux/nfs_mount.h>
|
||||
#include <linux/nfs_page.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <linux/smp_lock.h>
|
||||
|
||||
|
@ -395,7 +397,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
|
|||
out:
|
||||
clear_bit(BDI_write_congested, &bdi->state);
|
||||
wake_up_all(&nfs_write_congestion);
|
||||
writeback_congestion_end();
|
||||
congestion_end(WRITE);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
@ -53,6 +53,7 @@
|
|||
#include <linux/workqueue.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
/* gets a struct reiserfs_journal_list * from a list head */
|
||||
#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
|
||||
|
@ -970,7 +971,7 @@ int reiserfs_async_progress_wait(struct super_block *s)
|
|||
DEFINE_WAIT(wait);
|
||||
struct reiserfs_journal *j = SB_JOURNAL(s);
|
||||
if (atomic_read(&j->j_async_throttle))
|
||||
blk_congestion_wait(WRITE, HZ / 10);
|
||||
congestion_wait(WRITE, HZ / 10);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <linux/highmem.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include "time.h"
|
||||
#include "kmem.h"
|
||||
|
||||
|
@ -53,7 +54,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags)
|
|||
printk(KERN_ERR "XFS: possible memory allocation "
|
||||
"deadlock in %s (mode:0x%x)\n",
|
||||
__FUNCTION__, lflags);
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
|
@ -131,7 +132,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
|
|||
printk(KERN_ERR "XFS: possible memory allocation "
|
||||
"deadlock in %s (mode:0x%x)\n",
|
||||
__FUNCTION__, lflags);
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include <linux/hash.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include "xfs_linux.h"
|
||||
|
||||
STATIC kmem_zone_t *xfs_buf_zone;
|
||||
|
@ -395,7 +396,7 @@ _xfs_buf_lookup_pages(
|
|||
|
||||
XFS_STATS_INC(xb_page_retries);
|
||||
xfsbufd_wakeup(0, gfp_mask);
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,8 @@
|
|||
|
||||
#include <asm/atomic.h>
|
||||
|
||||
struct page;
|
||||
|
||||
/*
|
||||
* Bits in backing_dev_info.state
|
||||
*/
|
||||
|
@ -88,6 +90,11 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
|
|||
(1 << BDI_write_congested));
|
||||
}
|
||||
|
||||
void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
|
||||
void set_bdi_congested(struct backing_dev_info *bdi, int rw);
|
||||
long congestion_wait(int rw, long timeout);
|
||||
void congestion_end(int rw);
|
||||
|
||||
#define bdi_cap_writeback_dirty(bdi) \
|
||||
(!((bdi)->capabilities & BDI_CAP_NO_WRITEBACK))
|
||||
|
||||
|
|
|
@ -651,8 +651,26 @@ extern void blk_recount_segments(request_queue_t *, struct bio *);
|
|||
extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *);
|
||||
extern int sg_scsi_ioctl(struct file *, struct request_queue *,
|
||||
struct gendisk *, struct scsi_ioctl_command __user *);
|
||||
extern void blk_clear_queue_congested(request_queue_t *q, int rw);
|
||||
extern void blk_set_queue_congested(request_queue_t *q, int rw);
|
||||
|
||||
/*
|
||||
* A queue has just exitted congestion. Note this in the global counter of
|
||||
* congested queues, and wake up anyone who was waiting for requests to be
|
||||
* put back.
|
||||
*/
|
||||
static inline void blk_clear_queue_congested(request_queue_t *q, int rw)
|
||||
{
|
||||
clear_bdi_congested(&q->backing_dev_info, rw);
|
||||
}
|
||||
|
||||
/*
|
||||
* A queue has just entered congestion. Flag that in the queue's VM-visible
|
||||
* state flags and increment the global gounter of congested queues.
|
||||
*/
|
||||
static inline void blk_set_queue_congested(request_queue_t *q, int rw)
|
||||
{
|
||||
set_bdi_congested(&q->backing_dev_info, rw);
|
||||
}
|
||||
|
||||
extern void blk_start_queue(request_queue_t *q);
|
||||
extern void blk_stop_queue(request_queue_t *q);
|
||||
extern void blk_sync_queue(struct request_queue *q);
|
||||
|
@ -767,10 +785,8 @@ extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *);
|
|||
extern void blk_queue_free_tags(request_queue_t *);
|
||||
extern int blk_queue_resize_tags(request_queue_t *, int);
|
||||
extern void blk_queue_invalidate_tags(request_queue_t *);
|
||||
extern long blk_congestion_wait(int rw, long timeout);
|
||||
extern struct blk_queue_tag *blk_init_tags(int);
|
||||
extern void blk_free_tags(struct blk_queue_tag *);
|
||||
extern void blk_congestion_end(int rw);
|
||||
|
||||
static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
|
||||
int tag)
|
||||
|
|
|
@ -85,7 +85,6 @@ int wakeup_pdflush(long nr_pages);
|
|||
void laptop_io_completion(void);
|
||||
void laptop_sync_completion(void);
|
||||
void throttle_vm_writeout(void);
|
||||
void writeback_congestion_end(void);
|
||||
|
||||
/* These are exported to sysctl. */
|
||||
extern int dirty_background_ratio;
|
||||
|
|
|
@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \
|
|||
obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
|
||||
page_alloc.o page-writeback.o pdflush.o \
|
||||
readahead.o swap.o truncate.o vmscan.o \
|
||||
prio_tree.o util.o mmzone.o vmstat.o $(mmu-y)
|
||||
prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
|
||||
$(mmu-y)
|
||||
|
||||
ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy)
|
||||
obj-y += bounce.o
|
||||
|
|
69
mm/backing-dev.c
Normal file
69
mm/backing-dev.c
Normal file
|
@ -0,0 +1,69 @@
|
|||
|
||||
#include <linux/wait.h>
|
||||
#include <linux/backing-dev.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
static wait_queue_head_t congestion_wqh[2] = {
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
|
||||
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
|
||||
};
|
||||
|
||||
|
||||
void clear_bdi_congested(struct backing_dev_info *bdi, int rw)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
|
||||
clear_bit(bit, &bdi->state);
|
||||
smp_mb__after_clear_bit();
|
||||
if (waitqueue_active(wqh))
|
||||
wake_up(wqh);
|
||||
}
|
||||
EXPORT_SYMBOL(clear_bdi_congested);
|
||||
|
||||
void set_bdi_congested(struct backing_dev_info *bdi, int rw)
|
||||
{
|
||||
enum bdi_state bit;
|
||||
|
||||
bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
|
||||
set_bit(bit, &bdi->state);
|
||||
}
|
||||
EXPORT_SYMBOL(set_bdi_congested);
|
||||
|
||||
/**
|
||||
* congestion_wait - wait for a backing_dev to become uncongested
|
||||
* @rw: READ or WRITE
|
||||
* @timeout: timeout in jiffies
|
||||
*
|
||||
* Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit
|
||||
* write congestion. If no backing_devs are congested then just wait for the
|
||||
* next write to be completed.
|
||||
*/
|
||||
long congestion_wait(int rw, long timeout)
|
||||
{
|
||||
long ret;
|
||||
DEFINE_WAIT(wait);
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
|
||||
ret = io_schedule_timeout(timeout);
|
||||
finish_wait(wqh, &wait);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(congestion_wait);
|
||||
|
||||
/**
|
||||
* congestion_end - wake up sleepers on a congested backing_dev_info
|
||||
* @rw: READ or WRITE
|
||||
*/
|
||||
void congestion_end(int rw)
|
||||
{
|
||||
wait_queue_head_t *wqh = &congestion_wqh[rw];
|
||||
|
||||
if (waitqueue_active(wqh))
|
||||
wake_up(wqh);
|
||||
}
|
||||
EXPORT_SYMBOL(congestion_end);
|
|
@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping)
|
|||
if (pages_written >= write_chunk)
|
||||
break; /* We've done our duty */
|
||||
}
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
}
|
||||
|
||||
if (nr_reclaimable + global_page_state(NR_WRITEBACK)
|
||||
|
@ -314,7 +314,7 @@ void throttle_vm_writeout(void)
|
|||
if (global_page_state(NR_UNSTABLE_NFS) +
|
||||
global_page_state(NR_WRITEBACK) <= dirty_thresh)
|
||||
break;
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages)
|
|||
min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
|
||||
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
|
||||
/* Wrote less than expected */
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
if (!wbc.encountered_congestion)
|
||||
break;
|
||||
}
|
||||
|
@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg)
|
|||
writeback_inodes(&wbc);
|
||||
if (wbc.nr_to_write > 0) {
|
||||
if (wbc.encountered_congestion)
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
else
|
||||
break; /* All the old data is written */
|
||||
}
|
||||
|
@ -955,15 +955,6 @@ int test_set_page_writeback(struct page *page)
|
|||
}
|
||||
EXPORT_SYMBOL(test_set_page_writeback);
|
||||
|
||||
/*
|
||||
* Wakes up tasks that are being throttled due to writeback congestion
|
||||
*/
|
||||
void writeback_congestion_end(void)
|
||||
{
|
||||
blk_congestion_end(WRITE);
|
||||
}
|
||||
EXPORT_SYMBOL(writeback_congestion_end);
|
||||
|
||||
/*
|
||||
* Return true if any of the pages in the mapping are marked with the
|
||||
* passed tag.
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include <linux/stop_machine.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/div64.h>
|
||||
|
@ -1050,7 +1051,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
|
|||
if (page)
|
||||
goto got_pg;
|
||||
if (gfp_mask & __GFP_NOFAIL) {
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
goto nofail_alloc;
|
||||
}
|
||||
}
|
||||
|
@ -1113,7 +1114,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
|
|||
do_retry = 1;
|
||||
}
|
||||
if (do_retry) {
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
goto rebalance;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,6 +48,7 @@
|
|||
#include <linux/ctype.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/div64.h>
|
||||
|
@ -1131,7 +1132,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
|
|||
page_cache_release(swappage);
|
||||
if (error == -ENOMEM) {
|
||||
/* let kswapd refresh zone for GFP_ATOMICs */
|
||||
blk_congestion_wait(WRITE, HZ/50);
|
||||
congestion_wait(WRITE, HZ/50);
|
||||
}
|
||||
goto repeat;
|
||||
}
|
||||
|
|
|
@ -1059,7 +1059,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
|
|||
|
||||
/* Take a nap, wait for some writeback to complete */
|
||||
if (sc.nr_scanned && priority < DEF_PRIORITY - 2)
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
}
|
||||
/* top priority shrink_caches still had more to do? don't OOM, then */
|
||||
if (!sc.all_unreclaimable)
|
||||
|
@ -1214,7 +1214,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order)
|
|||
* another pass across the zones.
|
||||
*/
|
||||
if (total_scanned && priority < DEF_PRIORITY - 2)
|
||||
blk_congestion_wait(WRITE, HZ/10);
|
||||
congestion_wait(WRITE, HZ/10);
|
||||
|
||||
/*
|
||||
* We do this so kswapd doesn't build up large priorities for
|
||||
|
@ -1458,7 +1458,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
|
|||
goto out;
|
||||
|
||||
if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
|
||||
blk_congestion_wait(WRITE, HZ / 10);
|
||||
congestion_wait(WRITE, HZ / 10);
|
||||
}
|
||||
|
||||
lru_pages = 0;
|
||||
|
|
Loading…
Reference in a new issue