mm: kswapd: keep kswapd awake for high-order allocations until a percentage of the node is balanced

When reclaiming for high-order allocations, kswapd is responsible for
balancing a node, but it should not reclaim excessively.  It avoids
excessive reclaim by treating the node as balanced as soon as any one zone
in the node is balanced.  Where zone sizes are imbalanced (e.g. ZONE_DMA
alongside both ZONE_DMA32 and ZONE_NORMAL), kswapd can go to sleep
prematurely because just one small zone was balanced.

This alters the sleep logic of kswapd slightly.  It counts the number of
pages that make up the balanced zones.  If the total number of balanced
pages is more than a quarter of the node, kswapd will go back to sleep.
This should keep a node balanced without reclaiming an excessive number of
pages.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Eric B Munson <emunson@mgebm.net>
Cc: Simon Kirby <sim@hostway.ca>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Shaohua Li <shaohua.li@intel.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Mel Gorman 2011-01-13 15:46:21 -08:00 committed by Linus Torvalds
parent 9950474883
commit 1741c87757

View file

@ -2198,10 +2198,40 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
} }
#endif #endif
/*
 * pgdat_balanced() is used when checking if a node is balanced for
 * high-order allocations. Only zones that meet watermarks and are in a zone
 * allowed by the caller's classzone_idx are added to balanced_pages. The
 * total of balanced pages must be at least 25% of the pages in the zones
 * allowed by classzone_idx for the node to be considered balanced. Forcing
 * all zones to be balanced for high orders can cause excessive reclaim when
 * there are imbalanced zones.
 *
 * The choice of 25% is due to
 *   o a 16M DMA zone that is balanced will not balance a zone on any
 *     reasonable sized machine
 *   o On all other machines, the top zone must be at least a reasonable
 *     percentage of the middle zones. For example, on 32-bit x86, highmem
 *     would need to be at least 256M for it to balance a whole node.
 *     Similarly, on x86-64 the Normal zone would need to be at least 1G
 *     to balance a node on its own. These seemed like reasonable ratios.
 *
 * @pgdat:          node whose zones are being checked
 * @balanced_pages: sum of present_pages over the zones the caller found to
 *                  be balanced
 * @classzone_idx:  highest zone index (inclusive) that counts towards the
 *                  node total
 *
 * Returns true if balanced_pages is at least a quarter of the pages present
 * in zones 0..classzone_idx of @pgdat.
 */
static bool pgdat_balanced(pg_data_t *pgdat, unsigned long balanced_pages,
						int classzone_idx)
{
	unsigned long present_pages = 0;
	int i;

	for (i = 0; i <= classzone_idx; i++)
		present_pages += pgdat->node_zones[i].present_pages;

	/*
	 * Use >= so that "at least 25%" really includes the boundary. This
	 * also covers the special case where the eligible zones contain no
	 * pages at all: 0 >= 0 reports the node as balanced, whereas a strict
	 * comparison could never be satisfied.
	 */
	return balanced_pages >= (present_pages >> 2);
}
/* is kswapd sleeping prematurely? */ /* is kswapd sleeping prematurely? */
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining) static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{ {
int i; int i;
unsigned long balanced = 0;
bool all_zones_ok = true;
/* If a direct reclaimer woke kswapd within HZ/10, it's premature */ /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
if (remaining) if (remaining)
@ -2219,10 +2249,20 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone), if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
0, 0)) 0, 0))
return 1; all_zones_ok = false;
else
balanced += zone->present_pages;
} }
return 0; /*
* For high-order requests, the balanced zones must contain at least
* 25% of the nodes pages for kswapd to sleep. For order-0, all zones
* must be balanced
*/
if (order)
return pgdat_balanced(pgdat, balanced, 0);
else
return !all_zones_ok;
} }
/* /*
@ -2250,7 +2290,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
int classzone_idx) int classzone_idx)
{ {
int all_zones_ok; int all_zones_ok;
int any_zone_ok; unsigned long balanced;
int priority; int priority;
int i; int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
@ -2284,7 +2324,7 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
disable_swap_token(); disable_swap_token();
all_zones_ok = 1; all_zones_ok = 1;
any_zone_ok = 0; balanced = 0;
/* /*
* Scan in the highmem->dma direction for the highest * Scan in the highmem->dma direction for the highest
@ -2404,11 +2444,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
*/ */
zone_clear_flag(zone, ZONE_CONGESTED); zone_clear_flag(zone, ZONE_CONGESTED);
if (i <= classzone_idx) if (i <= classzone_idx)
any_zone_ok = 1; balanced += zone->present_pages;
} }
} }
if (all_zones_ok || (order && any_zone_ok)) if (all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))
break; /* kswapd: all done */ break; /* kswapd: all done */
/* /*
* OK, kswapd is getting into trouble. Take a nap, then take * OK, kswapd is getting into trouble. Take a nap, then take
@ -2434,10 +2474,10 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
/* /*
* order-0: All zones must meet high watermark for a balanced node * order-0: All zones must meet high watermark for a balanced node
* high-order: Any zone below pgdats classzone_idx must meet the high * high-order: Balanced zones must make up at least 25% of the node
* watermark for a balanced node * for the node to be balanced
*/ */
if (!(all_zones_ok || (order && any_zone_ok))) { if (!(all_zones_ok || (order && pgdat_balanced(pgdat, balanced, classzone_idx)))) {
cond_resched(); cond_resched();
try_to_freeze(); try_to_freeze();