hugetlb: new sysfs interface
Provide new hugepages user APIs that are more suited to multiple hstates in sysfs. There is a new directory, /sys/kernel/mm/hugepages. Underneath that directory there will be a directory per-supported hugepage size, e.g.: /sys/kernel/mm/hugepages/hugepages-64kB /sys/kernel/mm/hugepages/hugepages-16384kB /sys/kernel/mm/hugepages/hugepages-16777216kB corresponding to 64k, 16m and 16g respectively. Within each hugepages-size directory there are a number of files, corresponding to the tracked counters in the hstate, e.g.: /sys/kernel/mm/hugepages/hugepages-64kB/nr_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/nr_overcommit_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/free_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/resv_hugepages /sys/kernel/mm/hugepages/hugepages-64kB/surplus_hugepages Of these files, the first two are read-write and the latter three are read-only. The size of the hugepage being manipulated is trivially deducible from the enclosing directory and is always expressed in kB (to match meminfo). [dave@linux.vnet.ibm.com: fix build] [nacc@us.ibm.com: hugetlb: hang off of /sys/kernel/mm rather than /sys/kernel] [nacc@us.ibm.com: hugetlb: remove CONFIG_SYSFS dependency] Acked-by: Greg Kroah-Hartman <gregkh@suse.de> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Dave Hansen <dave@linux.vnet.ibm.com> Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
a137e1cc6d
commit
a343787016
4 changed files with 262 additions and 66 deletions
15
Documentation/ABI/testing/sysfs-kernel-mm-hugepages
Normal file
15
Documentation/ABI/testing/sysfs-kernel-mm-hugepages
Normal file
|
@ -0,0 +1,15 @@
|
|||
What: /sys/kernel/mm/hugepages/
|
||||
Date: June 2008
|
||||
Contact: Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
|
||||
Description:
|
||||
/sys/kernel/mm/hugepages/ contains a number of subdirectories
|
||||
of the form hugepages-<size>kB, where <size> is the page size
|
||||
of the hugepages supported by the kernel/CPU combination.
|
||||
|
||||
Under these directories are a number of files:
|
||||
nr_hugepages
|
||||
nr_overcommit_hugepages
|
||||
free_hugepages
|
||||
surplus_hugepages
|
||||
resv_hugepages
|
||||
See Documentation/vm/hugetlbpage.txt for details.
|
|
@ -95,6 +95,29 @@ this condition holds, however, no more surplus huge pages will be
|
|||
allowed on the system until one of the two sysctls is increased
|
||||
sufficiently, or the surplus huge pages go out of use and are freed.
|
||||
|
||||
With support for multiple hugepage pools at run-time available, much of
|
||||
the hugepage userspace interface has been duplicated in sysfs. The above
|
||||
information applies to the default hugepage size (which will be
|
||||
controlled by the proc interfaces for backwards compatibility). The root
|
||||
hugepage control directory is
|
||||
|
||||
/sys/kernel/mm/hugepages
|
||||
|
||||
For each hugepage size supported by the running kernel, a subdirectory
|
||||
will exist, of the form
|
||||
|
||||
hugepages-${size}kB
|
||||
|
||||
Inside each of these directories, the same set of files will exist:
|
||||
|
||||
nr_hugepages
|
||||
nr_overcommit_hugepages
|
||||
free_hugepages
|
||||
resv_hugepages
|
||||
surplus_hugepages
|
||||
|
||||
which function as described above for the default hugepage-sized case.
|
||||
|
||||
If the user applications are going to request hugepages using the mmap system
|
||||
call, then it is required that the system administrator mount a file system of
|
||||
type hugetlbfs:
|
||||
|
|
|
@ -164,6 +164,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
|
|||
|
||||
#ifdef CONFIG_HUGETLB_PAGE
|
||||
|
||||
#define HSTATE_NAME_LEN 32
|
||||
/* Defines one hugetlb page size */
|
||||
struct hstate {
|
||||
int hugetlb_next_nid;
|
||||
|
@ -179,6 +180,7 @@ struct hstate {
|
|||
unsigned int nr_huge_pages_node[MAX_NUMNODES];
|
||||
unsigned int free_huge_pages_node[MAX_NUMNODES];
|
||||
unsigned int surplus_huge_pages_node[MAX_NUMNODES];
|
||||
char name[HSTATE_NAME_LEN];
|
||||
};
|
||||
|
||||
void __init hugetlb_add_hstate(unsigned order);
|
||||
|
|
288
mm/hugetlb.c
288
mm/hugetlb.c
|
@ -14,6 +14,7 @@
|
|||
#include <linux/mempolicy.h>
|
||||
#include <linux/cpuset.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
#include <asm/page.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
@ -942,72 +943,6 @@ static void __init report_hugepages(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
 * Boot-time initialisation of the hugetlb subsystem: make sure an hstate
 * exists for HPAGE_SIZE, record its index as the default, initialise all
 * hstates and report them.  Always returns 0.
 */
static int __init hugetlb_init(void)
{
	BUILD_BUG_ON(HPAGE_SHIFT == 0);

	/* No hstate for the default size yet: add one and seed its pool size. */
	if (size_to_hstate(HPAGE_SIZE) == NULL) {
		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
	}

	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;

	hugetlb_init_hstates();
	report_hugepages();

	return 0;
}
module_init(hugetlb_init);
|
||||
|
||||
/* Should be called on processing a hugepagesz=... option */
|
||||
void __init hugetlb_add_hstate(unsigned order)
|
||||
{
|
||||
struct hstate *h;
|
||||
if (size_to_hstate(PAGE_SIZE << order)) {
|
||||
printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
|
||||
return;
|
||||
}
|
||||
BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
|
||||
BUG_ON(order == 0);
|
||||
h = &hstates[max_hstate++];
|
||||
h->order = order;
|
||||
h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
|
||||
hugetlb_init_one_hstate(h);
|
||||
parsed_hstate = h;
|
||||
}
|
||||
|
||||
static int __init hugetlb_setup(char *s)
|
||||
{
|
||||
unsigned long *mhp;
|
||||
|
||||
/*
|
||||
* !max_hstate means we haven't parsed a hugepagesz= parameter yet,
|
||||
* so this hugepages= parameter goes to the "default hstate".
|
||||
*/
|
||||
if (!max_hstate)
|
||||
mhp = &default_hstate_max_huge_pages;
|
||||
else
|
||||
mhp = &parsed_hstate->max_huge_pages;
|
||||
|
||||
if (sscanf(s, "%lu", mhp) <= 0)
|
||||
*mhp = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("hugepages=", hugetlb_setup);
|
||||
|
||||
static unsigned int cpuset_mems_nr(unsigned int *array)
|
||||
{
|
||||
int node;
|
||||
unsigned int nr = 0;
|
||||
|
||||
for_each_node_mask(node, cpuset_current_mems_allowed)
|
||||
nr += array[node];
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
#ifdef CONFIG_HIGHMEM
|
||||
static void try_to_free_low(struct hstate *h, unsigned long count)
|
||||
|
@ -1105,6 +1040,227 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
|
|||
return ret;
|
||||
}
|
||||
|
||||
#define HSTATE_ATTR_RO(_name) \
|
||||
static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
|
||||
|
||||
#define HSTATE_ATTR(_name) \
|
||||
static struct kobj_attribute _name##_attr = \
|
||||
__ATTR(_name, 0644, _name##_show, _name##_store)
|
||||
|
||||
static struct kobject *hugepages_kobj;
|
||||
static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
|
||||
|
||||
static struct hstate *kobj_to_hstate(struct kobject *kobj)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < HUGE_MAX_HSTATE; i++)
|
||||
if (hstate_kobjs[i] == kobj)
|
||||
return &hstates[i];
|
||||
BUG();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static ssize_t nr_hugepages_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct hstate *h = kobj_to_hstate(kobj);
|
||||
return sprintf(buf, "%lu\n", h->nr_huge_pages);
|
||||
}
|
||||
/*
 * sysfs store handler for nr_hugepages: parse a base-10 count from @buf and
 * hand it to set_max_huge_pages() for the hstate that owns @kobj.
 *
 * Returns @count on success, or the error code from strict_strtoul() when
 * @buf does not contain a valid unsigned number.
 */
static ssize_t nr_hugepages_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	int err;
	unsigned long input;
	struct hstate *h = kobj_to_hstate(kobj);

	err = strict_strtoul(buf, 10, &input);
	/*
	 * Report the parse failure to the writer.  Returning 0 would mean
	 * "zero bytes consumed", which hides the error and lets callers that
	 * retry short writes resubmit the same bad input forever.
	 */
	if (err)
		return err;

	/* Remember whatever set_max_huge_pages() returns as the new maximum. */
	h->max_huge_pages = set_max_huge_pages(h, input);

	return count;
}
HSTATE_ATTR(nr_hugepages);
|
||||
|
||||
static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct hstate *h = kobj_to_hstate(kobj);
|
||||
return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
|
||||
}
|
||||
/*
 * sysfs store handler for nr_overcommit_hugepages: parse a base-10 value
 * from @buf and store it in the hstate that owns @kobj while holding
 * hugetlb_lock.
 *
 * Returns @count on success, or the error code from strict_strtoul() when
 * @buf does not contain a valid unsigned number.
 */
static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	int err;
	unsigned long input;
	struct hstate *h = kobj_to_hstate(kobj);

	err = strict_strtoul(buf, 10, &input);
	/*
	 * Report the parse failure to the writer.  Returning 0 would mean
	 * "zero bytes consumed", which hides the error and lets callers that
	 * retry short writes resubmit the same bad input forever.
	 */
	if (err)
		return err;

	spin_lock(&hugetlb_lock);
	h->nr_overcommit_huge_pages = input;
	spin_unlock(&hugetlb_lock);

	return count;
}
HSTATE_ATTR(nr_overcommit_hugepages);
|
||||
|
||||
static ssize_t free_hugepages_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct hstate *h = kobj_to_hstate(kobj);
|
||||
return sprintf(buf, "%lu\n", h->free_huge_pages);
|
||||
}
|
||||
HSTATE_ATTR_RO(free_hugepages);
|
||||
|
||||
static ssize_t resv_hugepages_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct hstate *h = kobj_to_hstate(kobj);
|
||||
return sprintf(buf, "%lu\n", h->resv_huge_pages);
|
||||
}
|
||||
HSTATE_ATTR_RO(resv_hugepages);
|
||||
|
||||
static ssize_t surplus_hugepages_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct hstate *h = kobj_to_hstate(kobj);
|
||||
return sprintf(buf, "%lu\n", h->surplus_huge_pages);
|
||||
}
|
||||
HSTATE_ATTR_RO(surplus_hugepages);
|
||||
|
||||
static struct attribute *hstate_attrs[] = {
|
||||
&nr_hugepages_attr.attr,
|
||||
&nr_overcommit_hugepages_attr.attr,
|
||||
&free_hugepages_attr.attr,
|
||||
&resv_hugepages_attr.attr,
|
||||
&surplus_hugepages_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute_group hstate_attr_group = {
|
||||
.attrs = hstate_attrs,
|
||||
};
|
||||
|
||||
/*
 * Create the sysfs directory for @h (named h->name, under hugepages_kobj)
 * and populate it with the hstate attribute group.
 *
 * Returns 0 on success, -ENOMEM if the kobject could not be created, or
 * the error from sysfs_create_group().  On failure the hstate_kobjs[] slot
 * for @h is left NULL.
 */
static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
{
	struct kobject **slot = &hstate_kobjs[h - hstates];
	int retval;

	*slot = kobject_create_and_add(h->name, hugepages_kobj);
	if (!*slot)
		return -ENOMEM;

	retval = sysfs_create_group(*slot, &hstate_attr_group);
	if (retval) {
		kobject_put(*slot);
		/*
		 * Do not leave a stale pointer in the table: hugetlb_exit()
		 * puts every slot again and kobj_to_hstate() compares
		 * against it.
		 */
		*slot = NULL;
	}

	return retval;
}
|
||||
|
||||
/*
 * Create the "hugepages" directory under mm_kobj and one subdirectory per
 * hstate.  If the top-level directory cannot be created nothing else is
 * attempted; a failure for a single hstate is logged and the remaining
 * hstates are still processed.
 */
static void __init hugetlb_sysfs_init(void)
{
	struct hstate *h;
	int err;

	hugepages_kobj = kobject_create_and_add("hugepages", mm_kobj);
	if (!hugepages_kobj)
		return;

	for_each_hstate(h) {
		err = hugetlb_sysfs_add_hstate(h);
		/* Terminate the message with a newline so the log line is complete. */
		if (err)
			printk(KERN_ERR "Hugetlb: Unable to add hstate %s\n",
								h->name);
	}
}
|
||||
|
||||
/*
 * Module exit: drop the reference on every per-hstate kobject, then on the
 * top-level hugepages kobject.
 */
static void __exit hugetlb_exit(void)
{
	struct hstate *hs;

	for_each_hstate(hs)
		kobject_put(hstate_kobjs[hs - hstates]);

	kobject_put(hugepages_kobj);
}
module_exit(hugetlb_exit);
|
||||
|
||||
/*
 * Boot-time initialisation of the hugetlb subsystem: make sure an hstate
 * exists for HPAGE_SIZE, record its index as the default, initialise all
 * hstates, report them and register the sysfs interface.  Always returns 0.
 */
static int __init hugetlb_init(void)
{
	BUILD_BUG_ON(HPAGE_SHIFT == 0);

	/* No hstate for the default size yet: add one and seed its pool size. */
	if (size_to_hstate(HPAGE_SIZE) == NULL) {
		hugetlb_add_hstate(HUGETLB_PAGE_ORDER);
		parsed_hstate->max_huge_pages = default_hstate_max_huge_pages;
	}

	default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates;

	hugetlb_init_hstates();
	report_hugepages();
	hugetlb_sysfs_init();

	return 0;
}
module_init(hugetlb_init);
|
||||
|
||||
/* Should be called on processing a hugepagesz=... option */
|
||||
void __init hugetlb_add_hstate(unsigned order)
|
||||
{
|
||||
struct hstate *h;
|
||||
if (size_to_hstate(PAGE_SIZE << order)) {
|
||||
printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n");
|
||||
return;
|
||||
}
|
||||
BUG_ON(max_hstate >= HUGE_MAX_HSTATE);
|
||||
BUG_ON(order == 0);
|
||||
h = &hstates[max_hstate++];
|
||||
h->order = order;
|
||||
h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
|
||||
snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
|
||||
huge_page_size(h)/1024);
|
||||
hugetlb_init_one_hstate(h);
|
||||
parsed_hstate = h;
|
||||
}
|
||||
|
||||
static int __init hugetlb_setup(char *s)
|
||||
{
|
||||
unsigned long *mhp;
|
||||
|
||||
/*
|
||||
* !max_hstate means we haven't parsed a hugepagesz= parameter yet,
|
||||
* so this hugepages= parameter goes to the "default hstate".
|
||||
*/
|
||||
if (!max_hstate)
|
||||
mhp = &default_hstate_max_huge_pages;
|
||||
else
|
||||
mhp = &parsed_hstate->max_huge_pages;
|
||||
|
||||
if (sscanf(s, "%lu", mhp) <= 0)
|
||||
*mhp = 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("hugepages=", hugetlb_setup);
|
||||
|
||||
static unsigned int cpuset_mems_nr(unsigned int *array)
|
||||
{
|
||||
int node;
|
||||
unsigned int nr = 0;
|
||||
|
||||
for_each_node_mask(node, cpuset_current_mems_allowed)
|
||||
nr += array[node];
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
int hugetlb_sysctl_handler(struct ctl_table *table, int write,
|
||||
struct file *file, void __user *buffer,
|
||||
size_t *length, loff_t *ppos)
|
||||
|
|
Loading…
Reference in a new issue