diff --git a/drivers/base/node.c b/drivers/base/node.c
index 1fe5536d404f..f502711d28db 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -173,6 +173,43 @@ static ssize_t node_read_distance(struct sys_device * dev,
 }
 static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 
+#ifdef CONFIG_HUGETLBFS
+/*
+ * hugetlbfs per node attributes registration interface:
+ * When/if hugetlb[fs] subsystem initializes [sometime after this module],
+ * it will register its per node attributes for all nodes online at that
+ * time. It will also call register_hugetlbfs_with_node(), below, to
+ * register its attribute registration functions with this node driver.
+ * Once these hooks have been initialized, the node driver will call into
+ * the hugetlb module to [un]register attributes for hot-plugged nodes.
+ */
+static node_registration_func_t __hugetlb_register_node;
+static node_registration_func_t __hugetlb_unregister_node;
+
+static inline void hugetlb_register_node(struct node *node)
+{
+	if (__hugetlb_register_node)
+		__hugetlb_register_node(node);
+}
+
+static inline void hugetlb_unregister_node(struct node *node)
+{
+	if (__hugetlb_unregister_node)
+		__hugetlb_unregister_node(node);
+}
+
+void register_hugetlbfs_with_node(node_registration_func_t doregister,
+				  node_registration_func_t unregister)
+{
+	__hugetlb_register_node = doregister;
+	__hugetlb_unregister_node = unregister;
+}
+#else
+static inline void hugetlb_register_node(struct node *node) {}
+
+static inline void hugetlb_unregister_node(struct node *node) {}
+#endif
+
 /*
  * register_node - Setup a sysfs device for a node.
  * @num - Node number to use when creating the device.
@@ -196,6 +233,7 @@ int register_node(struct node *node, int num, struct node *parent)
 		sysdev_create_file(&node->sysdev, &attr_distance);
 
 		scan_unevictable_register_node(node);
+		hugetlb_register_node(node);
 	}
 	return error;
 }
@@ -216,6 +254,7 @@ void unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_distance);
 
 	scan_unevictable_unregister_node(node);
+	hugetlb_unregister_node(node);
 
 	sysdev_unregister(&node->sysdev);
 }
diff --git a/include/linux/node.h b/include/linux/node.h
index 681a697b9a86..dae1521e1f05 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -28,6 +28,7 @@ struct node {
 
 struct memory_block;
 extern struct node node_devices[];
+typedef void (*node_registration_func_t)(struct node *);
 
 extern int register_node(struct node *, int, struct node *);
 extern void unregister_node(struct node *node);
@@ -39,6 +40,11 @@ extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid);
 extern int register_mem_sect_under_node(struct memory_block *mem_blk,
 						int nid);
 extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk);
+
+#ifdef CONFIG_HUGETLBFS
+extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
+					 node_registration_func_t unregister);
+#endif
 #else
 static inline int register_one_node(int nid)
 {
@@ -65,6 +71,11 @@ static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
 {
 	return 0;
 }
+
+static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
+						node_registration_func_t unreg)
+{
+}
 #endif
 
 #define to_node(sys_device) container_of(sys_device, struct node, sysdev)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1125d818ea06..544f7bcb615e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,6 +24,7 @@
 #include <asm/io.h>
 
 #include <linux/hugetlb.h>
+#include <linux/node.h>
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
@@ -1320,42 +1321,82 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
 static struct kobject *hugepages_kobj;
 static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
 
-static struct hstate *kobj_to_hstate(struct kobject *kobj)
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
+
+/*
+ * kobj_to_hstate - map an hstate attribute kobject to its hstate.
+ * For a global hstate kobject, NUMA_NO_NODE is returned via non-NULL nidp;
+ * any other kobject is handed to the per node lookup.
+ */
+static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
 {
 	int i;
+
 	for (i = 0; i < HUGE_MAX_HSTATE; i++)
-		if (hstate_kobjs[i] == kobj)
+		if (hstate_kobjs[i] == kobj) {
+			if (nidp)
+				*nidp = NUMA_NO_NODE;
 			return &hstates[i];
-	BUG();
-	return NULL;
+		}
+
+	return kobj_to_node_hstate(kobj, nidp);
 }
 
 static ssize_t nr_hugepages_show_common(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->nr_huge_pages);
+	struct hstate *h;
+	unsigned long nr_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		nr_huge_pages = h->nr_huge_pages;
+	else
+		nr_huge_pages = h->nr_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", nr_huge_pages);
 }
 static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
 			struct kobject *kobj, struct kobj_attribute *attr,
 			const char *buf, size_t len)
 {
 	int err;
+	int nid;
 	unsigned long count;
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h;
 	NODEMASK_ALLOC(nodemask_t, nodes_allowed);
 
 	err = strict_strtoul(buf, 10, &count);
-	if (err)
+	if (err) {
+		NODEMASK_FREE(nodes_allowed);	/* don't leak the mask */
 		return 0;
+	}
 
-	if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
-		NODEMASK_FREE(nodes_allowed);
-		nodes_allowed = &node_online_map;
-	}
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE) {
+		/*
+		 * global hstate attribute
+		 */
+		if (!(obey_mempolicy &&
+				init_nodemask_of_mempolicy(nodes_allowed))) {
+			NODEMASK_FREE(nodes_allowed);
+			nodes_allowed = &node_states[N_HIGH_MEMORY];
+		}
+	} else if (nodes_allowed) {
+		/*
+		 * per node hstate attribute: adjust count to global,
+		 * but restrict alloc/free to the specified node.
+		 */
+		count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
+		init_nodemask_of_node(nodes_allowed, nid);
+	} else
+		nodes_allowed = &node_states[N_HIGH_MEMORY];
+
 	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
-	if (nodes_allowed != &node_online_map)
+	/* only free a mask we allocated, never the static node_states[] one */
+	if (nodes_allowed != &node_states[N_HIGH_MEMORY])
 		NODEMASK_FREE(nodes_allowed);
 
 	return len;
@@ -1398,7 +1439,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy);
 static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 	return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
 }
 static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
@@ -1406,7 +1447,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
 {
 	int err;
 	unsigned long input;
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 
 	err = strict_strtoul(buf, 10, &input);
 	if (err)
@@ -1423,15 +1464,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
 static ssize_t free_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->free_huge_pages);
+	struct hstate *h;
+	unsigned long free_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		free_huge_pages = h->free_huge_pages;
+	else
+		free_huge_pages = h->free_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", free_huge_pages);
 }
 HSTATE_ATTR_RO(free_hugepages);
 
 static ssize_t resv_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
+	struct hstate *h = kobj_to_hstate(kobj, NULL);
 	return sprintf(buf, "%lu\n", h->resv_huge_pages);
 }
 HSTATE_ATTR_RO(resv_hugepages);
@@ -1439,8 +1489,17 @@ HSTATE_ATTR_RO(resv_hugepages);
 static ssize_t surplus_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
-	struct hstate *h = kobj_to_hstate(kobj);
-	return sprintf(buf, "%lu\n", h->surplus_huge_pages);
+	struct hstate *h;
+	unsigned long surplus_huge_pages;
+	int nid;
+
+	h = kobj_to_hstate(kobj, &nid);
+	if (nid == NUMA_NO_NODE)
+		surplus_huge_pages = h->surplus_huge_pages;
+	else
+		surplus_huge_pages = h->surplus_huge_pages_node[nid];
+
+	return sprintf(buf, "%lu\n", surplus_huge_pages);
 }
 HSTATE_ATTR_RO(surplus_hugepages);
 
@@ -1460,19 +1519,22 @@ static struct attribute_group hstate_attr_group = {
 	.attrs = hstate_attrs,
 };
 
-static int __init hugetlb_sysfs_add_hstate(struct hstate *h)
+/* not __init: also reached via hugetlb_register_node() on node hotplug */
+static int hugetlb_sysfs_add_hstate(struct hstate *h,
+				struct kobject *parent,
+				struct kobject **hstate_kobjs,
+				struct attribute_group *hstate_attr_group)
 {
 	int retval;
+	int hi = h - hstates;
 
-	hstate_kobjs[h - hstates] = kobject_create_and_add(h->name,
-							hugepages_kobj);
-	if (!hstate_kobjs[h - hstates])
+	hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
+	if (!hstate_kobjs[hi])
 		return -ENOMEM;
 
-	retval = sysfs_create_group(hstate_kobjs[h - hstates],
-							&hstate_attr_group);
+	retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
 	if (retval)
-		kobject_put(hstate_kobjs[h - hstates]);
+		kobject_put(hstate_kobjs[hi]);
 
 	return retval;
 }
@@ -1487,17 +1549,184 @@ static void __init hugetlb_sysfs_init(void)
 		return;
 
 	for_each_hstate(h) {
-		err = hugetlb_sysfs_add_hstate(h);
+		err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
+					 hstate_kobjs, &hstate_attr_group);
 		if (err)
 			printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
 								h->name);
 	}
 }
 
+#ifdef CONFIG_NUMA
+
+/*
+ * node_hstate/s - associate per node hstate attributes, via their kobjects,
+ * with node sysdevs in node_devices[] using a parallel array. The array
+ * index of a node sysdev or _hstate == node id.
+ * This is here to avoid any static dependency of the node sysdev driver, in
+ * the base kernel, on the hugetlb module.
+ */
+struct node_hstate {
+	struct kobject *hugepages_kobj;
+	struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
+};
+struct node_hstate node_hstates[MAX_NUMNODES];
+
+/*
+ * A subset of global hstate attributes for node sysdevs
+ */
+static struct attribute *per_node_hstate_attrs[] = {
+	&nr_hugepages_attr.attr,
+	&free_hugepages_attr.attr,
+	&surplus_hugepages_attr.attr,
+	NULL,
+};
+
+static struct attribute_group per_node_hstate_attr_group = {
+	.attrs = per_node_hstate_attrs,
+};
+
+/*
+ * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
+ * Returns node id via non-NULL nidp.
+ */
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
+{
+	int nid;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		struct node_hstate *nhs = &node_hstates[nid];
+		int i;
+		for (i = 0; i < HUGE_MAX_HSTATE; i++)
+			if (nhs->hstate_kobjs[i] == kobj) {
+				if (nidp)
+					*nidp = nid;
+				return &hstates[i];
+			}
+	}
+
+	BUG();
+	return NULL;
+}
+
+/*
+ * Unregister hstate attributes from a single node sysdev.
+ * No-op if no hstate attributes attached.
+ */
+void hugetlb_unregister_node(struct node *node)
+{
+	struct hstate *h;
+	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+
+	if (!nhs->hugepages_kobj)
+		return;
+
+	for_each_hstate(h)
+		if (nhs->hstate_kobjs[h - hstates]) {
+			kobject_put(nhs->hstate_kobjs[h - hstates]);
+			nhs->hstate_kobjs[h - hstates] = NULL;
+		}
+
+	kobject_put(nhs->hugepages_kobj);
+	nhs->hugepages_kobj = NULL;
+}
+
+/*
+ * hugetlb module exit: unregister hstate attributes from node sysdevs
+ * that have them.
+ */
+static void hugetlb_unregister_all_nodes(void)
+{
+	int nid;
+
+	/*
+	 * disable node sysdev registrations.
+	 */
+	register_hugetlbfs_with_node(NULL, NULL);
+
+	/*
+	 * remove hstate attributes from any nodes that have them.
+	 */
+	for (nid = 0; nid < nr_node_ids; nid++)
+		hugetlb_unregister_node(&node_devices[nid]);
+}
+
+/*
+ * Register hstate attributes for a single node sysdev.
+ * No-op if attributes already registered.
+ */
+void hugetlb_register_node(struct node *node)
+{
+	struct hstate *h;
+	struct node_hstate *nhs = &node_hstates[node->sysdev.id];
+	int err;
+
+	if (nhs->hugepages_kobj)
+		return;		/* already allocated */
+
+	nhs->hugepages_kobj = kobject_create_and_add("hugepages",
+							&node->sysdev.kobj);
+	if (!nhs->hugepages_kobj)
+		return;
+
+	for_each_hstate(h) {
+		err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
+						nhs->hstate_kobjs,
+						&per_node_hstate_attr_group);
+		if (err) {
+			printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
+					" for node %d\n",
+						h->name, node->sysdev.id);
+			hugetlb_unregister_node(node);
+			break;
+		}
+	}
+}
+
+/*
+ * hugetlb init time: register hstate attributes for all registered
+ * node sysdevs. All on-line nodes should have registered their
+ * associated sysdev by the time the hugetlb module initializes.
+ */
+static void hugetlb_register_all_nodes(void)
+{
+	int nid;
+
+	for (nid = 0; nid < nr_node_ids; nid++) {
+		struct node *node = &node_devices[nid];
+		if (node->sysdev.id == nid)
+			hugetlb_register_node(node);
+	}
+
+	/*
+	 * Let the node sysdev driver know we're here so it can
+	 * [un]register hstate attributes on node hotplug.
+	 */
+	register_hugetlbfs_with_node(hugetlb_register_node,
+				     hugetlb_unregister_node);
+}
+#else	/* !CONFIG_NUMA */
+
+static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
+{
+	BUG();
+	if (nidp)
+		*nidp = NUMA_NO_NODE;
+	return NULL;
+}
+
+static void hugetlb_unregister_all_nodes(void) { }
+
+static void hugetlb_register_all_nodes(void) { }
+
+#endif
+
 static void __exit hugetlb_exit(void)
 {
 	struct hstate *h;
 
+	hugetlb_unregister_all_nodes();
+
 	for_each_hstate(h) {
 		kobject_put(hstate_kobjs[h - hstates]);
 	}
@@ -1532,6 +1761,8 @@ static int __init hugetlb_init(void)
 
 	hugetlb_sysfs_init();
 
+	hugetlb_register_all_nodes();
+
 	return 0;
 }
 module_init(hugetlb_init);