From e11b956e9ebef098cc4f81964a1f57e40fe75cd4 Mon Sep 17 00:00:00 2001 From: Li Bin Date: Sat, 30 Jan 2016 11:54:03 +0800 Subject: [PATCH 01/44] kernel/Makefile: remove the useless CFLAGS_REMOVE_cgroup-debug.o The file cgroup-debug.c was removed by commit fe6934354f8e (cgroups: move the cgroup debug subsys into cgroup.c to access internal state), leaving the CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) line in kernel/Makefile useless. Signed-off-by: Li Bin Acked-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 53abf008ecb3..baa55e50a315 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -14,8 +14,7 @@ obj-y = fork.o exec_domain.o panic.o \ obj-$(CONFIG_MULTIUSER) += groups.o ifdef CONFIG_FUNCTION_TRACER -# Do not trace debug files and internal ftrace files -CFLAGS_REMOVE_cgroup-debug.o = $(CC_FLAGS_FTRACE) +# Do not trace internal ftrace files CFLAGS_REMOVE_irq_work.o = $(CC_FLAGS_FTRACE) endif From 223ffb29f9723a4b485cacf6dc7e6d639fffc322 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 11 Feb 2016 13:34:49 -0500 Subject: [PATCH 02/44] cgroup: provide cgroup_no_v1= to disable controllers in v1 mounts Testing cgroup2 can be painful with system software automatically mounting and populating all cgroup controllers in v1 mode. Sometimes they can be unmounted from rc.local; sometimes even that is too late. Provide a commandline option to disable certain controllers in v1 mounts, so that they remain available for cgroup2 mounts. Example use: cgroup_no_v1=memory,cpu cgroup_no_v1=all Disabling will be confirmed at boot time as follows: [ 0.013770] Disabling cpu control group subsystem in v1 mounts [ 0.016004] Disabling memory control group subsystem in v1 mounts Signed-off-by: Johannes Weiner Signed-off-by: Tejun Heo --- kernel/cgroup.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d27904c193da..7ad61915967f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -180,6 +180,9 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root); */ static bool cgrp_dfl_root_visible; +/* Controllers blocked by the commandline in v1 */ +static unsigned long cgroup_no_v1_mask; + /* some controllers are not supported in the default hierarchy */ static unsigned long cgrp_dfl_root_inhibit_ss_mask; @@ -241,6 +244,11 @@ static bool cgroup_ssid_enabled(int ssid) return static_key_enabled(cgroup_subsys_enabled_key[ssid]); } +static bool cgroup_ssid_no_v1(int ssid) +{ + return cgroup_no_v1_mask & (1 << ssid); +} + /** * cgroup_on_dfl - test whether a cgroup is on the default hierarchy * @cgrp: the cgroup of interest @@ -1678,6 +1686,8 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) continue; if (!cgroup_ssid_enabled(i)) continue; + if (cgroup_ssid_no_v1(i)) + continue; /* Mutually exclusive option 'all' + subsystem name */ if (all_ss) @@ -1698,7 +1708,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) */ if (all_ss || (!one_ss && !opts->none && !opts->name)) for_each_subsys(ss, i) - if (cgroup_ssid_enabled(i)) + if (cgroup_ssid_enabled(i) && !cgroup_ssid_no_v1(i)) opts->subsys_mask |= (1 << i); /* @@ -5324,6 +5334,10 @@ int __init cgroup_init(void) continue; } + if (cgroup_ssid_no_v1(ssid)) + printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n", + ss->name); + cgrp_dfl_root.subsys_mask |= 1 << ss->id; if (!ss->dfl_cftypes) @@ -5750,6
+5764,33 @@ static int __init cgroup_disable(char *str) } __setup("cgroup_disable=", cgroup_disable); +static int __init cgroup_no_v1(char *str) +{ + struct cgroup_subsys *ss; + char *token; + int i; + + while ((token = strsep(&str, ",")) != NULL) { + if (!*token) + continue; + + if (!strcmp(token, "all")) { + cgroup_no_v1_mask = ~0UL; + break; + } + + for_each_subsys(ss, i) { + if (strcmp(token, ss->name) && + strcmp(token, ss->legacy_name)) + continue; + + cgroup_no_v1_mask |= 1 << i; + } + } + return 1; +} +__setup("cgroup_no_v1=", cgroup_no_v1); + /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest From 1619b6d4fdd776b1b91bd288ad6f7e37fe183485 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 16 Feb 2016 13:21:14 -0500 Subject: [PATCH 03/44] cgroup: document cgroup_no_v1= Add cgroup_no_v1= to kernel-parameters.txt, and a small blurb to cgroup-v2.txt section about transitioning from cgroup to cgroup2. Signed-off-by: Johannes Weiner Signed-off-by: Tejun Heo --- Documentation/cgroup-v2.txt | 6 ++++++ Documentation/kernel-parameters.txt | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 9ae148ab1255..3922ae1654fd 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -132,6 +132,12 @@ strongly discouraged for production use. It is recommended to decide the hierarchies and controller associations before starting using the controllers after system boot. +During transition to v2, system management software might still +automount the v1 cgroup filesystem and so hijack all controllers +during boot, before manual intervention is possible. To make testing +and experimenting easier, the kernel parameter cgroup_no_v1= allows +disabling controllers in v1 and make them always available in v2. + 2-2. Organizing Processes diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cfb2c0f1a4a8..0d962a1ff11c 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -608,6 +608,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted. cut the overhead, others just disable the usage. So only cgroup_disable=memory is actually worthy} + cgroup_no_v1= [KNL] Disable one, multiple, all cgroup controllers in v1 + Format: { controller[,controller...] | "all" } + Like cgroup_disable, but only applies to cgroup v1; + the blacklisted controllers remain available in cgroup2. + cgroup.memory= [KNL] Pass options to the cgroup memory controller. Format: nosocket -- Disable socket memory accounting. From b598dde354de22d87f664a7b99b8c21437da8efb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:45 -0500 Subject: [PATCH 04/44] cgroup: fix error return value of cgroup_addrm_files() cgroup_addrm_files() incorrectly returned 0 after add failure. Fix it. 
Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7ad61915967f..68b032df77f5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3369,7 +3369,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, bool is_add) { struct cftype *cft, *cft_end = NULL; - int ret; + int ret = 0; lockdep_assert_held(&cgroup_mutex); @@ -3398,7 +3398,7 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, cgroup_rm_file(cgrp, cft); } } - return 0; + return ret; } static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) From 5eb385cc5ae1b31fbcdd727854a00c5a083f6b9b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: [PATCH 05/44] Revert "cgroup: add cgroup_subsys->css_e_css_changed()" This reverts commit 56c807ba4e91f0980567b6a69de239677879b17f. cgroup_subsys->css_e_css_changed() was supposed to be used by cgroup writeback support; however, the change to per-inode cgroup association made it unnecessary and the callback doesn't have any user. Remove it. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 1 - kernel/cgroup.c | 18 ------------------ 2 files changed, 19 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 789471dba6fb..4f3c0dac26b5 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -434,7 +434,6 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); - void (*css_e_css_changed)(struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 68b032df77f5..7727b6e43e10 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3127,24 +3127,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } } - /* - * The effective csses of all the descendants (excluding @cgrp) may - * have changed. Subsystems can optionally subscribe to this event - * by implementing ->css_e_css_changed() which is invoked if any of - * the effective csses seen from the css's cgroup may have changed. - */ - for_each_subsys(ss, ssid) { - struct cgroup_subsys_state *this_css = cgroup_css(cgrp, ss); - struct cgroup_subsys_state *css; - - if (!ss->css_e_css_changed || !this_css) - continue; - - css_for_each_descendant_pre(css, this_css) - if (css != this_css) - ss->css_e_css_changed(css); - } - kernfs_activate(cgrp->kn); ret = 0; out_unlock: From 8699b7762a623c46ced891b3cf490058b56cf99c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: [PATCH 06/44] cgroup: s/child_subsys_mask/subtree_ss_mask/ For consistency with cgroup->subtree_control. * cgroup->child_subsys_mask -> cgroup->subtree_ss_mask * cgroup_calc_child_subsys_mask() -> cgroup_calc_subtree_ss_mask() * cgroup_refresh_child_subsys_mask() -> cgroup_refresh_subtree_ss_mask() No functional changes. 
Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 11 ++++----- kernel/cgroup.c | 48 ++++++++++++++++++------------------- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4f3c0dac26b5..c68ae7f0fb5f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -253,13 +253,12 @@ struct cgroup { /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through - * "cgroup.subtree_control" while ->child_subsys_mask is the - * effective one which may have more subsystems enabled. - * Controller knobs are made available iff it's enabled in - * ->subtree_control. + * "cgroup.subtree_control" while ->child_ss_mask is the effective + * one which may have more subsystems enabled. Controller knobs + * are made available iff it's enabled in ->subtree_control. */ - unsigned int subtree_control; - unsigned int child_subsys_mask; + unsigned long subtree_control; + unsigned long subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7727b6e43e10..f3cd67bfe6c0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -391,10 +391,10 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, /* * This function is used while updating css associations and thus - * can't test the csses directly. Use ->child_subsys_mask. + * can't test the csses directly. Use ->subtree_ss_mask. */ while (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ss->id))) + !(cgroup_parent(cgrp)->subtree_ss_mask & (1 << ss->id))) cgrp = cgroup_parent(cgrp); return cgroup_css(cgrp, ss); @@ -1256,7 +1256,7 @@ static umode_t cgroup_file_mode(const struct cftype *cft) } /** - * cgroup_calc_child_subsys_mask - calculate child_subsys_mask + * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask * @cgrp: the target cgroup * @subtree_control: the new subtree_control mask to consider * @@ -1268,8 +1268,8 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, + unsigned long subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); unsigned long cur_ss_mask = subtree_control; @@ -1293,7 +1293,7 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, * to non-default hierarchies. */ if (parent) - new_ss_mask &= parent->child_subsys_mask; + new_ss_mask &= parent->subtree_ss_mask; else new_ss_mask &= cgrp->root->subsys_mask; @@ -1306,16 +1306,16 @@ static unsigned long cgroup_calc_child_subsys_mask(struct cgroup *cgrp, } /** - * cgroup_refresh_child_subsys_mask - update child_subsys_mask + * cgroup_refresh_subtree_ss_mask - update subtree_ss_mask * @cgrp: the target cgroup * - * Update @cgrp->child_subsys_mask according to the current - * @cgrp->subtree_control using cgroup_calc_child_subsys_mask(). + * Update @cgrp->subtree_ss_mask according to the current + * @cgrp->subtree_control using cgroup_calc_subtree_ss_mask(). 
*/ -static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp) +static void cgroup_refresh_subtree_ss_mask(struct cgroup *cgrp) { - cgrp->child_subsys_mask = - cgroup_calc_child_subsys_mask(cgrp, cgrp->subtree_control); + cgrp->subtree_ss_mask = + cgroup_calc_subtree_ss_mask(cgrp, cgrp->subtree_control); } /** @@ -1542,7 +1542,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, src_root->subsys_mask &= ~(1 << ssid); scgrp->subtree_control &= ~(1 << ssid); - cgroup_refresh_child_subsys_mask(scgrp); + cgroup_refresh_subtree_ss_mask(scgrp); /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; @@ -1550,7 +1550,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); } else { dcgrp->subtree_control |= 1 << ssid; - cgroup_refresh_child_subsys_mask(dcgrp); + cgroup_refresh_subtree_ss_mask(dcgrp); static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } @@ -2523,11 +2523,11 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); /* - * Except for the root, child_subsys_mask must be zero for a cgroup + * Except for the root, subtree_ss_mask must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && - dst_cgrp->child_subsys_mask) + dst_cgrp->subtree_ss_mask) return -EBUSY; /* look up the dst cset for each src cset and link it to src */ @@ -2880,7 +2880,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy * @cgrp: root of the subtree to update csses for * - * @cgrp's child_subsys_mask has changed and its subtree's (self excluded) + * @cgrp's subtree_ss_mask has changed and its subtree's (self excluded) * css associations need to be updated accordingly. This function looks up * all css_sets which are attached to the subtree, creates the matching * updated css_sets and migrates the tasks to the new ones. @@ -2902,7 +2902,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { struct cgrp_cset_link *link; - /* self is not affected by child_subsys_mask change */ + /* self is not affected by subtree_ss_mask change */ if (css->cgroup == cgrp) continue; @@ -3034,9 +3034,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * depending on subsystem dependencies. */ old_sc = cgrp->subtree_control; - old_ss = cgrp->child_subsys_mask; + old_ss = cgrp->subtree_ss_mask; new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_child_subsys_mask(cgrp, new_sc); + new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); css_enable = ~old_ss & new_ss; css_disable = old_ss & ~new_ss; @@ -3069,7 +3069,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, } cgrp->subtree_control = new_sc; - cgrp->child_subsys_mask = new_ss; + cgrp->subtree_ss_mask = new_ss; /* * Create new csses or make the existing ones visible. 
A css is @@ -3135,7 +3135,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, err_undo_css: cgrp->subtree_control = old_sc; - cgrp->child_subsys_mask = old_ss; + cgrp->subtree_ss_mask = old_ss; for_each_subsys(ss, ssid) { if (!(enable & (1 << ssid))) @@ -4969,7 +4969,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ for_each_subsys(ss, ssid) { - if (parent->child_subsys_mask & (1 << ssid)) { + if (parent->subtree_ss_mask & (1 << ssid)) { ret = create_css(cgrp, ss, parent->subtree_control & (1 << ssid)); if (ret) @@ -4983,7 +4983,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, */ if (!cgroup_on_dfl(cgrp)) { cgrp->subtree_control = parent->subtree_control; - cgroup_refresh_child_subsys_mask(cgrp); + cgroup_refresh_subtree_ss_mask(cgrp); } kernfs_activate(kn); From b4e0eeafba61b141c3af22d6636be3f477c5d3bd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: [PATCH 07/44] cgroup: convert for_each_subsys_which() to do-while style for_each_subsys_which() allows iterating subsystems specified in a subsystem bitmask; unfortunately, it requires the mask to be an unsigned long l-value which can be inconvenient and makes it awkward to use a smaller type for subsystem masks. This patch converts for_each_subsy_which() to do-while style which allows it to drop the l-value requirement. The new iterator is named do_each_subsys_mask() / while_each_subsys_mask(). Signed-off-by: Tejun Heo Cc: Aleksa Sarai Acked-by: Johannes Weiner --- kernel/cgroup.c | 72 +++++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 32 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f3cd67bfe6c0..5d102980dc27 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -514,22 +514,28 @@ static int notify_on_release(const struct cgroup *cgrp) (((ss) = cgroup_subsys[ssid]) || true); (ssid)++) /** - * for_each_subsys_which - filter for_each_subsys with a bitmask + * do_each_subsys_mask - filter for_each_subsys with a bitmask * @ss: the iteration cursor * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end - * @ss_maskp: a pointer to the bitmask + * @ss_mask: the bitmask * * The block will only run for cases where the ssid-th bit (1 << ssid) of - * mask is set to 1. + * @ss_mask is set. */ -#define for_each_subsys_which(ss, ssid, ss_maskp) \ - if (!CGROUP_SUBSYS_COUNT) /* to avoid spurious gcc warning */ \ +#define do_each_subsys_mask(ss, ssid, ss_mask) do { \ + unsigned long __ss_mask = (ss_mask); \ + if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \ (ssid) = 0; \ - else \ - for_each_set_bit(ssid, ss_maskp, CGROUP_SUBSYS_COUNT) \ - if (((ss) = cgroup_subsys[ssid]) && false) \ - break; \ - else + break; \ + } \ + for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \ + (ss) = cgroup_subsys[ssid]; \ + { + +#define while_each_subsys_mask() \ + } \ + } \ +} while (false) /* iterate across the hierarchies */ #define for_each_root(root) \ @@ -1284,8 +1290,9 @@ static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, while (true) { unsigned long new_ss_mask = cur_ss_mask; - for_each_subsys_which(ss, ssid, &cur_ss_mask) + do_each_subsys_mask(ss, ssid, cur_ss_mask) { new_ss_mask |= ss->depends_on; + } while_each_subsys_mask(); /* * Mask out subsystems which aren't available. 
This can @@ -1469,7 +1476,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, lockdep_assert_held(&cgroup_mutex); - for_each_subsys_which(ss, ssid, &ss_mask) { + do_each_subsys_mask(ss, ssid, ss_mask) { /* if @ss has non-root csses attached to it, can't move */ if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss))) return -EBUSY; @@ -1477,14 +1484,14 @@ static int rebind_subsystems(struct cgroup_root *dst_root, /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; - } + } while_each_subsys_mask(); /* skip creating root files on dfl_root for inhibited subsystems */ tmp_ss_mask = ss_mask; if (dst_root == &cgrp_dfl_root) tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask; - for_each_subsys_which(ss, ssid, &tmp_ss_mask) { + do_each_subsys_mask(ss, ssid, tmp_ss_mask) { struct cgroup *scgrp = &ss->root->cgrp; int tssid; @@ -1507,19 +1514,19 @@ static int rebind_subsystems(struct cgroup_root *dst_root, continue; } - for_each_subsys_which(ss, tssid, &tmp_ss_mask) { + do_each_subsys_mask(ss, tssid, tmp_ss_mask) { if (tssid == ssid) break; css_clear_dir(cgroup_css(scgrp, ss), dcgrp); - } + } while_each_subsys_mask(); return ret; - } + } while_each_subsys_mask(); /* * Nothing can fail from this point on. Remove files for the * removed subsystems and rebind each subsystem. */ - for_each_subsys_which(ss, ssid, &ss_mask) { + do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; struct cgroup_subsys_state *css = cgroup_css(scgrp, ss); @@ -1556,7 +1563,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, if (ss->bind) ss->bind(css); - } + } while_each_subsys_mask(); kernfs_activate(dcgrp->kn); return 0; @@ -2838,12 +2845,12 @@ static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask) bool printed = false; int ssid; - for_each_subsys_which(ss, ssid, &ss_mask) { + do_each_subsys_mask(ss, ssid, ss_mask) { if (printed) seq_putc(seq, ' '); seq_printf(seq, "%s", ss->name); printed = true; - } + } while_each_subsys_mask(); if (printed) seq_putc(seq, '\n'); } @@ -2956,11 +2963,9 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, */ buf = strstrip(buf); while ((tok = strsep(&buf, " "))) { - unsigned long tmp_ss_mask = ~cgrp_dfl_root_inhibit_ss_mask; - if (tok[0] == '\0') continue; - for_each_subsys_which(ss, ssid, &tmp_ss_mask) { + do_each_subsys_mask(ss, ssid, ~cgrp_dfl_root_inhibit_ss_mask) { if (!cgroup_ssid_enabled(ssid) || strcmp(tok + 1, ss->name)) continue; @@ -2975,7 +2980,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return -EINVAL; } break; - } + } while_each_subsys_mask(); if (ssid == CGROUP_SUBSYS_COUNT) return -EINVAL; } @@ -3049,7 +3054,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * still around. In such cases, wait till it's gone using * offline_waitq. 
*/ - for_each_subsys_which(ss, ssid, &css_enable) { + do_each_subsys_mask(ss, ssid, css_enable) { cgroup_for_each_live_child(child, cgrp) { DEFINE_WAIT(wait); @@ -3066,7 +3071,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } - } + } while_each_subsys_mask(); cgrp->subtree_control = new_sc; cgrp->subtree_ss_mask = new_ss; @@ -5509,11 +5514,11 @@ int cgroup_can_fork(struct task_struct *child) struct cgroup_subsys *ss; int i, j, ret; - for_each_subsys_which(ss, i, &have_canfork_callback) { + do_each_subsys_mask(ss, i, have_canfork_callback) { ret = ss->can_fork(child); if (ret) goto out_revert; - } + } while_each_subsys_mask(); return 0; @@ -5598,8 +5603,9 @@ void cgroup_post_fork(struct task_struct *child) * css_set; otherwise, @child might change state between ->fork() * and addition to css_set. */ - for_each_subsys_which(ss, i, &have_fork_callback) + do_each_subsys_mask(ss, i, have_fork_callback) { ss->fork(child); + } while_each_subsys_mask(); } /** @@ -5642,8 +5648,9 @@ void cgroup_exit(struct task_struct *tsk) } /* see cgroup_post_fork() for details */ - for_each_subsys_which(ss, i, &have_exit_callback) + do_each_subsys_mask(ss, i, have_exit_callback) { ss->exit(tsk); + } while_each_subsys_mask(); } void cgroup_free(struct task_struct *task) @@ -5652,8 +5659,9 @@ void cgroup_free(struct task_struct *task) struct cgroup_subsys *ss; int ssid; - for_each_subsys_which(ss, ssid, &have_free_callback) + do_each_subsys_mask(ss, ssid, have_free_callback) { ss->free(task); + } while_each_subsys_mask(); put_css_set(cset); } From 996cd1fb7383cf087496e8a441bb10b9873b1eb6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:46 -0500 Subject: [PATCH 08/44] cgroup: use do_each_subsys_mask() where applicable There are several places in cgroup_subtree_control_write() which can use do_each_subsys_mask() instead of manual mask testing. Use it. No functional changes. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- kernel/cgroup.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5d102980dc27..1e561bd990b9 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3082,10 +3082,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * dependency. An invisible css is made visible when the userland * explicitly enables it. */ - for_each_subsys(ss, ssid) { - if (!(enable & (1 << ssid))) - continue; - + do_each_subsys_mask(ss, ssid, enable) { cgroup_for_each_live_child(child, cgrp) { if (css_enable & (1 << ssid)) ret = create_css(child, ss, @@ -3096,7 +3093,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (ret) goto err_undo_css; } - } + } while_each_subsys_mask(); /* * At this point, cgroup_e_css() results reflect the new csses @@ -3115,10 +3112,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, * state if it's made visible again later. Controllers which may * be depended upon should provide ->css_reset() for this purpose. 
*/ - for_each_subsys(ss, ssid) { - if (!(disable & (1 << ssid))) - continue; - + do_each_subsys_mask(ss, ssid, disable) { cgroup_for_each_live_child(child, cgrp) { struct cgroup_subsys_state *css = cgroup_css(child, ss); @@ -3130,7 +3124,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, ss->css_reset(css); } } - } + } while_each_subsys_mask(); kernfs_activate(cgrp->kn); ret = 0; @@ -3142,10 +3136,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control = old_sc; cgrp->subtree_ss_mask = old_ss; - for_each_subsys(ss, ssid) { - if (!(enable & (1 << ssid))) - continue; - + do_each_subsys_mask(ss, ssid, enable) { cgroup_for_each_live_child(child, cgrp) { struct cgroup_subsys_state *css = cgroup_css(child, ss); @@ -3157,7 +3148,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, else css_clear_dir(css, NULL); } - } + } while_each_subsys_mask(); goto out_unlock; } @@ -4973,14 +4964,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_destroy; /* let's create and online css's */ - for_each_subsys(ss, ssid) { - if (parent->subtree_ss_mask & (1 << ssid)) { - ret = create_css(cgrp, ss, - parent->subtree_control & (1 << ssid)); - if (ret) - goto out_destroy; - } - } + do_each_subsys_mask(ss, ssid, parent->subtree_ss_mask) { + ret = create_css(cgrp, ss, + parent->subtree_control & (1 << ssid)); + if (ret) + goto out_destroy; + } while_each_subsys_mask(); /* * On the default hierarchy, a child doesn't automatically inherit From 6e5c830770f9045a17b1b931c3e11fbd5591e630 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Feb 2016 22:25:47 -0500 Subject: [PATCH 09/44] cgroup: make cgroup subsystem masks u16 After the recent do_each_subsys_mask() conversion, there's no reason to use ulong for subsystem masks. We'll be adding more subsystem masks to persistent data structures, let's reduce its size to u16 which should be enough for now and the foreseeable future. This doesn't create any noticeable behavior differences. v2: Johannes spotted that the initial patch missed cgroup_no_v1_mask. Converted. Signed-off-by: Tejun Heo Acked-by: Johannes Weiner --- include/linux/cgroup-defs.h | 4 +-- kernel/cgroup.c | 50 ++++++++++++++++++------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c68ae7f0fb5f..0abf6aa86c81 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -257,8 +257,8 @@ struct cgroup { * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ - unsigned long subtree_control; - unsigned long subtree_ss_mask; + u16 subtree_control; + u16 subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1e561bd990b9..7669f68077b8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -181,10 +181,10 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root); static bool cgrp_dfl_root_visible; /* Controllers blocked by the commandline in v1 */ -static unsigned long cgroup_no_v1_mask; +static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ -static unsigned long cgrp_dfl_root_inhibit_ss_mask; +static u16 cgrp_dfl_root_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -208,19 +208,18 @@ static u64 css_serial_nr_next = 1; * fork/exit handlers to call. 
This avoids us having to do extra work in the * fork/exit path to check which subsystems have fork/exit callbacks. */ -static unsigned long have_fork_callback __read_mostly; -static unsigned long have_exit_callback __read_mostly; -static unsigned long have_free_callback __read_mostly; +static u16 have_fork_callback __read_mostly; +static u16 have_exit_callback __read_mostly; +static u16 have_free_callback __read_mostly; /* Ditto for the can_fork callback. */ -static unsigned long have_canfork_callback __read_mostly; +static u16 have_canfork_callback __read_mostly; static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask); +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, @@ -1274,11 +1273,10 @@ static umode_t cgroup_file_mode(const struct cftype *cft) * @subtree_control is to be applied to @cgrp. The returned mask is always * a superset of @subtree_control and follows the usual hierarchy rules. */ -static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, - unsigned long subtree_control) +static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) { struct cgroup *parent = cgroup_parent(cgrp); - unsigned long cur_ss_mask = subtree_control; + u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; @@ -1288,7 +1286,7 @@ static unsigned long cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, return cur_ss_mask; while (true) { - unsigned long new_ss_mask = cur_ss_mask; + u16 new_ss_mask = cur_ss_mask; do_each_subsys_mask(ss, ssid, cur_ss_mask) { new_ss_mask |= ss->depends_on; @@ -1466,12 +1464,11 @@ static int css_populate_dir(struct cgroup_subsys_state *css, return ret; } -static int rebind_subsystems(struct cgroup_root *dst_root, - unsigned long ss_mask) +static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - unsigned long tmp_ss_mask; + u16 tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); @@ -1507,7 +1504,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, */ if (dst_root == &cgrp_dfl_root) { if (cgrp_dfl_root_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%lx to default root\n", + pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); } @@ -1599,7 +1596,7 @@ static int cgroup_show_options(struct seq_file *seq, } struct cgroup_sb_opts { - unsigned long subsys_mask; + u16 subsys_mask; unsigned int flags; char *release_agent; bool cpuset_clone_children; @@ -1612,13 +1609,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) { char *token, *o = data; bool all_ss = false, one_ss = false; - unsigned long mask = -1UL; + u16 mask = U16_MAX; struct cgroup_subsys *ss; int nr_opts = 0; int i; #ifdef CONFIG_CPUSETS - mask = ~(1U << cpuset_cgrp_id); + mask = ~((u16)1 << cpuset_cgrp_id); #endif memset(opts, 0, sizeof(*opts)); @@ -1745,7 +1742,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) int ret = 0; struct cgroup_root *root = cgroup_root_from_kf(kf_root); struct 
cgroup_sb_opts opts; - unsigned long added_mask, removed_mask; + u16 added_mask, removed_mask; if (root == &cgrp_dfl_root) { pr_err("remount is not allowed\n"); @@ -1893,7 +1890,7 @@ static void init_cgroup_root(struct cgroup_root *root, set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } -static int cgroup_setup_root(struct cgroup_root *root, unsigned long ss_mask) +static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; @@ -2839,7 +2836,7 @@ static int cgroup_sane_behavior_show(struct seq_file *seq, void *v) return 0; } -static void cgroup_print_ss_mask(struct seq_file *seq, unsigned long ss_mask) +static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) { struct cgroup_subsys *ss; bool printed = false; @@ -2950,8 +2947,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { - unsigned long enable = 0, disable = 0; - unsigned long css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; + u16 enable = 0, disable = 0; + u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -5255,7 +5252,7 @@ int __init cgroup_init_early(void) return 0; } -static unsigned long cgroup_disable_mask __initdata; +static u16 cgroup_disable_mask __initdata; /** * cgroup_init - cgroup initialization @@ -5269,6 +5266,7 @@ int __init cgroup_init(void) unsigned long key; int ssid; + BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(percpu_init_rwsem(&cgroup_threadgroup_rwsem)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files)); @@ -5754,7 +5752,7 @@ static int __init cgroup_no_v1(char *str) continue; if (!strcmp(token, "all")) { - cgroup_no_v1_mask = ~0UL; + cgroup_no_v1_mask = U16_MAX; break; } From a7165264429b7b0d95557f306f310e77407fc2ee Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:50 -0500 Subject: [PATCH 10/44] cgroup: s/cgrp_dfl_root_/cgrp_dfl_/ These var names are unnecessarily unwiedly and another similar variable will be added. Let's shorten them. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7669f68077b8..afbed523b22f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -178,13 +178,13 @@ EXPORT_SYMBOL_GPL(cgrp_dfl_root); * The default hierarchy always exists but is hidden until mounted for the * first time. This is for backward compatibility. */ -static bool cgrp_dfl_root_visible; +static bool cgrp_dfl_visible; /* Controllers blocked by the commandline in v1 */ static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ -static u16 cgrp_dfl_root_inhibit_ss_mask; +static u16 cgrp_dfl_inhibit_ss_mask; /* The list of hierarchy roots */ @@ -1486,7 +1486,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) /* skip creating root files on dfl_root for inhibited subsystems */ tmp_ss_mask = ss_mask; if (dst_root == &cgrp_dfl_root) - tmp_ss_mask &= ~cgrp_dfl_root_inhibit_ss_mask; + tmp_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; do_each_subsys_mask(ss, ssid, tmp_ss_mask) { struct cgroup *scgrp = &ss->root->cgrp; @@ -1503,7 +1503,7 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) * Just warn about it and continue. 
*/ if (dst_root == &cgrp_dfl_root) { - if (cgrp_dfl_root_visible) { + if (cgrp_dfl_visible) { pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", ret, ss_mask); pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); @@ -2006,7 +2006,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, pr_err("cgroup2: unknown option \"%s\"\n", (char *)data); return ERR_PTR(-EINVAL); } - cgrp_dfl_root_visible = true; + cgrp_dfl_visible = true; root = &cgrp_dfl_root; cgroup_get(&root->cgrp); goto out_mount; @@ -2858,7 +2858,7 @@ static int cgroup_root_controllers_show(struct seq_file *seq, void *v) struct cgroup *cgrp = seq_css(seq)->cgroup; cgroup_print_ss_mask(seq, cgrp->root->subsys_mask & - ~cgrp_dfl_root_inhibit_ss_mask); + ~cgrp_dfl_inhibit_ss_mask); return 0; } @@ -2962,7 +2962,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, while ((tok = strsep(&buf, " "))) { if (tok[0] == '\0') continue; - do_each_subsys_mask(ss, ssid, ~cgrp_dfl_root_inhibit_ss_mask) { + do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) { if (!cgroup_ssid_enabled(ssid) || strcmp(tok + 1, ss->name)) continue; @@ -5315,7 +5315,7 @@ int __init cgroup_init(void) cgrp_dfl_root.subsys_mask |= 1 << ss->id; if (!ss->dfl_cftypes) - cgrp_dfl_root_inhibit_ss_mask |= 1 << ss->id; + cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; if (ss->dfl_cftypes == ss->legacy_cftypes) { WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); @@ -5386,7 +5386,7 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct cgroup *cgrp; int ssid, count = 0; - if (root == &cgrp_dfl_root && !cgrp_dfl_root_visible) + if (root == &cgrp_dfl_root && !cgrp_dfl_visible) continue; seq_printf(m, "%d:", root->hierarchy_id); From b38e42e962dbc2fbc3839ce70750881db7c9277e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:50 -0500 Subject: [PATCH 11/44] cgroup: convert cgroup_subsys flag fields to bool bitfields Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 6 +++--- kernel/cpuset.c | 2 +- kernel/sched/core.c | 2 +- kernel/sched/cpuacct.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 0abf6aa86c81..8fc3f0452289 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -444,7 +444,7 @@ struct cgroup_subsys { void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int early_init; + bool early_init:1; /* * If %false, this subsystem is properly hierarchical - @@ -458,8 +458,8 @@ struct cgroup_subsys { * cases. Eventually, all subsystems will be made properly * hierarchical and this will go away. 
*/ - bool broken_hierarchy; - bool warned_broken_hierarchy; + bool broken_hierarchy:1; + bool warned_broken_hierarchy:1; /* the following two fields are initialized automtically during boot */ int id; diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 41989ab4db57..90899837ea78 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2089,7 +2089,7 @@ struct cgroup_subsys cpuset_cgrp_subsys = { .attach = cpuset_attach, .bind = cpuset_bind, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; /** diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 44253adb3c36..0f5abc6e4ff3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8706,7 +8706,7 @@ struct cgroup_subsys cpu_cgrp_subsys = { .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, .legacy_cftypes = cpu_files, - .early_init = 1, + .early_init = true, }; #endif /* CONFIG_CGROUP_SCHED */ diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index dd7cbb55bbf2..2ddaebf7469a 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -279,5 +279,5 @@ struct cgroup_subsys cpuacct_cgrp_subsys = { .css_alloc = cpuacct_css_alloc, .css_free = cpuacct_css_free, .legacy_cftypes = files, - .early_init = 1, + .early_init = true, }; From f17fc25f2b4f4bd8edafe36af6d7379eb9db27a0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:51 -0500 Subject: [PATCH 12/44] cgroup: make css_tryget_online_from_dir() also recognize cgroup2 fs The function currently returns -EBADF for a directory on the default hierarchy. Make it also recognize cgroup2_fs_type. This will be used for perf_event cgroup2 support. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index afbed523b22f..2b114368666c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5781,12 +5781,13 @@ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss) { struct kernfs_node *kn = kernfs_node_from_dentry(dentry); + struct file_system_type *s_type = dentry->d_sb->s_type; struct cgroup_subsys_state *css = NULL; struct cgroup *cgrp; /* is @dentry a cgroup dir? */ - if (dentry->d_sb->s_type != &cgroup_fs_type || !kn || - kernfs_type(kn) != KERNFS_DIR) + if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) || + !kn || kernfs_type(kn) != KERNFS_DIR) return ERR_PTR(-EBADF); rcu_read_lock(); From 62716ea0f2ea4253984008fd4a96a532674ac58f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Feb 2016 10:00:51 -0500 Subject: [PATCH 13/44] cgroup: use ->subtree_control when testing no internal process rule The "no internal process" rule is enforced by cgroup_migrate_prepare_dst() during process migration. It tests whether the target cgroup's ->child_subsys_mask is zero, which is different from the "subtree_control" write path, which tests ->subtree_control. This hasn't mattered because up until now, both ->child_subsys_mask and ->subtree_control have been zero or non-zero at the same time. However, with the planned addition of implicit controllers, this will no longer be true. This patch prepares for the change by making cgroup_migrate_prepare_dst() test ->subtree_control instead.
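To make the distinction concrete, the constraint being enforced can be condensed into a small sketch; this is an illustration only, sketch_may_attach_tasks() is a made-up name, and the real check lives in cgroup_migrate_prepare_dst() as shown in the diff below:

	/*
	 * Illustration: on the default hierarchy, a non-root cgroup may
	 * receive tasks only while it has no controllers enabled in
	 * ->subtree_control.  Once implicit controllers are added,
	 * ->subtree_ss_mask may be non-zero even then, so it is the
	 * wrong field to test.
	 */
	static bool sketch_may_attach_tasks(struct cgroup *cgrp)
	{
		return !cgroup_on_dfl(cgrp) || !cgroup_parent(cgrp) ||
		       !cgrp->subtree_control;
	}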
Signed-off-by: Tejun Heo --- kernel/cgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2b114368666c..ac5451e7c458 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2527,11 +2527,11 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); /* - * Except for the root, subtree_ss_mask must be zero for a cgroup + * Except for the root, subtree_control must be zero for a cgroup * with tasks so that child cgroups don't compete against tasks. */ if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && - dst_cgrp->subtree_ss_mask) + dst_cgrp->subtree_control) return -EBUSY; /* look up the dst cset for each src cset and link it to src */ From 63253ad814db726d43c04011c752d83b7aaca998 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 26 Feb 2016 13:07:38 +0800 Subject: [PATCH 14/44] cgroup: fix a mistake in warning message There is a mistake about the print format name:id <--> %d:%s, which the name is 'char *' type and id is 'int' type. Change "name:id" to "id:name" instead to be consistent with "cgroup_subsys %d:%s". Signed-off-by: Xiubo Li Acked-by: Zefan Li Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ac5451e7c458..fcfad82149b1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5235,7 +5235,7 @@ int __init cgroup_init_early(void) for_each_subsys(ss, i) { WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id, - "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p name:id=%d:%s\n", + "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n", i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free, ss->id, ss->name); WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN, From fa06235b8eb0ae87a962e023243dba1eb4e7160d Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Tue, 1 Mar 2016 19:56:30 +0300 Subject: [PATCH 15/44] cgroup: reset css on destruction An associated css can be around for quite a while after a cgroup directory has been removed. In general, it makes sense to reset it to defaults so as not to worry about any remnants. For instance, memory cgroup needs to reset memory.low, otherwise pages charged to a dead cgroup might never get reclaimed. There's ->css_reset callback, which would fit perfectly for the purpose. Currently, it's only called when a subsystem is disabled in the unified hierarchy and there are other subsystems dependant on it. Let's call it on css destruction as well. Suggested-by: Johannes Weiner Signed-off-by: Vladimir Davydov Signed-off-by: Tejun Heo --- kernel/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fcfad82149b1..46529502e9d5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4787,6 +4787,9 @@ static void offline_css(struct cgroup_subsys_state *css) if (!(css->flags & CSS_ONLINE)) return; + if (ss->css_reset) + ss->css_reset(css); + if (ss->css_offline) ss->css_offline(css); From 2378d8b8ba3d2adffb4f98c0c60ee2f448b3be69 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:57 -0500 Subject: [PATCH 16/44] cgroup: re-hash init_css_set after subsystems are initialized css_sets are hashed by their subsys[] contents and in cgroup_init() init_css_set is hashed early, before subsystem inits, when all entries in its subsys[] are NULL, so that cgroup_dfl_root initialization can find and link to it. 
As subsystems are initialized, init_css_set.subsys[] is filled up but the hashing is never updated making init_css_set hashed in the wrong place. While incorrect, this doesn't cause a critical failure as css_set management code would create an identical css_set dynamically. Fix it by rehashing init_css_set after subsystems are initialized. While at it, drop unnecessary @key local variable. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 46529502e9d5..e97772b42dfb 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -5266,7 +5266,6 @@ static u16 cgroup_disable_mask __initdata; int __init cgroup_init(void) { struct cgroup_subsys *ss; - unsigned long key; int ssid; BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); @@ -5276,9 +5275,12 @@ int __init cgroup_init(void) mutex_lock(&cgroup_mutex); - /* Add init_css_set to the hash table */ - key = css_set_hash(init_css_set.subsys); - hash_add(css_set_table, &init_css_set.hlist, key); + /* + * Add init_css_set to the hash table so that dfl_root can link to + * it during init. + */ + hash_add(css_set_table, &init_css_set.hlist, + css_set_hash(init_css_set.subsys)); BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); @@ -5331,6 +5333,11 @@ int __init cgroup_init(void) ss->bind(init_css_set.subsys[ssid]); } + /* init_css_set.subsys[] has been updated, re-hash */ + hash_del(&init_css_set.hlist); + hash_add(css_set_table, &init_css_set.hlist, + css_set_hash(init_css_set.subsys)); + WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); WARN_ON(register_filesystem(&cgroup2_fs_type)); From 20b454a61fba59be13de52b4493898583ea26d20 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:57 -0500 Subject: [PATCH 17/44] cgroup: suppress spurious de-populated events During task migration, tasks may transfer between two css_sets which are associated with the same cgroup. If those tasks are the only tasks in the cgroup, this currently triggers a spurious de-populated event on the cgroup. Fix it by bumping up populated count before bumping it down during migration to ensure that it doesn't reach zero spuriously. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e97772b42dfb..5d452e7fcb4f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -678,6 +678,9 @@ static void css_set_move_task(struct task_struct *task, { lockdep_assert_held(&css_set_lock); + if (to_cset && !css_set_populated(to_cset)) + css_set_update_populated(to_cset, true); + if (from_cset) { struct css_task_iter *it, *pos; @@ -711,8 +714,6 @@ static void css_set_move_task(struct task_struct *task, */ WARN_ON_ONCE(task->flags & PF_EXITING); - if (!css_set_populated(to_cset)) - css_set_update_populated(to_cset, true); rcu_assign_pointer(task->cgroups, to_cset); list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks : &to_cset->tasks); From 6cd0f5bbaf594f40a97d01dbc565dda41f30d37c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: [PATCH 18/44] cgroup: separate out interface file creation from css creation Currently, interface files are created when a css is created depending on whether @visible is set. This patch separates out the two into separate steps to help code refactoring and eventually allow cgroups which aren't visible through cgroup fs. 
Move css_populate_dir() out of create_css() and drop @visible. While at it, rename the function to css_create() for consistency. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 72 +++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5d452e7fcb4f..4178e45becb4 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -222,8 +222,8 @@ static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, - bool visible); +static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, + struct cgroup_subsys *ss); static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup_subsys_state *css, @@ -3082,14 +3082,26 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, */ do_each_subsys_mask(ss, ssid, enable) { cgroup_for_each_live_child(child, cgrp) { - if (css_enable & (1 << ssid)) - ret = create_css(child, ss, - cgrp->subtree_control & (1 << ssid)); - else + if (css_enable & (1 << ssid)) { + struct cgroup_subsys_state *css; + + css = css_create(child, ss); + if (IS_ERR(css)) { + ret = PTR_ERR(css); + goto err_undo_css; + } + + if (cgrp->subtree_control & (1 << ssid)) { + ret = css_populate_dir(css, NULL); + if (ret) + goto err_undo_css; + } + } else { ret = css_populate_dir(cgroup_css(child, ss), NULL); - if (ret) - goto err_undo_css; + if (ret) + goto err_undo_css; + } } } while_each_subsys_mask(); @@ -4717,7 +4729,9 @@ static void css_release_work_fn(struct work_struct *work) * Those are supported by RCU protecting clearing of * cgrp->kn->priv backpointer. */ - RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); + if (cgrp->kn) + RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, + NULL); } mutex_unlock(&cgroup_mutex); @@ -4801,17 +4815,16 @@ static void offline_css(struct cgroup_subsys_state *css) } /** - * create_css - create a cgroup_subsys_state + * css_create - create a cgroup_subsys_state * @cgrp: the cgroup new css will be associated with * @ss: the subsys of new css - * @visible: whether to create control knobs for the new css or not * * Create a new css associated with @cgrp - @ss pair. On success, the new - * css is online and installed in @cgrp with all interface files created if - * @visible. Returns 0 on success, -errno on failure. + * css is online and installed in @cgrp. This function doesn't create the + * interface files. Returns 0 on success, -errno on failure. 
*/ -static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, - bool visible) +static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, + struct cgroup_subsys *ss) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); @@ -4822,7 +4835,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, css = ss->css_alloc(parent_css); if (IS_ERR(css)) - return PTR_ERR(css); + return css; init_and_link_css(css, ss, cgrp); @@ -4835,12 +4848,6 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, goto err_free_percpu_ref; css->id = err; - if (visible) { - err = css_populate_dir(css, NULL); - if (err) - goto err_free_id; - } - /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); cgroup_idr_replace(&ss->css_idr, css, css->id); @@ -4858,18 +4865,16 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, ss->warned_broken_hierarchy = true; } - return 0; + return css; err_list_del: list_del_rcu(&css->sibling); - css_clear_dir(css, NULL); -err_free_id: cgroup_idr_remove(&ss->css_idr, css->id); err_free_percpu_ref: percpu_ref_exit(&css->refcnt); err_free_css: call_rcu(&css->rcu_head, css_free_rcu_fn); - return err; + return ERR_PTR(err); } static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, @@ -4966,10 +4971,19 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, /* let's create and online css's */ do_each_subsys_mask(ss, ssid, parent->subtree_ss_mask) { - ret = create_css(cgrp, ss, - parent->subtree_control & (1 << ssid)); - if (ret) + struct cgroup_subsys_state *css; + + css = css_create(cgrp, ss); + if (IS_ERR(css)) { + ret = PTR_ERR(css); goto out_destroy; + } + + if (parent->subtree_control & (1 << ssid)) { + ret = css_populate_dir(css, NULL); + if (ret) + goto out_destroy; + } } while_each_subsys_mask(); /* From 88cb04b96a1934ecbfd1d324e7cde55890c1a576 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: [PATCH 19/44] cgroup: explicitly track whether a cgroup_subsys_state is visible to userland Currently, whether a css (cgroup_subsys_state) has its interface files created is not tracked and assumed to change together with the owning cgroup's lifecycle. cgroup directory and interface creation is being separated out from internal object creation to help refactoring and eventually allow cgroups which are not visible through cgroupfs. This patch adds CSS_VISIBLE to track whether a css has its interface files created and perform management operations only when necessary which helps decoupling interface file handling from internal object lifecycle. After this patch, all css interface file management functions can be called regardless of the current state and will achieve the expected result. 
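As a rough usage illustration (not taken from the patch, with error handling elided), callers can now invoke the interface file helpers unconditionally and let CSS_VISIBLE make them idempotent; the diff below shows the corresponding early returns and flag updates:

	/* creates the interface files only if they are not already visible */
	ret = css_populate_dir(css, NULL);

	/* removes the interface files only if they were actually created */
	css_clear_dir(css, NULL);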
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 1 + kernel/cgroup.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8fc3f0452289..7593c1a46786 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -45,6 +45,7 @@ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ + CSS_VISIBLE = (1 << 3), /* css is visible to userland */ }; /* bits in struct cgroup flags field */ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4178e45becb4..a9a53ca942f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1421,6 +1421,11 @@ static void css_clear_dir(struct cgroup_subsys_state *css, struct cgroup *cgrp = cgrp_override ?: css->cgroup; struct cftype *cfts; + if (!(css->flags & CSS_VISIBLE)) + return; + + css->flags &= ~CSS_VISIBLE; + list_for_each_entry(cfts, &css->ss->cfts, node) cgroup_addrm_files(css, cgrp, cfts, false); } @@ -1439,6 +1444,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css, struct cftype *cfts, *failed_cfts; int ret; + if (css->flags & CSS_VISIBLE) + return 0; + if (!css->ss) { if (cgroup_on_dfl(cgrp)) cfts = cgroup_dfl_base_files; @@ -1455,6 +1463,9 @@ static int css_populate_dir(struct cgroup_subsys_state *css, goto err; } } + + css->flags |= CSS_VISIBLE; + return 0; err: list_for_each_entry(cfts, &css->ss->cfts, node) { @@ -3403,7 +3414,7 @@ static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) css_for_each_descendant_pre(css, cgroup_css(root, ss)) { struct cgroup *cgrp = css->cgroup; - if (cgroup_is_dead(cgrp)) + if (!(css->flags & CSS_VISIBLE)) continue; ret = cgroup_addrm_files(css, cgrp, cfts, is_add); From 195e9b6c4b09434dad6ec3c163fdf037e16b3c96 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: [PATCH 20/44] cgroup: reorder operations in cgroup_mkdir() Currently, operations to initialize internal objects and create interface directory and files are intermixed in cgroup_mkdir(). We're in the process of refactoring cgroup and css management paths to separate them out to eventually allow cgroups which aren't visible through cgroup fs. This patch reorders operations inside cgroup_mkdir() so that interface directory and file handling comes after internal object initialization. This will enable further refactoring. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 61 ++++++++++++++++++++++++------------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a9a53ca942f3..a6d484a667aa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4945,20 +4945,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); - /* create the directory */ - kn = kernfs_create_dir(parent->kn, name, mode, cgrp); - if (IS_ERR(kn)) { - ret = PTR_ERR(kn); - goto out_free_id; - } - cgrp->kn = kn; - - /* - * This extra ref will be put in cgroup_free_fn() and guarantees - * that @cgrp->kn is always accessible. 
- */ - kernfs_get(kn); - cgrp->self.serial_nr = css_serial_nr_next++; /* allocation complete, commit to creation */ @@ -4972,15 +4958,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, */ cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id); - ret = cgroup_kn_set_ugid(kn); - if (ret) - goto out_destroy; - - ret = css_populate_dir(&cgrp->self, NULL); - if (ret) - goto out_destroy; - - /* let's create and online css's */ + /* create the csses */ do_each_subsys_mask(ss, ssid, parent->subtree_ss_mask) { struct cgroup_subsys_state *css; @@ -4989,12 +4967,6 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, ret = PTR_ERR(css); goto out_destroy; } - - if (parent->subtree_control & (1 << ssid)) { - ret = css_populate_dir(css, NULL); - if (ret) - goto out_destroy; - } } while_each_subsys_mask(); /* @@ -5006,13 +4978,40 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgroup_refresh_subtree_ss_mask(cgrp); } + /* create the directory */ + kn = kernfs_create_dir(parent->kn, name, mode, cgrp); + if (IS_ERR(kn)) { + ret = PTR_ERR(kn); + goto out_destroy; + } + cgrp->kn = kn; + + /* + * This extra ref will be put in cgroup_free_fn() and guarantees + * that @cgrp->kn is always accessible. + */ + kernfs_get(kn); + + ret = cgroup_kn_set_ugid(kn); + if (ret) + goto out_destroy; + + ret = css_populate_dir(&cgrp->self, NULL); + if (ret) + goto out_destroy; + + do_each_subsys_mask(ss, ssid, parent->subtree_control) { + ret = css_populate_dir(cgroup_css(cgrp, ss), NULL); + if (ret) + goto out_destroy; + } while_each_subsys_mask(); + + /* let's create and online css's */ kernfs_activate(kn); ret = 0; goto out_unlock; -out_free_id: - cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: From a5bca2152036de826595723437c5cbe8f6c13983 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: [PATCH 21/44] cgroup: factor out cgroup_create() out of cgroup_mkdir() We're in the process of refactoring cgroup and css management paths to separate them out to eventually allow cgroups which aren't visible through cgroup fs. This patch factors out cgroup_create() out of cgroup_mkdir(). cgroup_create() contains all internal object creation and initialization. cgroup_mkdir() uses cgroup_create() to create the internal cgroup and adds interface directory and file creation. This patch doesn't cause any behavior differences. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 72 +++++++++++++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a6d484a667aa..e1b3d0fead05 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4888,33 +4888,19 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, return ERR_PTR(err); } -static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, - umode_t mode) +static struct cgroup *cgroup_create(struct cgroup *parent) { - struct cgroup *parent, *cgrp, *tcgrp; - struct cgroup_root *root; + struct cgroup_root *root = parent->root; struct cgroup_subsys *ss; - struct kernfs_node *kn; - int level, ssid, ret; - - /* Do not accept '\n' to prevent making /proc//cgroup unparsable. 
- */ - if (strchr(name, '\n')) - return -EINVAL; - - parent = cgroup_kn_lock_live(parent_kn); - if (!parent) - return -ENODEV; - root = parent->root; - level = parent->level + 1; + struct cgroup *cgrp, *tcgrp; + int level = parent->level + 1; + int ssid, ret; /* allocate the cgroup and its ID, 0 is reserved for the root */ cgrp = kzalloc(sizeof(*cgrp) + sizeof(cgrp->ancestor_ids[0]) * (level + 1), GFP_KERNEL); - if (!cgrp) { - ret = -ENOMEM; - goto out_unlock; - } + if (!cgrp) + return ERR_PTR(-ENOMEM); ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL); if (ret) @@ -4978,6 +4964,40 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, cgroup_refresh_subtree_ss_mask(cgrp); } + return cgrp; + +out_cancel_ref: + percpu_ref_exit(&cgrp->self.refcnt); +out_free_cgrp: + kfree(cgrp); + return ERR_PTR(ret); +out_destroy: + cgroup_destroy_locked(cgrp); + return ERR_PTR(ret); +} + +static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, + umode_t mode) +{ + struct cgroup *parent, *cgrp; + struct cgroup_subsys *ss; + struct kernfs_node *kn; + int ssid, ret; + + /* do not accept '\n' to prevent making /proc//cgroup unparsable */ + if (strchr(name, '\n')) + return -EINVAL; + + parent = cgroup_kn_lock_live(parent_kn); + if (!parent) + return -ENODEV; + + cgrp = cgroup_create(parent); + if (IS_ERR(cgrp)) { + ret = PTR_ERR(cgrp); + goto out_unlock; + } + /* create the directory */ kn = kernfs_create_dir(parent->kn, name, mode, cgrp); if (IS_ERR(kn)) { @@ -5012,17 +5032,11 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, ret = 0; goto out_unlock; -out_cancel_ref: - percpu_ref_exit(&cgrp->self.refcnt); -out_free_cgrp: - kfree(cgrp); +out_destroy: + cgroup_destroy_locked(cgrp); out_unlock: cgroup_kn_unlock(parent_kn); return ret; - -out_destroy: - cgroup_destroy_locked(cgrp); - goto out_unlock; } /* From 5531dc915ba10b78a80dcffa48d7360d3c0bab61 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:58 -0500 Subject: [PATCH 22/44] cgroup: introduce cgroup_control() and cgroup_ss_mask() Whether a controller is enabled and visible on a non-root cgroup is determined by subtree_control and subtree_ss_mask of the parent cgroup. For a root cgroup, it is determined by the type of the hierarchy and which controllers are attached to it. Deciding the above on each usage is fragile and unnecessarily complicates its users. This patch introduces cgroup_control() and cgroup_ss_mask() which calculate and return the [visibly] enabled subsystem mask for the specified cgroup and convert the existing usages. * cgroup_e_css() is restructured for simplicity. * cgroup_calc_subtree_ss_mask() and cgroup_subtree_control_write() no longer need to distinguish root and non-root cases. * With cgroup_control(), cgroup_controllers_show() can now handle both root and non-root cases. cgroup_root_controllers_show() is removed. v2: cgroup_control() updated to yield the correct result on v1 hierarchies too. cgroup_subtree_control_write() converted.
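The following is a minimal userspace sketch (hypothetical types and fields, not the kernel's) of the idea behind the two helpers: callers ask a single question and the root vs. non-root distinction is resolved internally.

#include <stdint.h>
#include <stdio.h>

struct fake_cgroup {
	struct fake_cgroup *parent;
	uint16_t root_subsys_mask;	/* meaningful for the root only */
	uint16_t subtree_control;
	uint16_t subtree_ss_mask;
	int on_dfl;			/* on the default hierarchy? */
	uint16_t dfl_inhibit_mask;
};

/* subsystems visibly enabled on a cgroup */
static uint16_t fake_control(struct fake_cgroup *cgrp)
{
	if (cgrp->parent)
		return cgrp->parent->subtree_control;
	return cgrp->on_dfl ?
		cgrp->root_subsys_mask & ~cgrp->dfl_inhibit_mask :
		cgrp->root_subsys_mask;
}

/* subsystems enabled on a cgroup, including invisible ones */
static uint16_t fake_ss_mask(struct fake_cgroup *cgrp)
{
	if (cgrp->parent)
		return cgrp->parent->subtree_ss_mask;
	return cgrp->root_subsys_mask;
}

int main(void)
{
	struct fake_cgroup root = { .root_subsys_mask = 0x3f, .on_dfl = 1,
				    .dfl_inhibit_mask = 0x20,
				    .subtree_control = 0x03,
				    .subtree_ss_mask = 0x07 };
	struct fake_cgroup child = { .parent = &root };

	printf("root control %#x, child control %#x\n",
	       fake_control(&root), fake_control(&child));
	printf("root ss_mask %#x, child ss_mask %#x\n",
	       fake_ss_mask(&root), fake_ss_mask(&child));
	return 0;
}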
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 72 +++++++++++++++++++++++++------------------------ 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e1b3d0fead05..2cb4b5419852 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -346,6 +346,32 @@ static struct cgroup *cgroup_parent(struct cgroup *cgrp) return NULL; } +/* subsystems visibly enabled on a cgroup */ +static u16 cgroup_control(struct cgroup *cgrp) +{ + struct cgroup *parent = cgroup_parent(cgrp); + u16 root_ss_mask = cgrp->root->subsys_mask; + + if (parent) + return parent->subtree_control; + + if (cgroup_on_dfl(cgrp)) + root_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; + + return root_ss_mask; +} + +/* subsystems enabled on a cgroup */ +static u16 cgroup_ss_mask(struct cgroup *cgrp) +{ + struct cgroup *parent = cgroup_parent(cgrp); + + if (parent) + return parent->subtree_ss_mask; + + return cgrp->root->subsys_mask; +} + /** * cgroup_css - obtain a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest @@ -385,16 +411,15 @@ static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, if (!ss) return &cgrp->self; - if (!(cgrp->root->subsys_mask & (1 << ss->id))) - return NULL; - /* * This function is used while updating css associations and thus - * can't test the csses directly. Use ->subtree_ss_mask. + * can't test the csses directly. Test ss_mask. */ - while (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->subtree_ss_mask & (1 << ss->id))) + while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) { cgrp = cgroup_parent(cgrp); + if (!cgrp) + return NULL; + } return cgroup_css(cgrp, ss); } @@ -1276,7 +1301,6 @@ static umode_t cgroup_file_mode(const struct cftype *cft) */ static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) { - struct cgroup *parent = cgroup_parent(cgrp); u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; @@ -1298,10 +1322,7 @@ static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) * happen only if some depended-upon subsystems were bound * to non-default hierarchies. */ - if (parent) - new_ss_mask &= parent->subtree_ss_mask; - else - new_ss_mask &= cgrp->root->subsys_mask; + new_ss_mask &= cgroup_ss_mask(cgrp); if (new_ss_mask == cur_ss_mask) break; @@ -2864,22 +2885,12 @@ static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) seq_putc(seq, '\n'); } -/* show controllers which are currently attached to the default hierarchy */ -static int cgroup_root_controllers_show(struct seq_file *seq, void *v) -{ - struct cgroup *cgrp = seq_css(seq)->cgroup; - - cgroup_print_ss_mask(seq, cgrp->root->subsys_mask & - ~cgrp_dfl_inhibit_ss_mask); - return 0; -} - /* show controllers which are enabled from the parent */ static int cgroup_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; - cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control); + cgroup_print_ss_mask(seq, cgroup_control(cgrp)); return 0; } @@ -3005,10 +3016,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, continue; } - /* unavailable or not enabled on the parent? 
*/ - if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) || - (cgroup_parent(cgrp) && - !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) { + if (!(cgroup_control(cgrp) & (1 << ssid))) { ret = -ENOENT; goto out_unlock; } @@ -4566,12 +4574,6 @@ static struct cftype cgroup_dfl_base_files[] = { }, { .name = "cgroup.controllers", - .flags = CFTYPE_ONLY_ON_ROOT, - .seq_show = cgroup_root_controllers_show, - }, - { - .name = "cgroup.controllers", - .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_controllers_show, }, { @@ -4945,7 +4947,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id); /* create the csses */ - do_each_subsys_mask(ss, ssid, parent->subtree_ss_mask) { + do_each_subsys_mask(ss, ssid, cgroup_ss_mask(cgrp)) { struct cgroup_subsys_state *css; css = css_create(cgrp, ss); @@ -4960,7 +4962,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent) * subtree_control from the parent. Each is configured manually. */ if (!cgroup_on_dfl(cgrp)) { - cgrp->subtree_control = parent->subtree_control; + cgrp->subtree_control = cgroup_control(cgrp); cgroup_refresh_subtree_ss_mask(cgrp); } @@ -5020,7 +5022,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; - do_each_subsys_mask(ss, ssid, parent->subtree_control) { + do_each_subsys_mask(ss, ssid, cgroup_control(cgrp)) { ret = css_populate_dir(cgroup_css(cgrp, ss), NULL); if (ret) goto out_destroy; From 1b9b96a12b5433ccc477265111122720ccb4965e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: [PATCH 23/44] cgroup: factor out cgroup_drain_offline() from cgroup_subtree_control_write() Factor out async css offline draining into cgroup_drain_offline(). * Nest subsystem walk inside child walk. The child walk will later be converted to subtree walk which is a bit more expensive. * Relocate the draining above subsystem mask preparation, which doesn't create any behavior differences but helps further refactoring. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 77 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2cb4b5419852..d295e6a91cdc 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2965,6 +2965,53 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) return ret; } +/** + * cgroup_drain_offline - wait for previously offlined csses to go away + * @cgrp: parent of the target cgroups + * + * Because css offlining is asynchronous, userland may try to re-enable a + * controller while the previous css is still around. This function drains + * the previous css instances of @cgrp's children. + * + * Must be called with cgroup_mutex held. Returns %false if there were no + * dying css instances. Returns %true if there were one or more and this + * function waited. On %true return, cgroup_mutex has been dropped and + * re-acquired inbetween which anything could have happened. The caller + * typically would have to start over. 
+ */ +static bool cgroup_drain_offline(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys *ss; + int ssid; + + lockdep_assert_held(&cgroup_mutex); + + cgroup_for_each_live_child(dsct, cgrp) { + for_each_subsys(ss, ssid) { + struct cgroup_subsys_state *css = cgroup_css(dsct, ss); + DEFINE_WAIT(wait); + + if (!css) + continue; + + cgroup_get(dsct); + prepare_to_wait(&dsct->offline_waitq, &wait, + TASK_UNINTERRUPTIBLE); + + mutex_unlock(&cgroup_mutex); + schedule(); + finish_wait(&dsct->offline_waitq, &wait); + mutex_lock(&cgroup_mutex); + + cgroup_put(dsct); + return true; + } + } + + return false; +} + /* change the enabled child controllers for a cgroup in the default hierarchy */ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, @@ -3050,6 +3097,11 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } + if (cgroup_drain_offline(cgrp)) { + cgroup_kn_unlock(of->kn); + return restart_syscall(); + } + /* * Update subsys masks and calculate what needs to be done. More * subsystems than specified may need to be enabled or disabled @@ -3065,31 +3117,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, enable |= css_enable; disable |= css_disable; - /* - * Because css offlining is asynchronous, userland might try to - * re-enable the same controller while the previous instance is - * still around. In such cases, wait till it's gone using - * offline_waitq. - */ - do_each_subsys_mask(ss, ssid, css_enable) { - cgroup_for_each_live_child(child, cgrp) { - DEFINE_WAIT(wait); - - if (!cgroup_css(child, ss)) - continue; - - cgroup_get(child); - prepare_to_wait(&child->offline_waitq, &wait, - TASK_UNINTERRUPTIBLE); - cgroup_kn_unlock(of->kn); - schedule(); - finish_wait(&child->offline_waitq, &wait); - cgroup_put(child); - - return restart_syscall(); - } - } while_each_subsys_mask(); - cgrp->subtree_control = new_sc; cgrp->subtree_ss_mask = new_ss; From 12b3bb6af862477f96e1adac51b201a143a8f3c4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: [PATCH 24/44] cgroup: factor out cgroup_apply_control_disable() from cgroup_subtree_control_write() Factor out css disabling and hiding into cgroup_apply_control_disable(). * Nest subsystem walk inside child walk. The child walk will later be converted to subtree walk which is a bit more expensive. * Instead of operating on the differential masks @css_enable and @css_disable, simply disable or hide csses according to the current cgroup_control() and cgroup_ss_mask(). This leads to the same result and is simpler and more robust. * This allows error handling path to share the same code. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 74 +++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 33 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d295e6a91cdc..97cb1315bcac 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3012,6 +3012,43 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) return false; } +/** + * cgroup_apply_control_disable - kill or hide csses according to control + * @cgrp: parent of the target cgroups + * + * Walk @cgrp's children and kill and hide csses so that they match + * cgroup_ss_mask() and cgroup_visible_mask(). + * + * A css is hidden when the userland requests it to be disabled while other + * subsystems are still depending on it. 
The css must not actively control + * resources and be in the vanilla state if it's made visible again later. + * Controllers which may be depended upon should provide ->css_reset() for + * this purpose. + */ +static void cgroup_apply_control_disable(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys *ss; + int ssid; + + cgroup_for_each_live_child(dsct, cgrp) { + for_each_subsys(ss, ssid) { + struct cgroup_subsys_state *css = cgroup_css(dsct, ss); + + if (!css) + continue; + + if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) { + kill_css(css); + } else if (!(cgroup_control(dsct) & (1 << ss->id))) { + css_clear_dir(css, NULL); + if (ss->css_reset) + ss->css_reset(css); + } + } + } +} + /* change the enabled child controllers for a cgroup in the default hierarchy */ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, @@ -3160,27 +3197,8 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, if (ret) goto err_undo_css; - /* - * All tasks are migrated out of disabled csses. Kill or hide - * them. A css is hidden when the userland requests it to be - * disabled while other subsystems are still depending on it. The - * css must not actively control resources and be in the vanilla - * state if it's made visible again later. Controllers which may - * be depended upon should provide ->css_reset() for this purpose. - */ - do_each_subsys_mask(ss, ssid, disable) { - cgroup_for_each_live_child(child, cgrp) { - struct cgroup_subsys_state *css = cgroup_css(child, ss); - - if (css_disable & (1 << ssid)) { - kill_css(css); - } else { - css_clear_dir(css, NULL); - if (ss->css_reset) - ss->css_reset(css); - } - } - } while_each_subsys_mask(); + /* all tasks are migrated out of disabled csses, commit disable */ + cgroup_apply_control_disable(cgrp); kernfs_activate(cgrp->kn); ret = 0; @@ -3189,22 +3207,12 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return ret ?: nbytes; err_undo_css: + /* restore masks and shoot down new csses */ cgrp->subtree_control = old_sc; cgrp->subtree_ss_mask = old_ss; - do_each_subsys_mask(ss, ssid, enable) { - cgroup_for_each_live_child(child, cgrp) { - struct cgroup_subsys_state *css = cgroup_css(child, ss); + cgroup_apply_control_disable(cgrp); - if (!css) - continue; - - if (css_enable & (1 << ssid)) - kill_css(css); - else - css_clear_dir(css, NULL); - } - } while_each_subsys_mask(); goto out_unlock; } From bdb53bd797dcef46d1a252b9529f8fd511bf714c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: [PATCH 25/44] cgroup: factor out cgroup_apply_control_enable() from cgroup_subtree_control_write() Factor out css enabling and showing into cgroup_apply_control_enable(). * Nest subsystem walk inside child walk. The child walk will later be converted to subtree walk which is a bit more expensive. * Instead of operating on the differential masks @css_enable, simply enable or show csses according to the current cgroup_control() and cgroup_ss_mask(). This leads to the same result and is simpler and more robust. 
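As a toy model (not kernel code, names invented) of why applying the current masks is more robust than applying a differential, the sketch below simply makes existing state match the target mask; rerunning it with the same mask is harmless and partial failures need no special bookkeeping.

#include <stdint.h>
#include <stdio.h>

static uint16_t present;	/* which csses "exist" in this toy */

static void apply_enable(uint16_t target_mask)
{
	for (int ssid = 0; ssid < 16; ssid++) {
		uint16_t bit = 1 << ssid;

		if (!(target_mask & bit))
			continue;		/* not wanted here */
		if (present & bit)
			continue;		/* already created */
		present |= bit;			/* the "css_create()" step */
		printf("created css %d\n", ssid);
	}
}

int main(void)
{
	apply_enable(0x5);	/* creates csses 0 and 2 */
	apply_enable(0x5);	/* no-op: state already matches */
	apply_enable(0x7);	/* only the missing css 1 is created */
	return 0;
}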
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 77 ++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 97cb1315bcac..1193038d0729 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3012,6 +3012,49 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) return false; } +/** + * cgroup_apply_control_enable - enable or show csses according to control + * @cgrp: parent of the target cgroups + * + * Walk @cgrp's children and create new csses or make the existing ones + * visible. A css is created invisible if it's being implicitly enabled + * through dependency. An invisible css is made visible when the userland + * explicitly enables it. + * + * Returns 0 on success, -errno on failure. On failure, csses which have + * been processed already aren't cleaned up. The caller is responsible for + * cleaning up with cgroup_apply_control_disble(). + */ +static int cgroup_apply_control_enable(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys *ss; + int ssid, ret; + + cgroup_for_each_live_child(dsct, cgrp) { + for_each_subsys(ss, ssid) { + struct cgroup_subsys_state *css = cgroup_css(dsct, ss); + + if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) + continue; + + if (!css) { + css = css_create(dsct, ss); + if (IS_ERR(css)) + return PTR_ERR(css); + } + + if (cgroup_control(dsct) & (1 << ss->id)) { + ret = css_populate_dir(css, NULL); + if (ret) + return ret; + } + } + } + + return 0; +} + /** * cgroup_apply_control_disable - kill or hide csses according to control * @cgrp: parent of the target cgroups @@ -3157,36 +3200,10 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control = new_sc; cgrp->subtree_ss_mask = new_ss; - /* - * Create new csses or make the existing ones visible. A css is - * created invisible if it's being implicitly enabled through - * dependency. An invisible css is made visible when the userland - * explicitly enables it. - */ - do_each_subsys_mask(ss, ssid, enable) { - cgroup_for_each_live_child(child, cgrp) { - if (css_enable & (1 << ssid)) { - struct cgroup_subsys_state *css; - - css = css_create(child, ss); - if (IS_ERR(css)) { - ret = PTR_ERR(css); - goto err_undo_css; - } - - if (cgrp->subtree_control & (1 << ssid)) { - ret = css_populate_dir(css, NULL); - if (ret) - goto err_undo_css; - } - } else { - ret = css_populate_dir(cgroup_css(child, ss), - NULL); - if (ret) - goto err_undo_css; - } - } - } while_each_subsys_mask(); + /* prepare csses */ + ret = cgroup_apply_control_enable(cgrp); + if (ret) + goto err_undo_css; /* * At this point, cgroup_e_css() results reflect the new csses From ce3f1d9d19371045981a64815227bab822554878 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: [PATCH 26/44] cgroup: make cgroup_drain_offline() and cgroup_apply_control_{disable|enable}() recursive The three factored out css management operations - cgroup_drain_offline() and cgroup_apply_control_{disable|enable}() - only depend on the current state of the target cgroups and idempotent and thus can be easily made to operate on the subtree instead of the immediate children. This patch introduces the iterators which walk live subtree and converts the three functions to operate on the subtree including self instead of the children. While this leads to spurious walking and be slightly more expensive, it will allow them to be used for wider scope of operations. 
Note that cgroup_drain_offline() now tests for whether a css is dying before trying to drain it. This is to avoid trying to drain live csses as there can be mix of live and dying csses in a subtree unlike children of the same parent. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 1193038d0729..0398f2a6673b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -573,6 +573,24 @@ static int notify_on_release(const struct cgroup *cgrp) ; \ else +/* walk live descendants in preorder */ +#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ + css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + +/* walk live descendants in postorder */ +#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ + css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + static void cgroup_release_agent(struct work_struct *work); static void check_for_release(struct cgroup *cgrp); @@ -2967,11 +2985,11 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) /** * cgroup_drain_offline - wait for previously offlined csses to go away - * @cgrp: parent of the target cgroups + * @cgrp: root of the target subtree * * Because css offlining is asynchronous, userland may try to re-enable a * controller while the previous css is still around. This function drains - * the previous css instances of @cgrp's children. + * the previous css instances of @cgrp's subtree. * * Must be called with cgroup_mutex held. Returns %false if there were no * dying css instances. Returns %true if there were one or more and this @@ -2982,17 +3000,18 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) static bool cgroup_drain_offline(struct cgroup *cgrp) { struct cgroup *dsct; + struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid; lockdep_assert_held(&cgroup_mutex); - cgroup_for_each_live_child(dsct, cgrp) { + cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); DEFINE_WAIT(wait); - if (!css) + if (!css || !percpu_ref_is_dying(&css->refcnt)) continue; cgroup_get(dsct); @@ -3014,9 +3033,9 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) /** * cgroup_apply_control_enable - enable or show csses according to control - * @cgrp: parent of the target cgroups + * @cgrp: root of the target subtree * - * Walk @cgrp's children and create new csses or make the existing ones + * Walk @cgrp's subtree and create new csses or make the existing ones * visible. A css is created invisible if it's being implicitly enabled * through dependency. An invisible css is made visible when the userland * explicitly enables it. 
@@ -3028,10 +3047,11 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) static int cgroup_apply_control_enable(struct cgroup *cgrp) { struct cgroup *dsct; + struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid, ret; - cgroup_for_each_live_child(dsct, cgrp) { + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); @@ -3057,9 +3077,9 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) /** * cgroup_apply_control_disable - kill or hide csses according to control - * @cgrp: parent of the target cgroups + * @cgrp: root of the target subtree * - * Walk @cgrp's children and kill and hide csses so that they match + * Walk @cgrp's subtree and kill and hide csses so that they match * cgroup_ss_mask() and cgroup_visible_mask(). * * A css is hidden when the userland requests it to be disabled while other @@ -3071,10 +3091,11 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) static void cgroup_apply_control_disable(struct cgroup *cgrp) { struct cgroup *dsct; + struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid; - cgroup_for_each_live_child(dsct, cgrp) { + cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); From 15a27c362d54378f17ec078579b2f6af88495a3f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:57:59 -0500 Subject: [PATCH 27/44] cgroup: introduce cgroup_{save|propagate|restore}_control() While controllers are being enabled and disabled in cgroup_subtree_control_write(), the original subsystem masks are stashed in local variables so that they can be restored if the operation fails in the middle. This patch adds dedicated fields to struct cgroup to be used instead of the local variables and implements functions to stash the current values, propagate the changes and restore them recursively. Combined with the previous changes, this makes subsystem management operations fully recursive and modularlized. This will be used to expand cgroup core functionalities. While at it, remove now unused @css_enable and @css_disable from cgroup_subtree_control_write(). Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup-defs.h | 2 + kernel/cgroup.c | 82 ++++++++++++++++++++++++++++--------- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 7593c1a46786..aae8c94de4b3 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -260,6 +260,8 @@ struct cgroup { */ u16 subtree_control; u16 subtree_ss_mask; + u16 old_subtree_control; + u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0398f2a6673b..452a90e455fa 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3031,6 +3031,62 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) return false; } +/** + * cgroup_save_control - save control masks of a subtree + * @cgrp: root of the target subtree + * + * Save ->subtree_control and ->subtree_ss_mask to the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. 
+ */ +static void cgroup_save_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->old_subtree_control = dsct->subtree_control; + dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; + } +} + +/** + * cgroup_propagate_control - refresh control masks of a subtree + * @cgrp: root of the target subtree + * + * For @cgrp and its subtree, ensure ->subtree_ss_mask matches + * ->subtree_control and propagate controller availability through the + * subtree so that descendants don't have unavailable controllers enabled. + */ +static void cgroup_propagate_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { + dsct->subtree_control &= cgroup_control(dsct); + dsct->subtree_ss_mask = cgroup_calc_subtree_ss_mask(dsct, + dsct->subtree_control); + } +} + +/** + * cgroup_restore_control - restore control masks of a subtree + * @cgrp: root of the target subtree + * + * Restore ->subtree_control and ->subtree_ss_mask from the respective old_ + * prefixed fields for @cgrp's subtree including @cgrp itself. + */ +static void cgroup_restore_control(struct cgroup *cgrp) +{ + struct cgroup *dsct; + struct cgroup_subsys_state *d_css; + + cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { + dsct->subtree_control = dsct->old_subtree_control; + dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; + } +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3119,7 +3175,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, loff_t off) { u16 enable = 0, disable = 0; - u16 css_enable, css_disable, old_sc, new_sc, old_ss, new_ss; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; @@ -3203,25 +3258,14 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return restart_syscall(); } - /* - * Update subsys masks and calculate what needs to be done. More - * subsystems than specified may need to be enabled or disabled - * depending on subsystem dependencies. - */ - old_sc = cgrp->subtree_control; - old_ss = cgrp->subtree_ss_mask; - new_sc = (old_sc | enable) & ~disable; - new_ss = cgroup_calc_subtree_ss_mask(cgrp, new_sc); + /* save and update control masks and prepare csses */ + cgroup_save_control(cgrp); - css_enable = ~old_ss & new_ss; - css_disable = old_ss & ~new_ss; - enable |= css_enable; - disable |= css_disable; + cgrp->subtree_control |= enable; + cgrp->subtree_control &= ~disable; - cgrp->subtree_control = new_sc; - cgrp->subtree_ss_mask = new_ss; + cgroup_propagate_control(cgrp); - /* prepare csses */ ret = cgroup_apply_control_enable(cgrp); if (ret) goto err_undo_css; @@ -3246,9 +3290,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, err_undo_css: /* restore masks and shoot down new csses */ - cgrp->subtree_control = old_sc; - cgrp->subtree_ss_mask = old_ss; - + cgroup_restore_control(cgrp); cgroup_apply_control_disable(cgrp); goto out_unlock; From f7b2814bb9b6cb1d69333e1592c702260fcb4184 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:00 -0500 Subject: [PATCH 28/44] cgroup: factor out cgroup_{apply|finalize}_control() from cgroup_subtree_control_write() Factor out cgroup_{apply|finalize}_control() so that control mask update can be done in several simple steps. This patch doesn't introduce behavior changes. 
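A rough userspace sketch of the resulting save / update / apply / finalize protocol (all names here are invented): a successful apply commits the updated mask, a failed one rolls back to the saved state.

#include <stdint.h>
#include <stdio.h>

struct toy_cgroup {
	uint16_t subtree_control;
	uint16_t old_subtree_control;
};

static void save_control(struct toy_cgroup *c)
{
	c->old_subtree_control = c->subtree_control;	/* step 1: stash */
}

static int apply_control(struct toy_cgroup *c, int should_fail)
{
	printf("applying mask %#x\n", c->subtree_control);	/* step 3 */
	return should_fail ? -1 : 0;
}

static void finalize_control(struct toy_cgroup *c, int ret)
{
	if (ret)					/* step 5: roll back */
		c->subtree_control = c->old_subtree_control;
}

static int enable(struct toy_cgroup *c, uint16_t bit, int should_fail)
{
	int ret;

	save_control(c);
	c->subtree_control |= bit;		/* step 2: update the mask */
	ret = apply_control(c, should_fail);
	finalize_control(c, ret);
	return ret;
}

int main(void)
{
	struct toy_cgroup c = { .subtree_control = 0x1 };

	enable(&c, 0x2, 1);	/* fails: mask stays 0x1 */
	enable(&c, 0x2, 0);	/* succeeds: mask becomes 0x3 */
	printf("final mask %#x\n", c.subtree_control);
	return 0;
}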
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 81 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 452a90e455fa..2adf0433a3cf 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -3169,6 +3169,62 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) } } +/** + * cgroup_apply_control - apply control mask updates to the subtree + * @cgrp: root of the target subtree + * + * subsystems can be enabled and disabled in a subtree using the following + * steps. + * + * 1. Call cgroup_save_control() to stash the current state. + * 2. Update ->subtree_control masks in the subtree as desired. + * 3. Call cgroup_apply_control() to apply the changes. + * 4. Optionally perform other related operations. + * 5. Call cgroup_finalize_control() to finish up. + * + * This function implements step 3 and propagates the mask changes + * throughout @cgrp's subtree, updates csses accordingly and perform + * process migrations. + */ +static int cgroup_apply_control(struct cgroup *cgrp) +{ + int ret; + + cgroup_propagate_control(cgrp); + + ret = cgroup_apply_control_enable(cgrp); + if (ret) + return ret; + + /* + * At this point, cgroup_e_css() results reflect the new csses + * making the following cgroup_update_dfl_csses() properly update + * css associations of all tasks in the subtree. + */ + ret = cgroup_update_dfl_csses(cgrp); + if (ret) + return ret; + + return 0; +} + +/** + * cgroup_finalize_control - finalize control mask update + * @cgrp: root of the target subtree + * @ret: the result of the update + * + * Finalize control mask update. See cgroup_apply_control() for more info. + */ +static void cgroup_finalize_control(struct cgroup *cgrp, int ret) +{ + if (ret) { + cgroup_restore_control(cgrp); + cgroup_propagate_control(cgrp); + } + + cgroup_apply_control_disable(cgrp); +} + /* change the enabled child controllers for a cgroup in the default hierarchy */ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, @@ -3264,36 +3320,15 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, cgrp->subtree_control |= enable; cgrp->subtree_control &= ~disable; - cgroup_propagate_control(cgrp); + ret = cgroup_apply_control(cgrp); - ret = cgroup_apply_control_enable(cgrp); - if (ret) - goto err_undo_css; - - /* - * At this point, cgroup_e_css() results reflect the new csses - * making the following cgroup_update_dfl_csses() properly update - * css associations of all tasks in the subtree. - */ - ret = cgroup_update_dfl_csses(cgrp); - if (ret) - goto err_undo_css; - - /* all tasks are migrated out of disabled csses, commit disable */ - cgroup_apply_control_disable(cgrp); + cgroup_finalize_control(cgrp, ret); kernfs_activate(cgrp->kn); ret = 0; out_unlock: cgroup_kn_unlock(of->kn); return ret ?: nbytes; - -err_undo_css: - /* restore masks and shoot down new csses */ - cgroup_restore_control(cgrp); - cgroup_apply_control_disable(cgrp); - - goto out_unlock; } static int cgroup_events_show(struct seq_file *seq, void *v) From 945ba1996888809cf510a8da000a9c20a9fab5ad Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:00 -0500 Subject: [PATCH 29/44] cgroup: combine cgroup_mutex locking and offline css draining cgroup_drain_offline() is used to wait for csses being offlined to uninstall itself from cgroup->subsys[] array so that new csses can be installed. 
The function's only user, cgroup_subtree_control_write(), calls it after performing some checks and restarts the whole process via restart_syscall() if draining has to release cgroup_mutex to wait. This can be simplified by draining before other synchronized operations so that there's nothing to restart. This patch converts cgroup_drain_offline() to cgroup_lock_and_drain_offline() which performs both locking and draining and updates cgroup_kn_lock_live() use it instead of cgroup_mutex() if requested. This combined locking and draining operations are easier to use and less error-prone. While at it, add WARNs in control_apply functions which triggers if the subtree isn't properly drained. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 55 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2adf0433a3cf..bbeb35f14eda 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -220,6 +220,7 @@ static struct cftype cgroup_dfl_base_files[]; static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); +static void cgroup_lock_and_drain_offline(struct cgroup *cgrp); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, @@ -1391,19 +1392,22 @@ static void cgroup_kn_unlock(struct kernfs_node *kn) /** * cgroup_kn_lock_live - locking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced + * @drain_offline: perform offline draining on the cgroup * * This helper is to be used by a cgroup kernfs method currently servicing * @kn. It breaks the active protection, performs cgroup locking and * verifies that the associated cgroup is alive. Returns the cgroup if * alive; otherwise, %NULL. A successful return should be undone by a - * matching cgroup_kn_unlock() invocation. + * matching cgroup_kn_unlock() invocation. If @drain_offline is %true, the + * cgroup is drained of offlining csses before return. * * Any cgroup kernfs method implementation which requires locking the * associated cgroup should use this helper. It avoids nesting cgroup * locking under kernfs active protection and allows all kernfs operations * including self-removal. 
*/ -static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn) +static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, + bool drain_offline) { struct cgroup *cgrp; @@ -1422,7 +1426,10 @@ static struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn) return NULL; kernfs_break_active_protection(kn); - mutex_lock(&cgroup_mutex); + if (drain_offline) + cgroup_lock_and_drain_offline(cgrp); + else + mutex_lock(&cgroup_mutex); if (!cgroup_is_dead(cgrp)) return cgrp; @@ -2761,7 +2768,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; - cgrp = cgroup_kn_lock_live(of->kn); + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -2859,7 +2866,7 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); - cgrp = cgroup_kn_lock_live(of->kn); + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; spin_lock(&release_agent_path_lock); @@ -2984,27 +2991,23 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) } /** - * cgroup_drain_offline - wait for previously offlined csses to go away + * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses * @cgrp: root of the target subtree * * Because css offlining is asynchronous, userland may try to re-enable a - * controller while the previous css is still around. This function drains - * the previous css instances of @cgrp's subtree. - * - * Must be called with cgroup_mutex held. Returns %false if there were no - * dying css instances. Returns %true if there were one or more and this - * function waited. On %true return, cgroup_mutex has been dropped and - * re-acquired inbetween which anything could have happened. The caller - * typically would have to start over. + * controller while the previous css is still around. This function grabs + * cgroup_mutex and drains the previous css instances of @cgrp's subtree. 
*/ -static bool cgroup_drain_offline(struct cgroup *cgrp) +static void cgroup_lock_and_drain_offline(struct cgroup *cgrp) + __acquires(&cgroup_mutex) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid; - lockdep_assert_held(&cgroup_mutex); +restart: + mutex_lock(&cgroup_mutex); cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { @@ -3021,14 +3024,11 @@ static bool cgroup_drain_offline(struct cgroup *cgrp) mutex_unlock(&cgroup_mutex); schedule(); finish_wait(&dsct->offline_waitq, &wait); - mutex_lock(&cgroup_mutex); cgroup_put(dsct); - return true; + goto restart; } } - - return false; } /** @@ -3111,6 +3111,8 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); + WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); + if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; @@ -3155,6 +3157,8 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); + WARN_ON_ONCE(css && percpu_ref_is_dying(&css->refcnt)); + if (!css) continue; @@ -3264,7 +3268,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, return -EINVAL; } - cgrp = cgroup_kn_lock_live(of->kn); + cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENODEV; @@ -3309,11 +3313,6 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, goto out_unlock; } - if (cgroup_drain_offline(cgrp)) { - cgroup_kn_unlock(of->kn); - return restart_syscall(); - } - /* save and update control masks and prepare csses */ cgroup_save_control(cgrp); @@ -5140,7 +5139,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (strchr(name, '\n')) return -EINVAL; - parent = cgroup_kn_lock_live(parent_kn); + parent = cgroup_kn_lock_live(parent_kn, false); if (!parent) return -ENODEV; @@ -5339,7 +5338,7 @@ static int cgroup_rmdir(struct kernfs_node *kn) struct cgroup *cgrp; int ret = 0; - cgrp = cgroup_kn_lock_live(kn); + cgrp = cgroup_kn_lock_live(kn, false); if (!cgrp) return 0; From 03970d3c11faf870dc5126bb2e84fd1d692af1b7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:00 -0500 Subject: [PATCH 30/44] cgroup: use cgroup_apply_enable_control() in cgroup creation path cgroup_create() manually updates control masks and creates child csses which cgroup_mkdir() then manually populates. Both can be simplified by using cgroup_apply_enable_control() and friends. The only catch is that it calls css_populate_dir() with NULL cgroup->kn during cgroup_create(). This is worked around by making the function noop on NULL kn. 
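A tiny illustration (invented names, not the kernel implementation) of the NULL-directory guard: the same populate path can run at creation time, when no kernfs node exists yet, and again once the directory has been created.

#include <stdio.h>

struct toy_css {
	unsigned int visible;
	const char *dir;	/* stands in for cgroup->kn */
};

static int populate(struct toy_css *css)
{
	if (css->visible || !css->dir)	/* nothing to do yet */
		return 0;
	printf("populating files under %s\n", css->dir);
	css->visible = 1;
	return 0;
}

int main(void)
{
	struct toy_css css = { 0 };

	populate(&css);		/* creation path: no directory, no-op */
	css.dir = "/sys/fs/cgroup/test";
	populate(&css);		/* mkdir path: files actually created */
	return 0;
}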
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index bbeb35f14eda..ee6951b1e35d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1490,7 +1490,7 @@ static int css_populate_dir(struct cgroup_subsys_state *css, struct cftype *cfts, *failed_cfts; int ret; - if (css->flags & CSS_VISIBLE) + if ((css->flags & CSS_VISIBLE) || !cgrp->kn) return 0; if (!css->ss) { @@ -5042,10 +5042,9 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, static struct cgroup *cgroup_create(struct cgroup *parent) { struct cgroup_root *root = parent->root; - struct cgroup_subsys *ss; struct cgroup *cgrp, *tcgrp; int level = parent->level + 1; - int ssid, ret; + int ret; /* allocate the cgroup and its ID, 0 is reserved for the root */ cgrp = kzalloc(sizeof(*cgrp) + @@ -5095,25 +5094,19 @@ static struct cgroup *cgroup_create(struct cgroup *parent) */ cgroup_idr_replace(&root->cgroup_idr, cgrp, cgrp->id); - /* create the csses */ - do_each_subsys_mask(ss, ssid, cgroup_ss_mask(cgrp)) { - struct cgroup_subsys_state *css; - - css = css_create(cgrp, ss); - if (IS_ERR(css)) { - ret = PTR_ERR(css); - goto out_destroy; - } - } while_each_subsys_mask(); - /* * On the default hierarchy, a child doesn't automatically inherit * subtree_control from the parent. Each is configured manually. */ - if (!cgroup_on_dfl(cgrp)) { + if (!cgroup_on_dfl(cgrp)) cgrp->subtree_control = cgroup_control(cgrp); - cgroup_refresh_subtree_ss_mask(cgrp); - } + + cgroup_propagate_control(cgrp); + + /* @cgrp doesn't have dir yet so the following will only create csses */ + ret = cgroup_apply_control_enable(cgrp); + if (ret) + goto out_destroy; return cgrp; @@ -5131,9 +5124,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { struct cgroup *parent, *cgrp; - struct cgroup_subsys *ss; struct kernfs_node *kn; - int ssid, ret; + int ret; /* do not accept '\n' to prevent making /proc//cgroup unparsable */ if (strchr(name, '\n')) @@ -5171,11 +5163,9 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; - do_each_subsys_mask(ss, ssid, cgroup_control(cgrp)) { - ret = css_populate_dir(cgroup_css(cgrp, ss), NULL); - if (ret) - goto out_destroy; - } while_each_subsys_mask(); + ret = cgroup_apply_control_enable(cgrp); + if (ret) + goto out_destroy; /* let's create and online css's */ kernfs_activate(kn); From 334c3679ec4b2b113c35ebe37d2018b112dd5013 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:01 -0500 Subject: [PATCH 31/44] cgroup: reimplement rebind_subsystems() using cgroup_apply_control() and friends rebind_subsystem() open codes quite a bit of css and interface file manipulations. It tries to be fail-safe but doesn't quite achieve it. It can be greatly simplified by using the new css management helpers. This patch reimplements rebind_subsytsems() using cgroup_apply_control() and friends. * The half-baked rollback on file creation failure is dropped. It is an extremely cold path, failure isn't critical, and, aside from kernel bugs, the only reason it can fail is memory allocation failure which pretty much doesn't happen for small allocations. * As cgroup_apply_control_disable() is now used to clean up root cgroup on rebind, make sure that it doesn't end up killing root csses. 
* All callers of rebind_subsystems() are updated to use cgroup_lock_and_drain_offline() as the apply_control functions require drained subtree. * This leaves cgroup_refresh_subtree_ss_mask() without any user. Removed. * css_populate_dir() and css_clear_dir() no longer needs @cgrp_override parameter. Dropped. * While at it, add WARN_ON() to rebind_subsystem() calls which are expected to always succeed just in case. While the rules visible to userland aren't changed, this reimplementation not only simplifies rebind_subsystems() but also allows it to disable and enable csses recursively. This can be used to implement more flexible rebinding. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 107 +++++++++++++----------------------------------- 1 file changed, 28 insertions(+), 79 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index ee6951b1e35d..98e644b0a532 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -221,6 +221,8 @@ static struct cftype cgroup_legacy_base_files[]; static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask); static void cgroup_lock_and_drain_offline(struct cgroup *cgrp); +static int cgroup_apply_control(struct cgroup *cgrp); +static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_advance(struct css_task_iter *it); static int cgroup_destroy_locked(struct cgroup *cgrp); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, @@ -1160,13 +1162,13 @@ static void cgroup_destroy_root(struct cgroup_root *root) struct cgroup *cgrp = &root->cgrp; struct cgrp_cset_link *link, *tmp_link; - mutex_lock(&cgroup_mutex); + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); BUG_ON(atomic_read(&root->nr_cgrps)); BUG_ON(!list_empty(&cgrp->self.children)); /* Rebind all subsystems back to the default hierarchy */ - rebind_subsystems(&cgrp_dfl_root, root->subsys_mask); + WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask)); /* * Release all the links from cset_links to this hierarchy's @@ -1351,19 +1353,6 @@ static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) return cur_ss_mask; } -/** - * cgroup_refresh_subtree_ss_mask - update subtree_ss_mask - * @cgrp: the target cgroup - * - * Update @cgrp->subtree_ss_mask according to the current - * @cgrp->subtree_control using cgroup_calc_subtree_ss_mask(). - */ -static void cgroup_refresh_subtree_ss_mask(struct cgroup *cgrp) -{ - cgrp->subtree_ss_mask = - cgroup_calc_subtree_ss_mask(cgrp, cgrp->subtree_control); -} - /** * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced @@ -1459,12 +1448,10 @@ static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) /** * css_clear_dir - remove subsys files in a cgroup directory * @css: taget css - * @cgrp_override: specify if target cgroup is different from css->cgroup */ -static void css_clear_dir(struct cgroup_subsys_state *css, - struct cgroup *cgrp_override) +static void css_clear_dir(struct cgroup_subsys_state *css) { - struct cgroup *cgrp = cgrp_override ?: css->cgroup; + struct cgroup *cgrp = css->cgroup; struct cftype *cfts; if (!(css->flags & CSS_VISIBLE)) @@ -1479,14 +1466,12 @@ static void css_clear_dir(struct cgroup_subsys_state *css, /** * css_populate_dir - create subsys files in a cgroup directory * @css: target css - * @cgrp_overried: specify if target cgroup is different from css->cgroup * * On failure, no file is added. 
*/ -static int css_populate_dir(struct cgroup_subsys_state *css, - struct cgroup *cgrp_override) +static int css_populate_dir(struct cgroup_subsys_state *css) { - struct cgroup *cgrp = cgrp_override ?: css->cgroup; + struct cgroup *cgrp = css->cgroup; struct cftype *cfts, *failed_cfts; int ret; @@ -1526,7 +1511,6 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; - u16 tmp_ss_mask; int ssid, i, ret; lockdep_assert_held(&cgroup_mutex); @@ -1541,46 +1525,6 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) return -EBUSY; } while_each_subsys_mask(); - /* skip creating root files on dfl_root for inhibited subsystems */ - tmp_ss_mask = ss_mask; - if (dst_root == &cgrp_dfl_root) - tmp_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; - - do_each_subsys_mask(ss, ssid, tmp_ss_mask) { - struct cgroup *scgrp = &ss->root->cgrp; - int tssid; - - ret = css_populate_dir(cgroup_css(scgrp, ss), dcgrp); - if (!ret) - continue; - - /* - * Rebinding back to the default root is not allowed to - * fail. Using both default and non-default roots should - * be rare. Moving subsystems back and forth even more so. - * Just warn about it and continue. - */ - if (dst_root == &cgrp_dfl_root) { - if (cgrp_dfl_visible) { - pr_warn("failed to create files (%d) while rebinding 0x%x to default root\n", - ret, ss_mask); - pr_warn("you may retry by moving them to a different hierarchy and unbinding\n"); - } - continue; - } - - do_each_subsys_mask(ss, tssid, tmp_ss_mask) { - if (tssid == ssid) - break; - css_clear_dir(cgroup_css(scgrp, ss), dcgrp); - } while_each_subsys_mask(); - return ret; - } while_each_subsys_mask(); - - /* - * Nothing can fail from this point on. Remove files for the - * removed subsystems and rebind each subsystem. 
- */ do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; @@ -1589,8 +1533,12 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); - css_clear_dir(css, NULL); + /* disable from the source */ + src_root->subsys_mask &= ~(1 << ssid); + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); rcu_assign_pointer(dcgrp->subsys[ssid], css); ss->root = dst_root; @@ -1602,20 +1550,20 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) &dcgrp->e_csets[ss->id]); spin_unlock_bh(&css_set_lock); - src_root->subsys_mask &= ~(1 << ssid); - scgrp->subtree_control &= ~(1 << ssid); - cgroup_refresh_subtree_ss_mask(scgrp); - /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; if (dst_root == &cgrp_dfl_root) { static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); } else { dcgrp->subtree_control |= 1 << ssid; - cgroup_refresh_subtree_ss_mask(dcgrp); static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } + ret = cgroup_apply_control(dcgrp); + if (ret) + pr_warn("partial failure to rebind %s controller (err=%d)\n", + ss->name, ret); + if (ss->bind) ss->bind(css); } while_each_subsys_mask(); @@ -1807,7 +1755,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) return -EINVAL; } - mutex_lock(&cgroup_mutex); + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* See what subsystems are wanted */ ret = parse_cgroupfs_options(data, &opts); @@ -1840,7 +1788,7 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data) if (ret) goto out_unlock; - rebind_subsystems(&cgrp_dfl_root, removed_mask); + WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask)); if (opts.release_agent) { spin_lock(&release_agent_path_lock); @@ -1991,7 +1939,7 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) } root_cgrp->kn = root->kf_root->kn; - ret = css_populate_dir(&root_cgrp->self, NULL); + ret = css_populate_dir(&root_cgrp->self); if (ret) goto destroy_root; @@ -2070,7 +2018,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, goto out_mount; } - mutex_lock(&cgroup_mutex); + cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); /* First find the desired set of subsystems */ ret = parse_cgroupfs_options(data, &opts); @@ -3123,7 +3071,7 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) } if (cgroup_control(dsct) & (1 << ss->id)) { - ret = css_populate_dir(css, NULL); + ret = css_populate_dir(css); if (ret) return ret; } @@ -3162,10 +3110,11 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (!css) continue; - if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) { + if (css->parent && + !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); } else if (!(cgroup_control(dsct) & (1 << ss->id))) { - css_clear_dir(css, NULL); + css_clear_dir(css); if (ss->css_reset) ss->css_reset(css); } @@ -5159,7 +5108,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, if (ret) goto out_destroy; - ret = css_populate_dir(&cgrp->self, NULL); + ret = css_populate_dir(&cgrp->self); if (ret) goto out_destroy; @@ -5231,7 +5180,7 @@ static void kill_css(struct cgroup_subsys_state *css) * This must happen before css is disassociated with its cgroup. * See seq_css() for details. 
*/ - css_clear_dir(css, NULL); + css_clear_dir(css); /* * Killing would put the base ref, but we need to keep it alive From 5ced2518bd3e3a4f01e2122122211f217cd99f4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:01 -0500 Subject: [PATCH 32/44] cgroup: make cgroup_calc_subtree_ss_mask() take @this_ss_mask cgroup_calc_subtree_ss_mask() currently takes @cgrp and @subtree_control. @cgrp is used for two purposes - to decide whether it's for default hierarchy and the mask of available subsystems. The former doesn't matter as the results are the same regardless. The latter can be specified directly through a subsystem mask. This patch makes cgroup_calc_subtree_ss_mask() perform the same calculations for both default and legacy hierarchies and take @this_ss_mask for available subsystems. @cgrp is no longer used and dropped. This is to allow using the function in contexts where available controllers can't be decided from the cgroup. v2: cgroup_refres_subtree_ss_mask() is removed by a previous patch. Updated accordingly. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 98e644b0a532..58e02e9aa970 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1309,18 +1309,17 @@ static umode_t cgroup_file_mode(const struct cftype *cft) /** * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask - * @cgrp: the target cgroup * @subtree_control: the new subtree_control mask to consider + * @this_ss_mask: available subsystems * * On the default hierarchy, a subsystem may request other subsystems to be * enabled together through its ->depends_on mask. In such cases, more * subsystems than specified in "cgroup.subtree_control" may be enabled. * * This function calculates which subsystems need to be enabled if - * @subtree_control is to be applied to @cgrp. The returned mask is always - * a superset of @subtree_control and follows the usual hierarchy rules. + * @subtree_control is to be applied while restricted to @this_ss_mask. */ -static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) +static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask) { u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; @@ -1328,9 +1327,6 @@ static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) lockdep_assert_held(&cgroup_mutex); - if (!cgroup_on_dfl(cgrp)) - return cur_ss_mask; - while (true) { u16 new_ss_mask = cur_ss_mask; @@ -1343,7 +1339,7 @@ static u16 cgroup_calc_subtree_ss_mask(struct cgroup *cgrp, u16 subtree_control) * happen only if some depended-upon subsystems were bound * to non-default hierarchies. 
*/ - new_ss_mask &= cgroup_ss_mask(cgrp); + new_ss_mask &= this_ss_mask; if (new_ss_mask == cur_ss_mask) break; @@ -3012,8 +3008,9 @@ static void cgroup_propagate_control(struct cgroup *cgrp) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->subtree_control &= cgroup_control(dsct); - dsct->subtree_ss_mask = cgroup_calc_subtree_ss_mask(dsct, - dsct->subtree_control); + dsct->subtree_ss_mask = + cgroup_calc_subtree_ss_mask(dsct->subtree_control, + cgroup_ss_mask(dsct)); } } From 04313591ae487da8b5781a0d8d444073a3fdee0d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:01 -0500 Subject: [PATCH 33/44] cgroup: allocate 2x cgrp_cset_links when setting up a new root During prep, cgroup_setup_root() allocates cgrp_cset_links matching the number of existing css_sets to later link the new root. This is fine for now as the only operation which can happen inbetween is rebind_subsystems() and rebinding of empty subsystems doesn't create new css_sets. However, while not yet allowed, with the recent reimplementation, rebind_subsystems() can rebind subsystems with descendant csses and thus can create new css_sets. This patch makes cgroup_setup_root() allocate 2x of the existing css_sets so that later use of live subsystem rebinding doesn't blow up. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 58e02e9aa970..40ed329482dd 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1915,10 +1915,11 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) /* * We're accessing css_set_count without locking css_set_lock here, * but that's OK - it can only be increased by someone holding - * cgroup_lock, and that's us. The worst that can happen is that we - * have some link structures left over + * cgroup_lock, and that's us. Later rebinding may disable + * controllers on the default hierarchy and thus create new csets, + * which can't be more than the existing ones. Allocate 2x. */ - ret = allocate_cgrp_cset_links(css_set_count, &tmp_links); + ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links); if (ret) goto cancel_ref; From 549626047df99f1129d4e742cce741055bdc2dcb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 3 Mar 2016 09:58:01 -0500 Subject: [PATCH 34/44] cgroup: update css iteration in cgroup_update_dfl_csses() The existing sequences of operations ensure that the offlining csses are drained before cgroup_update_dfl_csses(), so even though cgroup_update_dfl_csses() uses css_for_each_descendant_pre() to walk the target cgroups, it doesn't end up operating on dead cgroups. Also, the function explicitly excludes the subtree root from operation. This is fragile and inconsistent with the rest of css update operations. This patch updates cgroup_update_dfl_csses() to use cgroup_for_each_live_descendant_pre() instead and include the subtree root. 
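For orientation, here is an illustrative consolidation of the hunk in the diff below (a reader's sketch, not an addition of this patch; locking is unchanged, with cgroup_mutex held and the walk running under css_set_lock). The reworked lookup loop in cgroup_update_dfl_csses() ends up with roughly this shape:

        struct cgroup_subsys_state *d_css;
        struct cgroup *dsct;

        /* walk the subtree root itself and every live descendant, pre-order */
        cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
                struct cgrp_cset_link *link;

                /* every css_set linked to a visited cgroup becomes a migration source */
                list_for_each_entry(link, &dsct->cset_links, cset_link)
                        cgroup_migrate_add_src(link->cset, cgrp,
                                               &preloaded_csets);
        }

Compared to the previous css_for_each_descendant_pre() walk, there is no longer a special case to skip @cgrp itself, and dead cgroups are filtered by the iterator rather than by implicit ordering guarantees.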
Signed-off-by: Tejun Heo Acked-by: Zefan Li --- kernel/cgroup.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 40ed329482dd..c63fce0c5b24 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2877,16 +2877,17 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy * @cgrp: root of the subtree to update csses for * - * @cgrp's subtree_ss_mask has changed and its subtree's (self excluded) - * css associations need to be updated accordingly. This function looks up - * all css_sets which are attached to the subtree, creates the matching - * updated css_sets and migrates the tasks to the new ones. + * @cgrp's control masks have changed and its subtree's css associations + * need to be updated accordingly. This function looks up all css_sets + * which are attached to the subtree, creates the matching updated css_sets + * and migrates the tasks to the new ones. */ static int cgroup_update_dfl_csses(struct cgroup *cgrp) { LIST_HEAD(preloaded_csets); struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset); - struct cgroup_subsys_state *css; + struct cgroup_subsys_state *d_css; + struct cgroup *dsct; struct css_set *src_cset; int ret; @@ -2896,14 +2897,10 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) /* look up all csses currently attached to @cgrp's subtree */ spin_lock_bh(&css_set_lock); - css_for_each_descendant_pre(css, cgroup_css(cgrp, NULL)) { + cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { struct cgrp_cset_link *link; - /* self is not affected by subtree_ss_mask change */ - if (css->cgroup == cgrp) - continue; - - list_for_each_entry(link, &css->cgroup->cset_links, cset_link) + list_for_each_entry(link, &dsct->cset_links, cset_link) cgroup_migrate_add_src(link->cset, cgrp, &preloaded_csets); } From 88bcf65e66e9037cdc25be4185a1929cc5009c99 Mon Sep 17 00:00:00 2001 From: Haosdent Huang Date: Fri, 4 Mar 2016 20:55:52 +0800 Subject: [PATCH 35/44] cgroup: remove stale item in cgroup-v1 document INDEX file. Signed-off-by: Haosdent Huang Signed-off-by: Tejun Heo --- Documentation/cgroup-v1/00-INDEX | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/cgroup-v1/00-INDEX b/Documentation/cgroup-v1/00-INDEX index 6ad425f7cf56..106885ad670d 100644 --- a/Documentation/cgroup-v1/00-INDEX +++ b/Documentation/cgroup-v1/00-INDEX @@ -24,5 +24,3 @@ net_prio.txt - Network priority cgroups details and usages. pids.txt - Process number cgroups details and usages. -unified-hierarchy.txt - - Description the new/next cgroup interface. From 6cc578df40bd60b791725e4451bc01f8c80abd8b Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 5 Mar 2016 11:30:56 +0530 Subject: [PATCH 36/44] cgroup: Trivial correction to reflect controller. Trivial correction in menuconfig help to reflect PIDs as controller instead of subsystem to align to rest of the text and documentation. Signed-off-by: Parav Pandit Signed-off-by: Tejun Heo --- init/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index 22320804fbaf..9fefb8e40541 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1047,10 +1047,10 @@ config CGROUP_PIDS is fairly trivial to reach PID exhaustion before you reach even a conservative kmemcg limit. As a result, it is possible to grind a system to halt without being limited by other cgroup policies. The - PIDs cgroup subsystem is designed to stop this from happening. 
+ PIDs controller is designed to stop this from happening. It should be noted that organisational operations (such as attaching - to a cgroup hierarchy will *not* be blocked by the PIDs subsystem), + to a cgroup hierarchy will *not* be blocked by the PIDs controller), since the PIDs limit only affects a process's ability to fork, not to attach to a cgroup. From 58cdb1ceb15aab7b34719ad225ff023775d774e7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:25 -0500 Subject: [PATCH 37/44] cgroup: fix incorrect destination cgroup in cgroup_update_dfl_csses() cgroup_update_dfl_csses() should move each task in the subtree to self; however, it was incorrectly calling cgroup_migrate_add_src() with the root of the subtree as @dst_cgrp. Fortunately, cgroup_migrate_add_src() currently uses @dst_cgrp only to determine the hierarchy and the bug doesn't cause any actual breakages. Fix it. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index c63fce0c5b24..50879aadcbd0 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2901,7 +2901,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) struct cgrp_cset_link *link; list_for_each_entry(link, &dsct->cset_links, cset_link) - cgroup_migrate_add_src(link->cset, cgrp, + cgroup_migrate_add_src(link->cset, dsct, &preloaded_csets); } spin_unlock_bh(&css_set_lock); From 6c694c88255b2052d9922d62df6df7c9e152eeeb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:25 -0500 Subject: [PATCH 38/44] cgroup: move migration destination verification out of cgroup_migrate_prepare_dst() cgroup_migrate_prepare_dst() verifies whether the destination cgroup is allowable; however, the test doesn't really belong there. It's too deep and common in the stack and as a result the test itself is gated by another test. Separate the test out into cgroup_may_migrate_to() and update cgroup_attach_task() and cgroup_transfer_tasks() to perform the test directly. This doesn't cause any behavior differences. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 50879aadcbd0..8a02076d4317 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2443,6 +2443,20 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, return ret; } +/** + * cgroup_may_migrate_to - verify whether a cgroup can be migration destination + * @dst_cgrp: destination cgroup to test + * + * On the default hierarchy, except for the root, subtree_control must be + * zero for migration destination cgroups with tasks so that child cgroups + * don't compete against tasks. + */ +static bool cgroup_may_migrate_to(struct cgroup *dst_cgrp) +{ + return !cgroup_on_dfl(dst_cgrp) || !cgroup_parent(dst_cgrp) || + !dst_cgrp->subtree_control; +} + /** * cgroup_migrate_finish - cleanup after attach * @preloaded_csets: list of preloaded css_sets @@ -2529,14 +2543,6 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, lockdep_assert_held(&cgroup_mutex); - /* - * Except for the root, subtree_control must be zero for a cgroup - * with tasks so that child cgroups don't compete against tasks. 
- */ - if (dst_cgrp && cgroup_on_dfl(dst_cgrp) && cgroup_parent(dst_cgrp) && - dst_cgrp->subtree_control) - return -EBUSY; - /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { struct css_set *dst_cset; @@ -2634,6 +2640,9 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *task; int ret; + if (!cgroup_may_migrate_to(dst_cgrp)) + return -EBUSY; + /* look up all src csets */ spin_lock_bh(&css_set_lock); rcu_read_lock(); @@ -4136,6 +4145,9 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) struct task_struct *task; int ret; + if (!cgroup_may_migrate_to(to)) + return -EBUSY; + mutex_lock(&cgroup_mutex); /* all tasks in @from are being moved, all csets are source */ From 37ff9f8f474216d0cfca7565a4e0caa521ee6e7e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: [PATCH 39/44] cgroup: make cgroup[_taskset]_migrate() take cgroup_root instead of cgroup On the default hierarchy, a migration can be multi-source and/or multi-destination. cgroup_taskset_migrate() used to incorrectly assume a single destination cgroup, but the bug has been fixed by 1f7dd3e5a6e4 ("cgroup: fix handling of multi-destination migration from subtree_control enabling"). Since that commit, @dst_cgrp to cgroup[_taskset]_migrate() is only used to determine which subsystems are affected or which cgroup_root the migration is taking place in. As such, @dst_cgrp is misleading. This patch replaces @dst_cgrp with @root. Signed-off-by: Tejun Heo --- kernel/cgroup.c | 70 ++++++++++++++++++++++++------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 8a02076d4317..5dd761355033 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2355,38 +2355,38 @@ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, } /** - * cgroup_taskset_migrate - migrate a taskset to a cgroup + * cgroup_taskset_migrate - migrate a taskset * @tset: taget taskset - * @dst_cgrp: destination cgroup + * @root: cgroup root the migration is taking place on * - * Migrate tasks in @tset to @dst_cgrp. This function fails iff one of the - * ->can_attach callbacks fails and guarantees that either all or none of - * the tasks in @tset are migrated. @tset is consumed regardless of - * success. + * Migrate tasks in @tset as setup by migration preparation functions. + * This function fails iff one of the ->can_attach callbacks fails and + * guarantees that either all or none of the tasks in @tset are migrated. + * @tset is consumed regardless of success.
*/ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, - struct cgroup *dst_cgrp) + struct cgroup_root *root) { - struct cgroup_subsys_state *css, *failed_css = NULL; + struct cgroup_subsys *ss; struct task_struct *task, *tmp_task; struct css_set *cset, *tmp_cset; - int i, ret; + int ssid, failed_ssid, ret; /* methods shouldn't be called if no task is actually migrating */ if (list_empty(&tset->src_csets)) return 0; /* check that we can legitimately attach to the cgroup */ - for_each_e_css(css, i, dst_cgrp) { - if (css->ss->can_attach) { - tset->ssid = i; - ret = css->ss->can_attach(tset); + do_each_subsys_mask(ss, ssid, root->subsys_mask) { + if (ss->can_attach) { + tset->ssid = ssid; + ret = ss->can_attach(tset); if (ret) { - failed_css = css; + failed_ssid = ssid; goto out_cancel_attach; } } - } + } while_each_subsys_mask(); /* * Now that we're guaranteed success, proceed to move all tasks to @@ -2413,25 +2413,25 @@ static int cgroup_taskset_migrate(struct cgroup_taskset *tset, */ tset->csets = &tset->dst_csets; - for_each_e_css(css, i, dst_cgrp) { - if (css->ss->attach) { - tset->ssid = i; - css->ss->attach(tset); + do_each_subsys_mask(ss, ssid, root->subsys_mask) { + if (ss->attach) { + tset->ssid = ssid; + ss->attach(tset); } - } + } while_each_subsys_mask(); ret = 0; goto out_release_tset; out_cancel_attach: - for_each_e_css(css, i, dst_cgrp) { - if (css == failed_css) + do_each_subsys_mask(ss, ssid, root->subsys_mask) { + if (ssid == failed_ssid) break; - if (css->ss->cancel_attach) { - tset->ssid = i; - css->ss->cancel_attach(tset); + if (ss->cancel_attach) { + tset->ssid = ssid; + ss->cancel_attach(tset); } - } + } while_each_subsys_mask(); out_release_tset: spin_lock_bh(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); @@ -2586,11 +2586,11 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, * cgroup_migrate - migrate a process or task to a cgroup * @leader: the leader of the process or the task to migrate * @threadgroup: whether @leader points to the whole process or a single task - * @cgrp: the destination cgroup + * @root: cgroup root migration is taking place on * - * Migrate a process or task denoted by @leader to @cgrp. If migrating a - * process, the caller must be holding cgroup_threadgroup_rwsem. The - * caller is also responsible for invoking cgroup_migrate_add_src() and + * Migrate a process or task denoted by @leader. If migrating a process, + * the caller must be holding cgroup_threadgroup_rwsem. The caller is also + * responsible for invoking cgroup_migrate_add_src() and * cgroup_migrate_prepare_dst() on the targets before invoking this * function and following up with cgroup_migrate_finish(). * @@ -2601,7 +2601,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, * actually starting migrating. 
*/ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, - struct cgroup *cgrp) + struct cgroup_root *root) { struct cgroup_taskset tset = CGROUP_TASKSET_INIT(tset); struct task_struct *task; @@ -2622,7 +2622,7 @@ static int cgroup_migrate(struct task_struct *leader, bool threadgroup, rcu_read_unlock(); spin_unlock_bh(&css_set_lock); - return cgroup_taskset_migrate(&tset, cgrp); + return cgroup_taskset_migrate(&tset, root); } /** @@ -2659,7 +2659,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, /* prepare dst csets and commit */ ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); if (!ret) - ret = cgroup_migrate(leader, threadgroup, dst_cgrp); + ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); cgroup_migrate_finish(&preloaded_csets); return ret; @@ -2934,7 +2934,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) } spin_unlock_bh(&css_set_lock); - ret = cgroup_taskset_migrate(&tset, cgrp); + ret = cgroup_taskset_migrate(&tset, cgrp->root); out_finish: cgroup_migrate_finish(&preloaded_csets); percpu_up_write(&cgroup_threadgroup_rwsem); @@ -4172,7 +4172,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) css_task_iter_end(&it); if (task) { - ret = cgroup_migrate(task, false, to); + ret = cgroup_migrate(task, false, to->root); put_task_struct(task); } } while (task && !ret); From e4857982f49d21c05a84351b56724bf353022355 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: [PATCH 40/44] cgroup: use css_set->mg_dst_cgrp for the migration target cgroup Migration can be multi-target on the default hierarchy when a controller is enabled - processes belonging to each child cgroup have to be moved to the child cgroup itself to refresh css association. This isn't a problem for cgroup_migrate_add_src() as each source css_set still maps to a single source and a single target cgroup; however, cgroup_migrate_prepare_dst() is called once after all source css_sets are added and thus might not have a single destination cgroup. This is currently worked around by specifying NULL for @dst_cgrp and using the source's default cgroup as the destination, as the only multi-target migration in use is self-targeting. While this works, it's subtle and clunky. As all target cgroups are already specified while preparing the source css_sets, this clunkiness can easily be removed by recording the target cgroup in each source css_set. This patch adds css_set->mg_dst_cgrp which is recorded by cgroup_migrate_add_src() and used by cgroup_migrate_prepare_dst(). This also makes the migration code ready for arbitrary multi-target migration. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 9 +++++---- kernel/cgroup.c | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index aae8c94de4b3..590291d56049 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -191,12 +191,13 @@ struct css_set { /* * If this cset is acting as the source of migration the following - * two fields are set. mg_src_cgrp is the source cgroup of the - * on-going migration and mg_dst_cset is the destination cset the - * target tasks on this cset should be migrated to. Protected by - * cgroup_mutex. + * two fields are set. mg_src_cgrp and mg_dst_cgrp are + * respectively the source and destination cgroups of the on-going + * migration. mg_dst_cset is the destination cset the target tasks + * on this cset should be migrated to.
Protected by cgroup_mutex. */ struct cgroup *mg_src_cgrp; + struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 5dd761355033..fbd3e99a4e98 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2473,6 +2473,7 @@ static void cgroup_migrate_finish(struct list_head *preloaded_csets) spin_lock_bh(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, preloaded_csets, mg_preload_node) { cset->mg_src_cgrp = NULL; + cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_preload_node); put_css_set_locked(cset); @@ -2511,32 +2512,31 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, return; WARN_ON(src_cset->mg_src_cgrp); + WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); WARN_ON(!list_empty(&src_cset->mg_node)); src_cset->mg_src_cgrp = src_cgrp; + src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); list_add(&src_cset->mg_preload_node, preloaded_csets); } /** * cgroup_migrate_prepare_dst - prepare destination css_sets for migration - * @dst_cgrp: the destination cgroup (may be %NULL) * @preloaded_csets: list of preloaded source css_sets * - * Tasks are about to be moved to @dst_cgrp and all the source css_sets - * have been preloaded to @preloaded_csets. This function looks up and - * pins all destination css_sets, links each to its source, and append them - * to @preloaded_csets. If @dst_cgrp is %NULL, the destination of each - * source css_set is assumed to be its cgroup on the default hierarchy. + * Tasks are about to be moved and all the source css_sets have been + * preloaded to @preloaded_csets. This function looks up and pins all + * destination css_sets, links each to its source, and append them to + * @preloaded_csets. * * This function must be called after cgroup_migrate_add_src() has been * called on each migration source css_set. After migration is performed * using cgroup_migrate(), cgroup_migrate_finish() must be called on * @preloaded_csets. 
*/ -static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, - struct list_head *preloaded_csets) +static int cgroup_migrate_prepare_dst(struct list_head *preloaded_csets) { LIST_HEAD(csets); struct css_set *src_cset, *tmp_cset; @@ -2547,8 +2547,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, list_for_each_entry_safe(src_cset, tmp_cset, preloaded_csets, mg_preload_node) { struct css_set *dst_cset; - dst_cset = find_css_set(src_cset, - dst_cgrp ?: src_cset->dfl_cgrp); + dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) goto err; @@ -2561,6 +2560,7 @@ static int cgroup_migrate_prepare_dst(struct cgroup *dst_cgrp, */ if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; + src_cset->mg_dst_cgrp = NULL; list_del_init(&src_cset->mg_preload_node); put_css_set(src_cset); put_css_set(dst_cset); @@ -2657,7 +2657,7 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp, spin_unlock_bh(&css_set_lock); /* prepare dst csets and commit */ - ret = cgroup_migrate_prepare_dst(dst_cgrp, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (!ret) ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root); @@ -2916,7 +2916,7 @@ static int cgroup_update_dfl_csses(struct cgroup *cgrp) spin_unlock_bh(&css_set_lock); /* NULL dst indicates self on default hierarchy */ - ret = cgroup_migrate_prepare_dst(NULL, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_finish; @@ -4156,7 +4156,7 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from) cgroup_migrate_add_src(link->cset, to, &preloaded_csets); spin_unlock_bh(&css_set_lock); - ret = cgroup_migrate_prepare_dst(to, &preloaded_csets); + ret = cgroup_migrate_prepare_dst(&preloaded_csets); if (ret) goto out_err; From f6d635ad341d5cc0b9c7ab46adfbf3bf5886cee4 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Mar 2016 11:51:26 -0500 Subject: [PATCH 41/44] cgroup: implement cgroup_subsys->implicit_on_dfl Some controllers, perf_event for now and possibly freezer in the future, don't really make sense to control explicitly through "cgroup.subtree_control". For example, the primary role of perf_event is identifying the cgroups of tasks; however, because the controller also keeps a small amount of state per cgroup, it can't be replaced with simple cgroup membership tests. This patch implements cgroup_subsys->implicit_on_dfl flag. When set, the controller is implicitly enabled on all cgroups on the v2 hierarchy so that utility type controllers such as perf_event can be enabled and function transparently. An implicit controller doesn't show up in "cgroup.controllers" or "cgroup.subtree_control", is exempt from no internal process rule and can be stolen from the default hierarchy even if there are non-root csses. v2: Reimplemented on top of the recent updates to css handling and subsystem rebinding. Rebinding implicit subsystems is now a simple matter of exempting it from the busy subsystem check. 
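As a usage illustration (hypothetical, not part of this patch; the controller and callback names below are made up), a utility-type controller opts in simply by setting the new flag in its cgroup_subsys definition:

        /* hypothetical utility controller; callback names are illustrative */
        struct cgroup_subsys util_cgrp_subsys = {
                .css_alloc       = util_css_alloc,
                .css_free        = util_css_free,
                /*
                 * Implicitly enabled on every cgroup on the default
                 * hierarchy, never listed in "cgroup.controllers" or
                 * "cgroup.subtree_control", and exempt from the no
                 * internal process constraint.
                 */
                .implicit_on_dfl = true,
        };

The actual conversion of perf_event, the intended first user mentioned above, is done separately from this patch.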
Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 13 +++++++++++++ kernel/cgroup.c | 38 ++++++++++++++++++++++++++++++------- 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 590291d56049..34b42f03fcd8 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -450,6 +450,19 @@ struct cgroup_subsys { bool early_init:1; + /* + * If %true, the controller, on the default hierarchy, doesn't show + * up in "cgroup.controllers" or "cgroup.subtree_control", is + * implicitly enabled on all cgroups on the default hierarchy, and + * bypasses the "no internal process" constraint. This is for + * utility type controllers which is transparent to userland. + * + * An implicit controller can be stolen from the default hierarchy + * anytime and thus must be okay with offline csses from previous + * hierarchies coexisting with csses for the current one. + */ + bool implicit_on_dfl:1; + /* * If %false, this subsystem is properly hierarchical - * configuration, resource accounting and restriction on a parent diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fbd3e99a4e98..e22df5d81e59 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -186,6 +186,9 @@ static u16 cgroup_no_v1_mask; /* some controllers are not supported in the default hierarchy */ static u16 cgrp_dfl_inhibit_ss_mask; +/* some controllers are implicitly enabled on the default hierarchy */ +static unsigned long cgrp_dfl_implicit_ss_mask; + /* The list of hierarchy roots */ static LIST_HEAD(cgroup_roots); @@ -359,8 +362,8 @@ static u16 cgroup_control(struct cgroup *cgrp) return parent->subtree_control; if (cgroup_on_dfl(cgrp)) - root_ss_mask &= ~cgrp_dfl_inhibit_ss_mask; - + root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | + cgrp_dfl_implicit_ss_mask); return root_ss_mask; } @@ -1327,6 +1330,8 @@ static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask) lockdep_assert_held(&cgroup_mutex); + cur_ss_mask |= cgrp_dfl_implicit_ss_mask; + while (true) { u16 new_ss_mask = cur_ss_mask; @@ -1512,8 +1517,13 @@ static int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) lockdep_assert_held(&cgroup_mutex); do_each_subsys_mask(ss, ssid, ss_mask) { - /* if @ss has non-root csses attached to it, can't move */ - if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss))) + /* + * If @ss has non-root csses attached to it, can't move. + * If @ss is an implicit controller, it is exempt from this + * rule and can be stolen. 
+ */ + if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) && + !ss->implicit_on_dfl) return -EBUSY; /* can't move between two non-dummy roots either */ @@ -3039,6 +3049,18 @@ static void cgroup_restore_control(struct cgroup *cgrp) } } +static bool css_visible(struct cgroup_subsys_state *css) +{ + struct cgroup_subsys *ss = css->ss; + struct cgroup *cgrp = css->cgroup; + + if (cgroup_control(cgrp) & (1 << ss->id)) + return true; + if (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) + return false; + return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl; +} + /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree @@ -3074,7 +3096,7 @@ static int cgroup_apply_control_enable(struct cgroup *cgrp) return PTR_ERR(css); } - if (cgroup_control(dsct) & (1 << ss->id)) { + if (css_visible(css)) { ret = css_populate_dir(css); if (ret) return ret; @@ -3117,7 +3139,7 @@ static void cgroup_apply_control_disable(struct cgroup *cgrp) if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); - } else if (!(cgroup_control(dsct) & (1 << ss->id))) { + } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) ss->css_reset(css); @@ -5455,7 +5477,9 @@ int __init cgroup_init(void) cgrp_dfl_root.subsys_mask |= 1 << ss->id; - if (!ss->dfl_cftypes) + if (ss->implicit_on_dfl) + cgrp_dfl_implicit_ss_mask |= 1 << ss->id; + else if (!ss->dfl_cftypes) cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; if (ss->dfl_cftypes == ss->legacy_cftypes) { From 6c83e6cb0ce897818878a7d3b1b25d5dc8f611a2 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sat, 5 Mar 2016 11:20:58 +0530 Subject: [PATCH 42/44] Documentation: cgroup v2: Trivial heading correction. Corrected the heading to match with index. Signed-off-by: Parav Pandit Signed-off-by: Tejun Heo --- Documentation/cgroup-v2.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index 3922ae1654fd..6039d41059aa 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -898,7 +898,7 @@ PAGE_SIZE multiple when read back. limit, anonymous meomry of the cgroup will not be swapped out. -5-2-2. General Usage +5-2-2. Usage Guidelines "memory.high" is the main mechanism to control memory usage. Over-committing on high limit (sum of high limits > available memory) From 2b021cbf3cb6208f0d40fd2f1869f237934340ed Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 15 Mar 2016 20:43:04 -0400 Subject: [PATCH 43/44] cgroup: ignore css_sets associated with dead cgroups during migration Before 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups"), all dead tasks were associated with init_css_set. If a zombie task is requested for migration, while migration prep operations would still be performed on init_css_set, the actual migration would ignore zombie tasks. As init_css_set is always valid, this worked fine. However, after 2e91fa7f6d45, zombie tasks stay with the css_set it was associated with at the time of death. Let's say a task T associated with cgroup A on hierarchy H-1 and cgroup B on hiearchy H-2. After T becomes a zombie, it would still remain associated with A and B. If A only contains zombie tasks, it can be removed. On removal, A gets marked offline but stays pinned until all zombies are drained. At this point, if migration is initiated on T to a cgroup C on hierarchy H-2, migration path would try to prepare T's css_set for migration and trigger the following. 
WARNING: CPU: 0 PID: 1576 at kernel/cgroup.c:474 cgroup_get+0x121/0x160() CPU: 0 PID: 1576 Comm: bash Not tainted 4.4.0-work+ #289 ... Call Trace: [] dump_stack+0x4e/0x82 [] warn_slowpath_common+0x78/0xb0 [] warn_slowpath_null+0x15/0x20 [] cgroup_get+0x121/0x160 [] link_css_set+0x7b/0x90 [] find_css_set+0x3bc/0x5e0 [] cgroup_migrate_prepare_dst+0x89/0x1f0 [] cgroup_attach_task+0x157/0x230 [] __cgroup_procs_write+0x2b7/0x470 [] cgroup_tasks_write+0xc/0x10 [] cgroup_file_write+0x30/0x1b0 [] kernfs_fop_write+0x13c/0x180 [] __vfs_write+0x23/0xe0 [] vfs_write+0xa4/0x1a0 [] SyS_write+0x44/0xa0 [] entry_SYSCALL_64_fastpath+0x12/0x6f It doesn't make sense to prepare migration for css_sets pointing to dead cgroups as they are guaranteed to contain only zombies which are ignored later during migration. This patch makes cgroup destruction path mark all affected css_sets as dead and updates the migration path to ignore them during preparation. Signed-off-by: Tejun Heo Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups") Cc: stable@vger.kernel.org # v4.4+ --- include/linux/cgroup-defs.h | 3 +++ kernel/cgroup.c | 20 ++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 34b42f03fcd8..3e39ae5bc799 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -212,6 +212,9 @@ struct css_set { /* all css_task_iters currently walking this cset */ struct list_head task_iters; + /* dead and being drained, ignore for migration */ + bool dead; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e22df5d81e59..d57318950076 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -2516,6 +2516,14 @@ static void cgroup_migrate_add_src(struct css_set *src_cset, lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); + /* + * If ->dead, @src_set is associated with one or more dead cgroups + * and doesn't contain any migratable tasks. Ignore it early so + * that the rest of migration path doesn't get confused by it. + */ + if (src_cset->dead) + return; + src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); if (!list_empty(&src_cset->mg_preload_node)) @@ -5258,6 +5266,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup_subsys_state *css; + struct cgrp_cset_link *link; int ssid; lockdep_assert_held(&cgroup_mutex); @@ -5278,11 +5287,18 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) return -EBUSY; /* - * Mark @cgrp dead. This prevents further task migration and child - * creation by disabling cgroup_lock_live_group(). + * Mark @cgrp and the associated csets dead. The former prevents + * further task migration and child creation by disabling + * cgroup_lock_live_group(). The latter makes the csets ignored by + * the migration path. 
*/ cgrp->self.flags &= ~CSS_ONLINE; + spin_lock_bh(&css_set_lock); + list_for_each_entry(link, &cgrp->cset_links, cset_link) + link->cset->dead = true; + spin_unlock_bh(&css_set_lock); + /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); From cfe02a8a973e7e5f66926b8ae38dfce404b19e29 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Mar 2016 00:21:06 +0100 Subject: [PATCH 44/44] cgroup: avoid false positive gcc-6 warning When all subsystems are disabled, gcc notices that cgroup_subsys_enabled_key is a zero-length array and that any access to it must be out of bounds: In file included from ../include/linux/cgroup.h:19:0, from ../kernel/cgroup.c:31: ../kernel/cgroup.c: In function 'cgroup_add_cftypes': ../kernel/cgroup.c:261:53: error: array subscript is above array bounds [-Werror=array-bounds] return static_key_enabled(cgroup_subsys_enabled_key[ssid]); ~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~ ../include/linux/jump_label.h:271:40: note: in definition of macro 'static_key_enabled' static_key_count((struct static_key *)x) > 0; \ ^ We should never call the function in this particular case, so this is not a bug. In order to silence the warning, this adds an explicit check for the CGROUP_SUBSYS_COUNT==0 case. Signed-off-by: Arnd Bergmann Signed-off-by: Tejun Heo --- kernel/cgroup.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index d57318950076..3fe02c152799 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -246,6 +246,9 @@ static int cgroup_addrm_files(struct cgroup_subsys_state *css, */ static bool cgroup_ssid_enabled(int ssid) { + if (CGROUP_SUBSYS_COUNT == 0) + return false; + return static_key_enabled(cgroup_subsys_enabled_key[ssid]); }
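The zero-subsystem corner case is easier to see in isolation. Below is a stand-alone sketch of the same pattern (hypothetical code, not taken from the kernel): with a compile-time count of zero the array has zero length, gcc 6's -Warray-bounds fires on any indexed access, and a guard on the constant count is folded away so the access becomes provably unreachable.

        #define NR_ITEMS 0                      /* e.g. every controller compiled out */

        static int item_enabled_flag[NR_ITEMS]; /* zero-length array when NR_ITEMS == 0 */

        static bool item_enabled(int idx)
        {
                if (NR_ITEMS == 0)              /* constant-folded at compile time */
                        return false;           /* gcc now knows the access below is dead */

                return item_enabled_flag[idx] != 0;     /* warned here without the guard */
        }

The CGROUP_SUBSYS_COUNT == 0 check added above plays the same role: it never changes behavior for a real configuration and only gives the compiler the proof it needs to stop warning.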