Merge branch 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup fixes from Tejun Heo:
 "This is quite late but these need to be backported anyway.

  This is the fix for a long-standing cpuset bug which has existed since
  2009.  cpuset makes use of PF_SPREAD_{PAGE|SLAB} flags to modify the
  task's memory allocation behavior according to the settings of the
  cpuset it belongs to; unfortunately, when those flags have to be
  changed, cpuset did so directly even while the target task is running,
  which is obviously racy as task->flags may be modified by the task
  itself at any time.  This obscure bug manifested as a corrupted
  PF_USED_MATH flag leading to a weird crash.

  The bug is fixed by moving the flags to task->atomic_flags.  The first
  two are preparatory ones to help define atomic_flags accessors and the
  third one is the actual fix"

* 'for-3.17-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags
  sched: add macros to define bitops for task atomic flags
  sched: fix confusing PFA_NO_NEW_PRIVS constant
This commit is contained in:
Linus Torvalds 2014-09-27 16:45:33 -07:00
commit 6111da3432
6 changed files with 41 additions and 22 deletions

View file

@ -345,14 +345,14 @@ the named feature on.
The implementation is simple.
Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
joins that cpuset. The page allocation calls for the page cache
is modified to perform an inline check for this PF_SPREAD_PAGE task
is modified to perform an inline check for this PFA_SPREAD_PAGE task
flag, and if set, a call to a new routine cpuset_mem_spread_node()
returns the node to prefer for the allocation.
Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
pages from the node returned by cpuset_mem_spread_node().
The cpuset_mem_spread_node() routine is also simple. It uses the

View file

@ -93,12 +93,12 @@ extern int cpuset_slab_spread_node(void);
static inline int cpuset_do_page_mem_spread(void)
{
return current->flags & PF_SPREAD_PAGE;
return task_spread_page(current);
}
static inline int cpuset_do_slab_mem_spread(void)
{
return current->flags & PF_SPREAD_SLAB;
return task_spread_slab(current);
}
extern int current_cpuset_is_being_rebound(void);

View file

@ -1903,8 +1903,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */
#define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
@ -1957,17 +1955,31 @@ static inline void memalloc_noio_restore(unsigned int flags)
}
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0x00000001 /* May not gain new privileges. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
static inline bool task_no_new_privs(struct task_struct *p)
{
return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
static inline void task_set_no_new_privs(struct task_struct *p)
{
set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
}
/*
 * Generators for the task->atomic_flags accessors.
 *
 * In each macro, @name is the suffix of a PFA_* constant and @func is the
 * suffix of the generated function name.  PFA_##name is passed as the bit
 * argument to test_bit()/set_bit()/clear_bit(), which operate on
 * &p->atomic_flags.
 *
 * TASK_PFA_TEST(name, func)  defines  bool task_<func>(p)
 *	returning test_bit(PFA_<name>, &p->atomic_flags).
 * TASK_PFA_SET(name, func)   defines  void task_set_<func>(p)
 *	calling set_bit(PFA_<name>, &p->atomic_flags).
 * TASK_PFA_CLEAR(name, func) defines  void task_clear_<func>(p)
 *	calling clear_bit(PFA_<name>, &p->atomic_flags).
 */
#define TASK_PFA_TEST(name, func) \
static inline bool task_##func(struct task_struct *p) \
{ return test_bit(PFA_##name, &p->atomic_flags); }
#define TASK_PFA_SET(name, func) \
static inline void task_set_##func(struct task_struct *p) \
{ set_bit(PFA_##name, &p->atomic_flags); }
#define TASK_PFA_CLEAR(name, func) \
static inline void task_clear_##func(struct task_struct *p) \
{ clear_bit(PFA_##name, &p->atomic_flags); }
TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
/*
* task->jobctl flags

View file

@ -365,13 +365,14 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
struct task_struct *tsk)
{
if (is_spread_page(cs))
tsk->flags |= PF_SPREAD_PAGE;
task_set_spread_page(tsk);
else
tsk->flags &= ~PF_SPREAD_PAGE;
task_clear_spread_page(tsk);
if (is_spread_slab(cs))
tsk->flags |= PF_SPREAD_SLAB;
task_set_spread_slab(tsk);
else
tsk->flags &= ~PF_SPREAD_SLAB;
task_clear_spread_slab(tsk);
}
/*

View file

@ -2987,7 +2987,7 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
#ifdef CONFIG_NUMA
/*
* Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
* Try allocating on another node if PFA_SPREAD_SLAB or a mempolicy is set.
*
* If we are in_interrupt, then process context, including cpusets and
* mempolicy, may not apply and should not be used for allocation policy.
@ -3219,7 +3219,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
void *objp;
if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
if (current->mempolicy || cpuset_do_slab_mem_spread()) {
objp = alternate_node_alloc(cache, flags);
if (objp)
goto out;

View file

@ -197,6 +197,9 @@ exuberant()
--regex-c++='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \
--regex-c++='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \
--regex-c++='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
--regex-c++='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
--regex-c++='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
--regex-c++='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/'\
--regex-c='/PCI_OP_READ\((\w*).*[1-4]\)/pci_bus_read_config_\1/' \
--regex-c='/PCI_OP_WRITE\((\w*).*[1-4]\)/pci_bus_write_config_\1/' \
--regex-c='/DEFINE_(MUTEX|SEMAPHORE|SPINLOCK)\((\w*)/\2/v/' \
@ -260,6 +263,9 @@ emacs()
--regex='/SETPCGFLAG\(([^,)]*).*/SetPageCgroup\1/' \
--regex='/CLEARPCGFLAG\(([^,)]*).*/ClearPageCgroup\1/' \
--regex='/TESTCLEARPCGFLAG\(([^,)]*).*/TestClearPageCgroup\1/' \
--regex='/TASK_PFA_TEST\([^,]*,\s*([^)]*)\)/task_\1/' \
--regex='/TASK_PFA_SET\([^,]*,\s*([^)]*)\)/task_set_\1/' \
--regex='/TASK_PFA_CLEAR\([^,]*,\s*([^)]*)\)/task_clear_\1/' \
--regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/' \
--regex='/PCI_OP_READ(\([a-z]*[a-z]\).*[1-4])/pci_bus_read_config_\1/' \
--regex='/PCI_OP_WRITE(\([a-z]*[a-z]\).*[1-4])/pci_bus_write_config_\1/'\