Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull timers/nohz changes from Ingo Molnar:
 "It mostly contains fixes and full dynticks off-case optimizations, by
  Frederic Weisbecker"

* 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  nohz: Include local CPU in full dynticks global kick
  nohz: Optimize full dynticks's sched hooks with static keys
  nohz: Optimize full dynticks state checks with static keys
  nohz: Rename a few state variables
  vtime: Always debug check snapshot source _before_ updating it
  vtime: Always scale generic vtime accounting results
  vtime: Optimize full dynticks accounting off case with static keys
  vtime: Describe overriden functions in dedicated arch headers
  m68k: hardirq_count() only need preempt_mask.h
  hardirq: Split preempt count mask definitions
  context_tracking: Split low level state headers
  vtime: Fix racy cputime delta update
  vtime: Remove a few unneeded generic vtime state checks
  context_tracking: User/kernel broundary cross trace events
  context_tracking: Optimize context switch off case with static keys
  context_tracking: Optimize guest APIs off case with static key
  context_tracking: Optimize main APIs off case with static key
  context_tracking: Ground setup for static key use
  context_tracking: Remove full dynticks' hacky dependency on wide context tracking
  nohz: Only enable context tracking on full dynticks CPUs
  ...
commit 6832d9652f
21 changed files with 549 additions and 331 deletions
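The recurring shape of the off-case optimizations in this series, visible in the context_tracking.h, vtime.h and tick.h hunks below, is to turn unconditional calls into static-key-guarded inline wrappers so that kernels which never enable context tracking pay only a patched no-op branch. A condensed sketch of that pattern (simplified from the hunks that follow, not a verbatim excerpt):

#include <linux/static_key.h>

/*
 * Global key, off by default; flipped with static_key_slow_inc() the
 * first time a CPU enables context tracking (context_tracking_cpu_set()).
 */
extern struct static_key context_tracking_enabled;
extern void context_tracking_user_enter(void);

/*
 * Fast-path wrapper: while the key is off, static_key_false() compiles
 * down to a no-op branch, so the common case costs almost nothing.
 */
static inline void user_enter(void)
{
        if (static_key_false(&context_tracking_enabled))
                context_tracking_user_enter();
}

The same wrapper-plus-slow-path split shows up below for user_exit(), exception_enter()/exception_exit(), the vtime accounting hooks and the tick_nohz_full_* helpers.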
@@ -3,3 +3,4 @@ generic-y += clkdev.h
 generic-y += exec.h
 generic-y += kvm_para.h
 generic-y += trace_clock.h
+generic-y += vtime.h
@@ -3,7 +3,7 @@
 
 #include <linux/types.h>
 #ifdef CONFIG_MMU
-#include <linux/hardirq.h>
+#include <linux/preempt_mask.h>
 #endif
 #include <linux/preempt.h>
 #include <asm/thread_info.h>
@@ -2,3 +2,4 @@
 generic-y += clkdev.h
 generic-y += rwsem.h
 generic-y += trace_clock.h
+generic-y += vtime.h
@@ -13,9 +13,6 @@
 #include <asm/div64.h>
 
-
-#define __ARCH_HAS_VTIME_ACCOUNT
-#define __ARCH_HAS_VTIME_TASK_SWITCH
 
 /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
 typedef unsigned long long __nocast cputime_t;
arch/s390/include/asm/vtime.h (new file, 7 lines)
@@ -0,0 +1,7 @@
+#ifndef _S390_VTIME_H
+#define _S390_VTIME_H
+
+#define __ARCH_HAS_VTIME_ACCOUNT
+#define __ARCH_HAS_VTIME_TASK_SWITCH
+
+#endif /* _S390_VTIME_H */
@@ -19,6 +19,7 @@
 #include <asm/irq_regs.h>
 #include <asm/cputime.h>
 #include <asm/vtimer.h>
+#include <asm/vtime.h>
 #include <asm/irq.h>
 #include "entry.h"
include/asm-generic/vtime.h (new file, 0 lines)
@@ -2,100 +2,110 @@
 #define _LINUX_CONTEXT_TRACKING_H
 
 #include <linux/sched.h>
-#include <linux/percpu.h>
 #include <linux/vtime.h>
+#include <linux/context_tracking_state.h>
 #include <asm/ptrace.h>
 
-struct context_tracking {
-        /*
-         * When active is false, probes are unset in order
-         * to minimize overhead: TIF flags are cleared
-         * and calls to user_enter/exit are ignored. This
-         * may be further optimized using static keys.
-         */
-        bool active;
-        enum ctx_state {
-                IN_KERNEL = 0,
-                IN_USER,
-        } state;
-};
-
-static inline void __guest_enter(void)
-{
-        /*
-         * This is running in ioctl context so we can avoid
-         * the call to vtime_account() with its unnecessary idle check.
-         */
-        vtime_account_system(current);
-        current->flags |= PF_VCPU;
-}
-
-static inline void __guest_exit(void)
-{
-        /*
-         * This is running in ioctl context so we can avoid
-         * the call to vtime_account() with its unnecessary idle check.
-         */
-        vtime_account_system(current);
-        current->flags &= ~PF_VCPU;
-}
 
 #ifdef CONFIG_CONTEXT_TRACKING
-DECLARE_PER_CPU(struct context_tracking, context_tracking);
-
-static inline bool context_tracking_in_user(void)
-{
-        return __this_cpu_read(context_tracking.state) == IN_USER;
-}
-
-static inline bool context_tracking_active(void)
-{
-        return __this_cpu_read(context_tracking.active);
-}
-
-extern void user_enter(void);
-extern void user_exit(void);
+extern void context_tracking_cpu_set(int cpu);
+extern void context_tracking_user_enter(void);
+extern void context_tracking_user_exit(void);
+extern void __context_tracking_task_switch(struct task_struct *prev,
+                                           struct task_struct *next);
 
-extern void guest_enter(void);
-extern void guest_exit(void);
+static inline void user_enter(void)
+{
+        if (static_key_false(&context_tracking_enabled))
+                context_tracking_user_enter();
+
+}
+
+static inline void user_exit(void)
+{
+        if (static_key_false(&context_tracking_enabled))
+                context_tracking_user_exit();
+}
 
 static inline enum ctx_state exception_enter(void)
 {
         enum ctx_state prev_ctx;
 
+        if (!static_key_false(&context_tracking_enabled))
+                return 0;
+
         prev_ctx = this_cpu_read(context_tracking.state);
-        user_exit();
+        context_tracking_user_exit();
 
         return prev_ctx;
 }
 
 static inline void exception_exit(enum ctx_state prev_ctx)
 {
-        if (prev_ctx == IN_USER)
-                user_enter();
+        if (static_key_false(&context_tracking_enabled)) {
+                if (prev_ctx == IN_USER)
+                        context_tracking_user_enter();
+        }
 }
 
-extern void context_tracking_task_switch(struct task_struct *prev,
-                                         struct task_struct *next);
+static inline void context_tracking_task_switch(struct task_struct *prev,
+                                                struct task_struct *next)
+{
+        if (static_key_false(&context_tracking_enabled))
+                __context_tracking_task_switch(prev, next);
+}
 #else
-static inline bool context_tracking_in_user(void) { return false; }
 static inline void user_enter(void) { }
 static inline void user_exit(void) { }
-
-static inline void guest_enter(void)
-{
-        __guest_enter();
-}
-
-static inline void guest_exit(void)
-{
-        __guest_exit();
-}
-
 static inline enum ctx_state exception_enter(void) { return 0; }
 static inline void exception_exit(enum ctx_state prev_ctx) { }
 static inline void context_tracking_task_switch(struct task_struct *prev,
                                                 struct task_struct *next) { }
 #endif /* !CONFIG_CONTEXT_TRACKING */
 
+
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+extern void context_tracking_init(void);
+#else
+static inline void context_tracking_init(void) { }
+#endif /* CONFIG_CONTEXT_TRACKING_FORCE */
+
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline void guest_enter(void)
+{
+        if (vtime_accounting_enabled())
+                vtime_guest_enter(current);
+        else
+                current->flags |= PF_VCPU;
+}
+
+static inline void guest_exit(void)
+{
+        if (vtime_accounting_enabled())
+                vtime_guest_exit(current);
+        else
+                current->flags &= ~PF_VCPU;
+}
+
+#else
+static inline void guest_enter(void)
+{
+        /*
+         * This is running in ioctl context so its safe
+         * to assume that it's the stime pending cputime
+         * to flush.
+         */
+        vtime_account_system(current);
+        current->flags |= PF_VCPU;
+}
+
+static inline void guest_exit(void)
+{
+        /* Flush the guest cputime we spent on the guest */
+        vtime_account_system(current);
+        current->flags &= ~PF_VCPU;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
 #endif
include/linux/context_tracking_state.h (new file, 39 lines)
@@ -0,0 +1,39 @@
+#ifndef _LINUX_CONTEXT_TRACKING_STATE_H
+#define _LINUX_CONTEXT_TRACKING_STATE_H
+
+#include <linux/percpu.h>
+#include <linux/static_key.h>
+
+struct context_tracking {
+        /*
+         * When active is false, probes are unset in order
+         * to minimize overhead: TIF flags are cleared
+         * and calls to user_enter/exit are ignored. This
+         * may be further optimized using static keys.
+         */
+        bool active;
+        enum ctx_state {
+                IN_KERNEL = 0,
+                IN_USER,
+        } state;
+};
+
+#ifdef CONFIG_CONTEXT_TRACKING
+extern struct static_key context_tracking_enabled;
+DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static inline bool context_tracking_in_user(void)
+{
+        return __this_cpu_read(context_tracking.state) == IN_USER;
+}
+
+static inline bool context_tracking_active(void)
+{
+        return __this_cpu_read(context_tracking.active);
+}
+#else
+static inline bool context_tracking_in_user(void) { return false; }
+static inline bool context_tracking_active(void) { return false; }
+#endif /* CONFIG_CONTEXT_TRACKING */
+
+#endif
@@ -1,126 +1,11 @@
 #ifndef LINUX_HARDIRQ_H
 #define LINUX_HARDIRQ_H
 
-#include <linux/preempt.h>
+#include <linux/preempt_mask.h>
 #include <linux/lockdep.h>
 #include <linux/ftrace_irq.h>
 #include <linux/vtime.h>
-#include <asm/hardirq.h>
 
-/*
- * We put the hardirq and softirq counter into the preemption
- * counter. The bitmask has the following meaning:
- *
- * - bits 0-7 are the preemption count (max preemption depth: 256)
- * - bits 8-15 are the softirq count (max # of softirqs: 256)
- *
- * The hardirq count can in theory reach the same as NR_IRQS.
- * In reality, the number of nested IRQS is limited to the stack
- * size as well. For archs with over 1000 IRQS it is not practical
- * to expect that they will all nest. We give a max of 10 bits for
- * hardirq nesting. An arch may choose to give less than 10 bits.
- * m68k expects it to be 8.
- *
- * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
- * - bit 26 is the NMI_MASK
- * - bit 27 is the PREEMPT_ACTIVE flag
- *
- * PREEMPT_MASK: 0x000000ff
- * SOFTIRQ_MASK: 0x0000ff00
- * HARDIRQ_MASK: 0x03ff0000
- * NMI_MASK:     0x04000000
- */
-#define PREEMPT_BITS 8
-#define SOFTIRQ_BITS 8
-#define NMI_BITS 1
-
-#define MAX_HARDIRQ_BITS 10
-
-#ifndef HARDIRQ_BITS
-# define HARDIRQ_BITS MAX_HARDIRQ_BITS
-#endif
-
-#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
-#error HARDIRQ_BITS too high!
-#endif
-
-#define PREEMPT_SHIFT 0
-#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
-#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
-#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
-
-#define __IRQ_MASK(x) ((1UL << (x))-1)
-
-#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
-#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
-#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
-#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
-
-#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
-#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
-#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
-#define NMI_OFFSET (1UL << NMI_SHIFT)
-
-#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
-
-#ifndef PREEMPT_ACTIVE
-#define PREEMPT_ACTIVE_BITS 1
-#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
-#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
-#endif
-
-#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
-#error PREEMPT_ACTIVE is too low!
-#endif
-
-#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
-#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
-#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
-                                 | NMI_MASK))
-
-/*
- * Are we doing bottom half or hardware interrupt processing?
- * Are we in a softirq context? Interrupt context?
- * in_softirq - Are we currently processing softirq or have bh disabled?
- * in_serving_softirq - Are we currently processing softirq?
- */
-#define in_irq() (hardirq_count())
-#define in_softirq() (softirq_count())
-#define in_interrupt() (irq_count())
-#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
-
-/*
- * Are we in NMI context?
- */
-#define in_nmi() (preempt_count() & NMI_MASK)
-
-#if defined(CONFIG_PREEMPT_COUNT)
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
-/*
- * Are we running in atomic context? WARNING: this macro cannot
- * always detect atomic context; in particular, it cannot know about
- * held spinlocks in non-preemptible kernels. Thus it should not be
- * used in the general case to determine whether sleeping is possible.
- * Do not use in_atomic() in driver code.
- */
-#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
-
-/*
- * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
- */
-#define in_atomic_preempt_off() \
-                ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
-
-#ifdef CONFIG_PREEMPT_COUNT
-# define preemptible() (preempt_count() == 0 && !irqs_disabled())
-#else
-# define preemptible() 0
-#endif
 
 #if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
 extern void synchronize_irq(unsigned int irq);
include/linux/preempt_mask.h (new file, 122 lines)
@@ -0,0 +1,122 @@
+#ifndef LINUX_PREEMPT_MASK_H
+#define LINUX_PREEMPT_MASK_H
+
+#include <linux/preempt.h>
+#include <asm/hardirq.h>
+
+/*
+ * We put the hardirq and softirq counter into the preemption
+ * counter. The bitmask has the following meaning:
+ *
+ * - bits 0-7 are the preemption count (max preemption depth: 256)
+ * - bits 8-15 are the softirq count (max # of softirqs: 256)
+ *
+ * The hardirq count can in theory reach the same as NR_IRQS.
+ * In reality, the number of nested IRQS is limited to the stack
+ * size as well. For archs with over 1000 IRQS it is not practical
+ * to expect that they will all nest. We give a max of 10 bits for
+ * hardirq nesting. An arch may choose to give less than 10 bits.
+ * m68k expects it to be 8.
+ *
+ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024)
+ * - bit 26 is the NMI_MASK
+ * - bit 27 is the PREEMPT_ACTIVE flag
+ *
+ * PREEMPT_MASK: 0x000000ff
+ * SOFTIRQ_MASK: 0x0000ff00
+ * HARDIRQ_MASK: 0x03ff0000
+ * NMI_MASK:     0x04000000
+ */
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define NMI_BITS 1
+
+#define MAX_HARDIRQ_BITS 10
+
+#ifndef HARDIRQ_BITS
+# define HARDIRQ_BITS MAX_HARDIRQ_BITS
+#endif
+
+#if HARDIRQ_BITS > MAX_HARDIRQ_BITS
+#error HARDIRQ_BITS too high!
+#endif
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT)
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+#define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT)
+#define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT)
+#define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT)
+#define NMI_OFFSET (1UL << NMI_SHIFT)
+
+#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
+
+#ifndef PREEMPT_ACTIVE
+#define PREEMPT_ACTIVE_BITS 1
+#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS)
+#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT)
+#endif
+
+#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS))
+#error PREEMPT_ACTIVE is too low!
+#endif
+
+#define hardirq_count() (preempt_count() & HARDIRQ_MASK)
+#define softirq_count() (preempt_count() & SOFTIRQ_MASK)
+#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
+                                 | NMI_MASK))
+
+/*
+ * Are we doing bottom half or hardware interrupt processing?
+ * Are we in a softirq context? Interrupt context?
+ * in_softirq - Are we currently processing softirq or have bh disabled?
+ * in_serving_softirq - Are we currently processing softirq?
+ */
+#define in_irq() (hardirq_count())
+#define in_softirq() (softirq_count())
+#define in_interrupt() (irq_count())
+#define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET)
+
+/*
+ * Are we in NMI context?
+ */
+#define in_nmi() (preempt_count() & NMI_MASK)
+
+#if defined(CONFIG_PREEMPT_COUNT)
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
+
+/*
+ * Are we running in atomic context? WARNING: this macro cannot
+ * always detect atomic context; in particular, it cannot know about
+ * held spinlocks in non-preemptible kernels. Thus it should not be
+ * used in the general case to determine whether sleeping is possible.
+ * Do not use in_atomic() in driver code.
+ */
+#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+/*
+ * Check whether we were atomic before we did preempt_disable():
+ * (used by the scheduler, *after* releasing the kernel lock)
+ */
+#define in_atomic_preempt_off() \
+                ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
+
+#ifdef CONFIG_PREEMPT_COUNT
+# define preemptible() (preempt_count() == 0 && !irqs_disabled())
+#else
+# define preemptible() 0
+#endif
+
+#endif /* LINUX_PREEMPT_MASK_H */
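The bit-layout comment carried into this new header pins down concrete mask values. A small self-contained check of that arithmetic (plain userspace C written for this note, not part of the patch):

#include <assert.h>

#define __IRQ_MASK(x) ((1UL << (x)) - 1)

int main(void)
{
        /* 8 preempt bits, 8 softirq bits, 10 hardirq bits, 1 NMI bit */
        unsigned long preempt_mask = __IRQ_MASK(8)  << 0;
        unsigned long softirq_mask = __IRQ_MASK(8)  << 8;
        unsigned long hardirq_mask = __IRQ_MASK(10) << 16;
        unsigned long nmi_mask     = __IRQ_MASK(1)  << 26;

        /* Matches the values documented in the header comment above. */
        assert(preempt_mask == 0x000000ffUL);
        assert(softirq_mask == 0x0000ff00UL);
        assert(hardirq_mask == 0x03ff0000UL);
        assert(nmi_mask     == 0x04000000UL);
        return 0;
}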
@@ -10,6 +10,8 @@
 #include <linux/irqflags.h>
 #include <linux/percpu.h>
 #include <linux/hrtimer.h>
+#include <linux/context_tracking_state.h>
+#include <linux/cpumask.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
 
@@ -158,20 +160,51 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
+extern bool tick_nohz_full_running;
+extern cpumask_var_t tick_nohz_full_mask;
+
+static inline bool tick_nohz_full_enabled(void)
+{
+        if (!static_key_false(&context_tracking_enabled))
+                return false;
+
+        return tick_nohz_full_running;
+}
+
+static inline bool tick_nohz_full_cpu(int cpu)
+{
+        if (!tick_nohz_full_enabled())
+                return false;
+
+        return cpumask_test_cpu(cpu, tick_nohz_full_mask);
+}
+
 extern void tick_nohz_init(void);
-extern int tick_nohz_full_cpu(int cpu);
-extern void tick_nohz_full_check(void);
+extern void __tick_nohz_full_check(void);
 extern void tick_nohz_full_kick(void);
 extern void tick_nohz_full_kick_all(void);
-extern void tick_nohz_task_switch(struct task_struct *tsk);
+extern void __tick_nohz_task_switch(struct task_struct *tsk);
 #else
 static inline void tick_nohz_init(void) { }
-static inline int tick_nohz_full_cpu(int cpu) { return 0; }
-static inline void tick_nohz_full_check(void) { }
+static inline bool tick_nohz_full_enabled(void) { return false; }
+static inline bool tick_nohz_full_cpu(int cpu) { return false; }
+static inline void __tick_nohz_full_check(void) { }
 static inline void tick_nohz_full_kick(void) { }
 static inline void tick_nohz_full_kick_all(void) { }
-static inline void tick_nohz_task_switch(struct task_struct *tsk) { }
+static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
 #endif
 
+static inline void tick_nohz_full_check(void)
+{
+        if (tick_nohz_full_enabled())
+                __tick_nohz_full_check();
+}
+
+static inline void tick_nohz_task_switch(struct task_struct *tsk)
+{
+        if (tick_nohz_full_enabled())
+                __tick_nohz_task_switch(tsk);
+}
+
 
 #endif
@@ -1,18 +1,68 @@
 #ifndef _LINUX_KERNEL_VTIME_H
 #define _LINUX_KERNEL_VTIME_H
 
+#include <linux/context_tracking_state.h>
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+#include <asm/vtime.h>
+#endif
+
+
 struct task_struct;
 
+/*
+ * vtime_accounting_enabled() definitions/declarations
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+static inline bool vtime_accounting_enabled(void) { return true; }
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+static inline bool vtime_accounting_enabled(void)
+{
+        if (static_key_false(&context_tracking_enabled)) {
+                if (context_tracking_active())
+                        return true;
+        }
+
+        return false;
+}
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+static inline bool vtime_accounting_enabled(void) { return false; }
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
+
+
+/*
+ * Common vtime APIs
+ */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
+
+#ifdef __ARCH_HAS_VTIME_TASK_SWITCH
 extern void vtime_task_switch(struct task_struct *prev);
+#else
+extern void vtime_common_task_switch(struct task_struct *prev);
+static inline void vtime_task_switch(struct task_struct *prev)
+{
+        if (vtime_accounting_enabled())
+                vtime_common_task_switch(prev);
+}
+#endif /* __ARCH_HAS_VTIME_TASK_SWITCH */
+
 extern void vtime_account_system(struct task_struct *tsk);
 extern void vtime_account_idle(struct task_struct *tsk);
 extern void vtime_account_user(struct task_struct *tsk);
-extern void vtime_account_irq_enter(struct task_struct *tsk);
 
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-static inline bool vtime_accounting_enabled(void) { return true; }
-#endif
+#ifdef __ARCH_HAS_VTIME_ACCOUNT
+extern void vtime_account_irq_enter(struct task_struct *tsk);
+#else
+extern void vtime_common_account_irq_enter(struct task_struct *tsk);
+static inline void vtime_account_irq_enter(struct task_struct *tsk)
+{
+        if (vtime_accounting_enabled())
+                vtime_common_account_irq_enter(tsk);
+}
+#endif /* __ARCH_HAS_VTIME_ACCOUNT */
 
 #else /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
@@ -20,14 +70,20 @@ static inline void vtime_task_switch(struct task_struct *prev) { }
 static inline void vtime_account_system(struct task_struct *tsk) { }
 static inline void vtime_account_user(struct task_struct *tsk) { }
 static inline void vtime_account_irq_enter(struct task_struct *tsk) { }
-static inline bool vtime_accounting_enabled(void) { return false; }
-#endif
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern void arch_vtime_task_switch(struct task_struct *tsk);
-extern void vtime_account_irq_exit(struct task_struct *tsk);
-extern bool vtime_accounting_enabled(void);
+extern void vtime_gen_account_irq_exit(struct task_struct *tsk);
+
+static inline void vtime_account_irq_exit(struct task_struct *tsk)
+{
+        if (vtime_accounting_enabled())
+                vtime_gen_account_irq_exit(tsk);
+}
+
 extern void vtime_user_enter(struct task_struct *tsk);
+
 static inline void vtime_user_exit(struct task_struct *tsk)
 {
         vtime_account_user(tsk);
@@ -35,7 +91,7 @@ static inline void vtime_user_exit(struct task_struct *tsk)
 extern void vtime_guest_enter(struct task_struct *tsk);
 extern void vtime_guest_exit(struct task_struct *tsk);
 extern void vtime_init_idle(struct task_struct *tsk, int cpu);
-#else
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_GEN */
 static inline void vtime_account_irq_exit(struct task_struct *tsk)
 {
         /* On hard|softirq exit we always account to hard|softirq cputime */
include/trace/events/context_tracking.h (new file, 58 lines)
@@ -0,0 +1,58 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM context_tracking
+
+#if !defined(_TRACE_CONTEXT_TRACKING_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_CONTEXT_TRACKING_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(context_tracking_user,
+
+        TP_PROTO(int dummy),
+
+        TP_ARGS(dummy),
+
+        TP_STRUCT__entry(
+                __field( int, dummy )
+        ),
+
+        TP_fast_assign(
+                __entry->dummy = dummy;
+        ),
+
+        TP_printk("%s", "")
+);
+
+/**
+ * user_enter - called when the kernel resumes to userspace
+ * @dummy: dummy arg to make trace event macro happy
+ *
+ * This event occurs when the kernel resumes to userspace after
+ * an exception or a syscall.
+ */
+DEFINE_EVENT(context_tracking_user, user_enter,
+
+        TP_PROTO(int dummy),
+
+        TP_ARGS(dummy)
+);
+
+/**
+ * user_exit - called when userspace enters the kernel
+ * @dummy: dummy arg to make trace event macro happy
+ *
+ * This event occurs when userspace enters the kernel through
+ * an exception or a syscall.
+ */
+DEFINE_EVENT(context_tracking_user, user_exit,
+
+        TP_PROTO(int dummy),
+
+        TP_ARGS(dummy)
+);
+
+
+#endif /*  _TRACE_CONTEXT_TRACKING_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
init/Kconfig
@@ -528,13 +528,29 @@ config RCU_USER_QS
 config CONTEXT_TRACKING_FORCE
 	bool "Force context tracking"
 	depends on CONTEXT_TRACKING
-	default CONTEXT_TRACKING
+	default y if !NO_HZ_FULL
 	help
-	  Probe on user/kernel boundaries by default in order to
-	  test the features that rely on it such as userspace RCU extended
-	  quiescent states.
-	  This test is there for debugging until we have a real user like the
-	  full dynticks mode.
+	  The major pre-requirement for full dynticks to work is to
+	  support the context tracking subsystem. But there are also
+	  other dependencies to provide in order to make the full
+	  dynticks working.
+
+	  This option stands for testing when an arch implements the
+	  context tracking backend but doesn't yet fullfill all the
+	  requirements to make the full dynticks feature working.
+
+	  Without the full dynticks, there is no way to test the support
+	  for context tracking and the subsystems that rely on it: RCU
+	  userspace extended quiescent state and tickless cputime
+	  accounting. This option copes with the absence of the full
+	  dynticks subsystem by forcing the context tracking on all
+	  CPUs in the system.
+
+	  Say Y only if you're working on the developpement of an
+	  architecture backend for the context tracking.
+
+	  Say N otherwise, this option brings an overhead that you
+	  don't want in production.
 
 config RCU_FANOUT
 	int "Tree-based hierarchical RCU fanout value"
@@ -75,6 +75,7 @@
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/sched_clock.h>
+#include <linux/context_tracking.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -545,6 +546,7 @@ asmlinkage void __init start_kernel(void)
         idr_init_cache();
         rcu_init();
         tick_nohz_init();
+        context_tracking_init();
         radix_tree_init();
         /* init some links before init_ISA_irqs() */
         early_irq_init();
@@ -20,22 +20,33 @@
 #include <linux/hardirq.h>
 #include <linux/export.h>
 
-DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
-#ifdef CONFIG_CONTEXT_TRACKING_FORCE
-        .active = true,
-#endif
-};
+#define CREATE_TRACE_POINTS
+#include <trace/events/context_tracking.h>
+
+struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL_GPL(context_tracking_enabled);
+
+DEFINE_PER_CPU(struct context_tracking, context_tracking);
+EXPORT_SYMBOL_GPL(context_tracking);
+
+void context_tracking_cpu_set(int cpu)
+{
+        if (!per_cpu(context_tracking.active, cpu)) {
+                per_cpu(context_tracking.active, cpu) = true;
+                static_key_slow_inc(&context_tracking_enabled);
+        }
+}
 
 /**
- * user_enter - Inform the context tracking that the CPU is going to
+ * context_tracking_user_enter - Inform the context tracking that the CPU is going to
  *              enter userspace mode.
  *
  * This function must be called right before we switch from the kernel
 * to userspace, when it's guaranteed the remaining kernel instructions
 * to execute won't use any RCU read side critical section because this
 * function sets RCU in extended quiescent state.
 */
-void user_enter(void)
+void context_tracking_user_enter(void)
 {
         unsigned long flags;
 
@@ -54,17 +65,32 @@ void user_enter(void)
         WARN_ON_ONCE(!current->mm);
 
         local_irq_save(flags);
-        if (__this_cpu_read(context_tracking.active) &&
-            __this_cpu_read(context_tracking.state) != IN_USER) {
+        if ( __this_cpu_read(context_tracking.state) != IN_USER) {
+                if (__this_cpu_read(context_tracking.active)) {
+                        trace_user_enter(0);
+                        /*
+                         * At this stage, only low level arch entry code remains and
+                         * then we'll run in userspace. We can assume there won't be
+                         * any RCU read-side critical section until the next call to
+                         * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
+                         * on the tick.
+                         */
+                        vtime_user_enter(current);
+                        rcu_user_enter();
+                }
                 /*
-                 * At this stage, only low level arch entry code remains and
-                 * then we'll run in userspace. We can assume there won't be
-                 * any RCU read-side critical section until the next call to
-                 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
-                 * on the tick.
+                 * Even if context tracking is disabled on this CPU, because it's outside
+                 * the full dynticks mask for example, we still have to keep track of the
+                 * context transitions and states to prevent inconsistency on those of
+                 * other CPUs.
+                 * If a task triggers an exception in userspace, sleep on the exception
+                 * handler and then migrate to another CPU, that new CPU must know where
+                 * the exception returns by the time we call exception_exit().
+                 * This information can only be provided by the previous CPU when it called
+                 * exception_enter().
+                 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
+                 * is false because we know that CPU is not tickless.
                  */
-                vtime_user_enter(current);
-                rcu_user_enter();
                 __this_cpu_write(context_tracking.state, IN_USER);
         }
         local_irq_restore(flags);
@@ -87,10 +113,9 @@ void user_enter(void)
  */
 void __sched notrace preempt_schedule_context(void)
 {
-        struct thread_info *ti = current_thread_info();
         enum ctx_state prev_ctx;
 
-        if (likely(ti->preempt_count || irqs_disabled()))
+        if (likely(!preemptible()))
                 return;
 
         /*
@@ -112,8 +137,8 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
 #endif /* CONFIG_PREEMPT */
 
 /**
- * user_exit - Inform the context tracking that the CPU is
+ * context_tracking_user_exit - Inform the context tracking that the CPU is
  *             exiting userspace mode and entering the kernel.
  *
  * This function must be called after we entered the kernel from userspace
 * before any use of RCU read side critical section. This potentially include
@@ -122,7 +147,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_context);
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
-void user_exit(void)
+void context_tracking_user_exit(void)
 {
         unsigned long flags;
 
@@ -131,38 +156,22 @@ void user_exit(void)
 
         local_irq_save(flags);
         if (__this_cpu_read(context_tracking.state) == IN_USER) {
-                /*
-                 * We are going to run code that may use RCU. Inform
-                 * RCU core about that (ie: we may need the tick again).
-                 */
-                rcu_user_exit();
-                vtime_user_exit(current);
+                if (__this_cpu_read(context_tracking.active)) {
+                        /*
+                         * We are going to run code that may use RCU. Inform
+                         * RCU core about that (ie: we may need the tick again).
+                         */
+                        rcu_user_exit();
+                        vtime_user_exit(current);
+                        trace_user_exit(0);
+                }
                 __this_cpu_write(context_tracking.state, IN_KERNEL);
         }
         local_irq_restore(flags);
 }
 
-void guest_enter(void)
-{
-        if (vtime_accounting_enabled())
-                vtime_guest_enter(current);
-        else
-                __guest_enter();
-}
-EXPORT_SYMBOL_GPL(guest_enter);
-
-void guest_exit(void)
-{
-        if (vtime_accounting_enabled())
-                vtime_guest_exit(current);
-        else
-                __guest_exit();
-}
-EXPORT_SYMBOL_GPL(guest_exit);
-
 /**
- * context_tracking_task_switch - context switch the syscall callbacks
+ * __context_tracking_task_switch - context switch the syscall callbacks
  * @prev: the task that is being switched out
  * @next: the task that is being switched in
  *
@@ -174,11 +183,19 @@ EXPORT_SYMBOL_GPL(guest_exit);
 * migrate to some CPU that doesn't do the context tracking. As such the TIF
 * flag may not be desired there.
 */
-void context_tracking_task_switch(struct task_struct *prev,
-                                  struct task_struct *next)
+void __context_tracking_task_switch(struct task_struct *prev,
+                                    struct task_struct *next)
 {
-        if (__this_cpu_read(context_tracking.active)) {
-                clear_tsk_thread_flag(prev, TIF_NOHZ);
-                set_tsk_thread_flag(next, TIF_NOHZ);
-        }
+        clear_tsk_thread_flag(prev, TIF_NOHZ);
+        set_tsk_thread_flag(next, TIF_NOHZ);
 }
+
+#ifdef CONFIG_CONTEXT_TRACKING_FORCE
+void __init context_tracking_init(void)
+{
+        int cpu;
+
+        for_each_possible_cpu(cpu)
+                context_tracking_cpu_set(cpu);
+}
+#endif
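The new comment in context_tracking_user_enter() about exceptions that sleep and then migrate is easier to follow next to the way arch code is expected to bracket exception handlers with the inlines from context_tracking.h. A sketch of that calling pattern; the handler itself is hypothetical, only exception_enter()/exception_exit() and enum ctx_state come from this series:

#include <linux/context_tracking.h>

/* Hypothetical arch-side fault handler, shown only for the bracketing. */
static void example_fault_handler(struct pt_regs *regs)
{
        enum ctx_state prev_state;

        /* Remember whether we interrupted userspace and switch tracking to kernel mode. */
        prev_state = exception_enter();

        /* ... handle the fault; this may sleep and migrate to another CPU ... */

        /* If the fault interrupted userspace, re-enter user-mode tracking before returning. */
        exception_exit(prev_state);
}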
@@ -2527,13 +2527,11 @@ void __sched schedule_preempt_disabled(void)
  */
 asmlinkage void __sched notrace preempt_schedule(void)
 {
-        struct thread_info *ti = current_thread_info();
-
         /*
          * If there is a non-zero preempt_count or interrupts are disabled,
          * we do not want to preempt the current task. Just return..
          */
-        if (likely(ti->preempt_count || irqs_disabled()))
+        if (likely(!preemptible()))
                 return;
 
         do {
@@ -378,11 +378,8 @@ static inline void irqtime_account_process_tick(struct task_struct *p, int user_
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING
 
 #ifndef __ARCH_HAS_VTIME_TASK_SWITCH
-void vtime_task_switch(struct task_struct *prev)
+void vtime_common_task_switch(struct task_struct *prev)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         if (is_idle_task(prev))
                 vtime_account_idle(prev);
         else
@@ -404,11 +401,8 @@ void vtime_task_switch(struct task_struct *prev)
  * vtime_account().
  */
 #ifndef __ARCH_HAS_VTIME_ACCOUNT
-void vtime_account_irq_enter(struct task_struct *tsk)
+void vtime_common_account_irq_enter(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         if (!in_interrupt()) {
                 /*
                  * If we interrupted user, context_tracking_in_user()
@@ -428,7 +422,7 @@ void vtime_account_irq_enter(struct task_struct *tsk)
         }
         vtime_account_system(tsk);
 }
-EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
+EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
 #endif /* __ARCH_HAS_VTIME_ACCOUNT */
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING */
 
@@ -559,12 +553,6 @@ static void cputime_adjust(struct task_cputime *curr,
 {
         cputime_t rtime, stime, utime, total;
 
-        if (vtime_accounting_enabled()) {
-                *ut = curr->utime;
-                *st = curr->stime;
-                return;
-        }
-
         stime = curr->stime;
         total = stime + curr->utime;
 
@@ -664,23 +652,17 @@ static void __vtime_account_system(struct task_struct *tsk)
 
 void vtime_account_system(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
         __vtime_account_system(tsk);
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
-void vtime_account_irq_exit(struct task_struct *tsk)
+void vtime_gen_account_irq_exit(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
+        __vtime_account_system(tsk);
         if (context_tracking_in_user())
                 tsk->vtime_snap_whence = VTIME_USER;
-        __vtime_account_system(tsk);
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
@@ -688,12 +670,8 @@ void vtime_account_user(struct task_struct *tsk)
 {
         cputime_t delta_cpu;
 
-        if (!vtime_accounting_enabled())
-                return;
-
-        delta_cpu = get_vtime_delta(tsk);
-
         write_seqlock(&tsk->vtime_seqlock);
+        delta_cpu = get_vtime_delta(tsk);
         tsk->vtime_snap_whence = VTIME_SYS;
         account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
         write_sequnlock(&tsk->vtime_seqlock);
@@ -701,22 +679,27 @@ void vtime_account_user(struct task_struct *tsk)
 
 void vtime_user_enter(struct task_struct *tsk)
 {
-        if (!vtime_accounting_enabled())
-                return;
-
         write_seqlock(&tsk->vtime_seqlock);
-        tsk->vtime_snap_whence = VTIME_USER;
         __vtime_account_system(tsk);
+        tsk->vtime_snap_whence = VTIME_USER;
         write_sequnlock(&tsk->vtime_seqlock);
 }
 
 void vtime_guest_enter(struct task_struct *tsk)
 {
+        /*
+         * The flags must be updated under the lock with
+         * the vtime_snap flush and update.
+         * That enforces a right ordering and update sequence
+         * synchronization against the reader (task_gtime())
+         * that can thus safely catch up with a tickless delta.
+         */
         write_seqlock(&tsk->vtime_seqlock);
         __vtime_account_system(tsk);
         current->flags |= PF_VCPU;
         write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_enter);
 
 void vtime_guest_exit(struct task_struct *tsk)
 {
@@ -725,6 +708,7 @@ void vtime_guest_exit(struct task_struct *tsk)
         current->flags &= ~PF_VCPU;
         write_sequnlock(&tsk->vtime_seqlock);
 }
+EXPORT_SYMBOL_GPL(vtime_guest_exit);
 
 void vtime_account_idle(struct task_struct *tsk)
 {
@@ -733,11 +717,6 @@ void vtime_account_idle(struct task_struct *tsk)
         account_idle_time(delta_cpu);
 }
 
-bool vtime_accounting_enabled(void)
-{
-        return context_tracking_active();
-}
-
 void arch_vtime_task_switch(struct task_struct *prev)
 {
         write_seqlock(&prev->vtime_seqlock);
@@ -105,7 +105,6 @@ config NO_HZ_FULL
 	select RCU_USER_QS
 	select RCU_NOCB_CPU
 	select VIRT_CPU_ACCOUNTING_GEN
-	select CONTEXT_TRACKING_FORCE
 	select IRQ_WORK
 	help
 	 Adaptively try to shutdown the tick whenever possible, even when
@@ -23,6 +23,7 @@
 #include <linux/irq_work.h>
 #include <linux/posix-timers.h>
 #include <linux/perf_event.h>
+#include <linux/context_tracking.h>
 
 #include <asm/irq_regs.h>
 
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 }
 
 #ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
 
 static bool can_stop_full_tick(void)
 {
@@ -182,7 +183,7 @@ static bool can_stop_full_tick(void)
                  * Don't allow the user to think they can get
                  * full NO_HZ with this machine.
                  */
-                WARN_ONCE(have_nohz_full_mask,
+                WARN_ONCE(tick_nohz_full_running,
                           "NO_HZ FULL will not work with unstable sched clock");
                 return false;
         }
@@ -197,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
  * Re-evaluate the need for the tick on the current CPU
 * and restart it if necessary.
 */
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
 {
         struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 
@@ -211,7 +212,7 @@ void tick_nohz_full_check(void)
 
 static void nohz_full_kick_work_func(struct irq_work *work)
 {
-        tick_nohz_full_check();
+        __tick_nohz_full_check();
 }
 
 static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -230,7 +231,7 @@ void tick_nohz_full_kick(void)
 
 static void nohz_full_kick_ipi(void *info)
 {
-        tick_nohz_full_check();
+        __tick_nohz_full_check();
 }
 
 /*
@@ -239,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
 */
 void tick_nohz_full_kick_all(void)
 {
-        if (!have_nohz_full_mask)
+        if (!tick_nohz_full_running)
                 return;
 
         preempt_disable();
-        smp_call_function_many(nohz_full_mask,
+        smp_call_function_many(tick_nohz_full_mask,
                                nohz_full_kick_ipi, NULL, false);
+        tick_nohz_full_kick();
         preempt_enable();
 }
 
@@ -253,7 +255,7 @@ void tick_nohz_full_kick_all(void)
 * It might need the tick due to per task/process properties:
 * perf events, posix cpu timers, ...
 */
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
 {
         unsigned long flags;
 
@@ -269,31 +271,23 @@ void tick_nohz_task_switch(struct task_struct *tsk)
         local_irq_restore(flags);
 }
 
-int tick_nohz_full_cpu(int cpu)
-{
-        if (!have_nohz_full_mask)
-                return 0;
-
-        return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
 /* Parse the boot-time nohz CPU list from the kernel parameters. */
 static int __init tick_nohz_full_setup(char *str)
 {
         int cpu;
 
-        alloc_bootmem_cpumask_var(&nohz_full_mask);
-        if (cpulist_parse(str, nohz_full_mask) < 0) {
+        alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+        if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
                 pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
                 return 1;
         }
 
         cpu = smp_processor_id();
-        if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+        if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
                 pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
-                cpumask_clear_cpu(cpu, nohz_full_mask);
+                cpumask_clear_cpu(cpu, tick_nohz_full_mask);
         }
-        have_nohz_full_mask = true;
+        tick_nohz_full_running = true;
 
         return 1;
 }
@@ -311,7 +305,7 @@ static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
                  * If we handle the timekeeping duty for full dynticks CPUs,
                  * we can't safely shutdown that CPU.
                  */
-                if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+                if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
                         return NOTIFY_BAD;
                 break;
         }
@@ -330,31 +324,34 @@ static int tick_nohz_init_all(void)
         int err = -1;
 
 #ifdef CONFIG_NO_HZ_FULL_ALL
-        if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+        if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
                 pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
                 return err;
         }
         err = 0;
-        cpumask_setall(nohz_full_mask);
-        cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
-        have_nohz_full_mask = true;
+        cpumask_setall(tick_nohz_full_mask);
+        cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+        tick_nohz_full_running = true;
 #endif
         return err;
 }
 
 void __init tick_nohz_init(void)
 {
-        if (!have_nohz_full_mask) {
+        int cpu;
+
+        if (!tick_nohz_full_running) {
                 if (tick_nohz_init_all() < 0)
                         return;
         }
 
+        for_each_cpu(cpu, tick_nohz_full_mask)
+                context_tracking_cpu_set(cpu);
+
         cpu_notifier(tick_nohz_cpu_down_callback, 0);
-        cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+        cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
         pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
 }
-#else
-#define have_nohz_full_mask (0)
 #endif
 
 /*
@@ -732,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
                 return false;
         }
 
-        if (have_nohz_full_mask) {
+        if (tick_nohz_full_enabled()) {
                 /*
                  * Keep the tick alive to guarantee timekeeping progression
                  * if there are full dynticks CPUs around