timerfd: Manage cancelable timers in timerfd
Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with an RCU-based list the list walk is less horrible than with the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Peter Zijlstra <peterz@infradead.org>
This commit is contained in:
parent
250f972d85
commit
9ec2690758
5 changed files with 117 additions and 106 deletions
99
fs/timerfd.c
99
fs/timerfd.c
|
@ -22,6 +22,7 @@
|
|||
#include <linux/anon_inodes.h>
|
||||
#include <linux/timerfd.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/rcupdate.h>
|
||||
|
||||
struct timerfd_ctx {
|
||||
struct hrtimer tmr;
|
||||
|
@ -31,9 +32,14 @@ struct timerfd_ctx {
|
|||
u64 ticks;
|
||||
int expired;
|
||||
int clockid;
|
||||
struct rcu_head rcu;
|
||||
struct list_head clist;
|
||||
bool might_cancel;
|
||||
};
|
||||
|
||||
static LIST_HEAD(cancel_list);
|
||||
static DEFINE_SPINLOCK(cancel_lock);
|
||||
|
||||
/*
|
||||
* This gets called when the timer event triggers. We set the "expired"
|
||||
* flag, but we do not re-arm the timer (in case it's necessary,
|
||||
|
@ -53,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
|
|||
return HRTIMER_NORESTART;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called when the clock was set to cancel the timers in the cancel
|
||||
* list.
|
||||
*/
|
||||
void timerfd_clock_was_set(void)
|
||||
{
|
||||
ktime_t moffs = ktime_get_monotonic_offset();
|
||||
struct timerfd_ctx *ctx;
|
||||
unsigned long flags;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(ctx, &cancel_list, clist) {
|
||||
if (!ctx->might_cancel)
|
||||
continue;
|
||||
spin_lock_irqsave(&ctx->wqh.lock, flags);
|
||||
if (ctx->moffs.tv64 != moffs.tv64) {
|
||||
ctx->moffs.tv64 = KTIME_MAX;
|
||||
wake_up_locked(&ctx->wqh);
|
||||
}
|
||||
spin_unlock_irqrestore(&ctx->wqh.lock, flags);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
|
||||
{
|
||||
if (ctx->might_cancel) {
|
||||
ctx->might_cancel = false;
|
||||
spin_lock(&cancel_lock);
|
||||
list_del_rcu(&ctx->clist);
|
||||
spin_unlock(&cancel_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static bool timerfd_canceled(struct timerfd_ctx *ctx)
|
||||
{
|
||||
if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
|
||||
return false;
|
||||
ctx->moffs = ktime_get_monotonic_offset();
|
||||
return true;
|
||||
}
|
||||
|
||||
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
|
||||
{
|
||||
if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
|
||||
(flags & TFD_TIMER_CANCEL_ON_SET)) {
|
||||
if (!ctx->might_cancel) {
|
||||
ctx->might_cancel = true;
|
||||
spin_lock(&cancel_lock);
|
||||
list_add_rcu(&ctx->clist, &cancel_list);
|
||||
spin_unlock(&cancel_lock);
|
||||
}
|
||||
} else if (ctx->might_cancel) {
|
||||
timerfd_remove_cancel(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
|
||||
{
|
||||
ktime_t remaining;
|
||||
|
@ -61,22 +124,6 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
|
|||
return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
|
||||
}
|
||||
|
||||
static bool timerfd_canceled(struct timerfd_ctx *ctx)
|
||||
{
|
||||
ktime_t moffs;
|
||||
|
||||
if (!ctx->might_cancel)
|
||||
return false;
|
||||
|
||||
moffs = ktime_get_monotonic_offset();
|
||||
|
||||
if (moffs.tv64 == ctx->moffs.tv64)
|
||||
return false;
|
||||
|
||||
ctx->moffs = moffs;
|
||||
return true;
|
||||
}
|
||||
|
||||
static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
|
||||
const struct itimerspec *ktmr)
|
||||
{
|
||||
|
@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
|
|||
htmode = (flags & TFD_TIMER_ABSTIME) ?
|
||||
HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
|
||||
|
||||
ctx->might_cancel = false;
|
||||
if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME &&
|
||||
(flags & TFD_TIMER_CANCELON_SET)) {
|
||||
clockid = CLOCK_REALTIME_COS;
|
||||
ctx->might_cancel = true;
|
||||
}
|
||||
|
||||
texp = timespec_to_ktime(ktmr->it_value);
|
||||
ctx->expired = 0;
|
||||
ctx->ticks = 0;
|
||||
|
@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file)
|
|||
{
|
||||
struct timerfd_ctx *ctx = file->private_data;
|
||||
|
||||
timerfd_remove_cancel(ctx);
|
||||
hrtimer_cancel(&ctx->tmr);
|
||||
kfree(ctx);
|
||||
kfree_rcu(ctx, rcu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
|
|||
else
|
||||
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
|
||||
|
||||
if (ctx->ticks) {
|
||||
ticks = ctx->ticks;
|
||||
|
||||
/*
|
||||
* If clock has changed, we do not care about the
|
||||
* ticks and we do not rearm the timer. Userspace must
|
||||
* reevaluate anyway.
|
||||
*/
|
||||
if (timerfd_canceled(ctx)) {
|
||||
ticks = 0;
|
||||
ctx->ticks = 0;
|
||||
ctx->expired = 0;
|
||||
res = -ECANCELED;
|
||||
}
|
||||
|
||||
if (ctx->ticks) {
|
||||
ticks = ctx->ticks;
|
||||
|
||||
if (ctx->expired && ctx->tintv.tv64) {
|
||||
/*
|
||||
* If tintv.tv64 != 0, this is a periodic timer that
|
||||
|
@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
|
|||
return PTR_ERR(file);
|
||||
ctx = file->private_data;
|
||||
|
||||
timerfd_setup_cancel(ctx, flags);
|
||||
|
||||
/*
|
||||
* We need to stop the existing timer before reprogramming
|
||||
* it to the new values.
|
||||
|
|
|
@ -155,7 +155,6 @@ enum hrtimer_base_type {
|
|||
HRTIMER_BASE_REALTIME,
|
||||
HRTIMER_BASE_MONOTONIC,
|
||||
HRTIMER_BASE_BOOTTIME,
|
||||
HRTIMER_BASE_REALTIME_COS,
|
||||
HRTIMER_MAX_CLOCK_BASES,
|
||||
};
|
||||
|
||||
|
@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
|
|||
#endif
|
||||
|
||||
extern void clock_was_set(void);
|
||||
#ifdef CONFIG_TIMERFD
|
||||
extern void timerfd_clock_was_set(void);
|
||||
#else
|
||||
static inline void timerfd_clock_was_set(void) { }
|
||||
#endif
|
||||
extern void hrtimers_resume(void);
|
||||
|
||||
extern ktime_t ktime_get(void);
|
||||
|
|
|
@ -302,12 +302,6 @@ struct itimerval {
|
|||
* The IDs of various hardware clocks:
|
||||
*/
|
||||
#define CLOCK_SGI_CYCLE 10
|
||||
|
||||
#ifdef __KERNEL__
|
||||
/* This clock is not exposed to user space */
|
||||
#define CLOCK_REALTIME_COS 15
|
||||
#endif
|
||||
|
||||
#define MAX_CLOCKS 16
|
||||
#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
|
||||
#define CLOCKS_MONO CLOCK_MONOTONIC
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* shared O_* flags.
|
||||
*/
|
||||
#define TFD_TIMER_ABSTIME (1 << 0)
|
||||
#define TFD_TIMER_CANCELON_SET (1 << 1)
|
||||
#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
|
||||
#define TFD_CLOEXEC O_CLOEXEC
|
||||
#define TFD_NONBLOCK O_NONBLOCK
|
||||
|
||||
|
@ -27,6 +27,6 @@
|
|||
/* Flags for timerfd_create. */
|
||||
#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
|
||||
/* Flags for timerfd_settime. */
|
||||
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET)
|
||||
#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
|
||||
|
||||
#endif /* _LINUX_TIMERFD_H */
|
||||
|
|
|
@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
|
|||
.get_time = &ktime_get_boottime,
|
||||
.resolution = KTIME_LOW_RES,
|
||||
},
|
||||
{
|
||||
.index = CLOCK_REALTIME_COS,
|
||||
.get_time = &ktime_get_real,
|
||||
.resolution = KTIME_LOW_RES,
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
|
|||
[CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
|
||||
[CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
|
||||
[CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
|
||||
[CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS,
|
||||
};
|
||||
|
||||
static inline int hrtimer_clockid_to_base(clockid_t clock_id)
|
||||
|
@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
|
|||
base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
|
||||
base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
|
||||
base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
|
||||
base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer)
|
|||
trace_hrtimer_cancel(timer);
|
||||
}
|
||||
|
||||
static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base);
|
||||
|
||||
/* High resolution timer related functions */
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
|
@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void retrigger_next_event(void *arg);
|
||||
/*
|
||||
* Retrigger next event is called after clock was set
|
||||
*
|
||||
* Called with interrupts disabled via on_each_cpu()
|
||||
*/
|
||||
static void retrigger_next_event(void *arg)
|
||||
{
|
||||
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
|
||||
struct timespec realtime_offset, xtim, wtm, sleep;
|
||||
|
||||
if (!hrtimer_hres_active())
|
||||
return;
|
||||
|
||||
/* Optimized out for !HIGH_RES */
|
||||
get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
|
||||
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
|
||||
|
||||
/* Adjust CLOCK_REALTIME offset */
|
||||
raw_spin_lock(&base->lock);
|
||||
base->clock_base[HRTIMER_BASE_REALTIME].offset =
|
||||
timespec_to_ktime(realtime_offset);
|
||||
base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
|
||||
timespec_to_ktime(sleep);
|
||||
|
||||
hrtimer_force_reprogram(base, 0);
|
||||
raw_spin_unlock(&base->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Switch to high resolution mode
|
||||
|
@ -711,45 +728,10 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
|
|||
return 0;
|
||||
}
|
||||
static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
|
||||
static inline void retrigger_next_event(void *arg) { }
|
||||
|
||||
#endif /* CONFIG_HIGH_RES_TIMERS */
|
||||
|
||||
/*
|
||||
* Retrigger next event is called after clock was set
|
||||
*
|
||||
* Called with interrupts disabled via on_each_cpu()
|
||||
*/
|
||||
static void retrigger_next_event(void *arg)
|
||||
{
|
||||
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
|
||||
struct timespec realtime_offset, xtim, wtm, sleep;
|
||||
|
||||
if (!hrtimer_hres_active()) {
|
||||
raw_spin_lock(&base->lock);
|
||||
hrtimer_expire_cancelable(base);
|
||||
raw_spin_unlock(&base->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Optimized out for !HIGH_RES */
|
||||
get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
|
||||
set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
|
||||
|
||||
/* Adjust CLOCK_REALTIME offset */
|
||||
raw_spin_lock(&base->lock);
|
||||
base->clock_base[HRTIMER_BASE_REALTIME].offset =
|
||||
timespec_to_ktime(realtime_offset);
|
||||
base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
|
||||
timespec_to_ktime(sleep);
|
||||
base->clock_base[HRTIMER_BASE_REALTIME_COS].offset =
|
||||
timespec_to_ktime(realtime_offset);
|
||||
|
||||
hrtimer_expire_cancelable(base);
|
||||
|
||||
hrtimer_force_reprogram(base, 0);
|
||||
raw_spin_unlock(&base->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clock realtime was set
|
||||
*
|
||||
|
@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg)
|
|||
*/
|
||||
void clock_was_set(void)
{
	/*
	 * The config symbol is CONFIG_HIGH_RES_TIMERS, as used by the other
	 * guards in this file. A misspelled symbol is never defined, which
	 * would silently drop the cross-CPU retrigger below.
	 */
#ifdef CONFIG_HIGH_RES_TIMERS
	/* Retrigger the CPU local events everywhere */
	on_each_cpu(retrigger_next_event, NULL, 1);
#endif
	/* Cancelable timerfd timers are notified regardless of HIGH_RES */
	timerfd_clock_was_set();
}
|
||||
|
||||
/*
|
||||
|
@ -777,6 +762,7 @@ void hrtimers_resume(void)
|
|||
KERN_INFO "hrtimers_resume() called with IRQs enabled!");
|
||||
|
||||
retrigger_next_event(NULL);
|
||||
timerfd_clock_was_set();
|
||||
}
|
||||
|
||||
static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
|
||||
|
@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
|
|||
timer->state &= ~HRTIMER_STATE_CALLBACK;
|
||||
}
|
||||
|
||||
static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base)
|
||||
{
|
||||
struct timerqueue_node *node;
|
||||
struct hrtimer_clock_base *base;
|
||||
ktime_t now = ktime_get_real();
|
||||
|
||||
base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS];
|
||||
|
||||
while ((node = timerqueue_getnext(&base->active))) {
|
||||
struct hrtimer *timer;
|
||||
|
||||
timer = container_of(node, struct hrtimer, node);
|
||||
__run_hrtimer(timer, &now);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HIGH_RES_TIMERS
|
||||
|
||||
/*
|
||||
|
|
Loading…
Reference in a new issue