Merge branch 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
Pull workqueue update from Tejun Heo:
 "Workqueue changes for v4.5.  One cleanup patch and three to improve
  the debuggability.

  Workqueue now has a stall detector which dumps workqueue state if any
  worker pool hasn't made forward progress over a certain amount of time
  (30s by default) and also triggers a warning if a workqueue which can
  be used in memory reclaim path tries to wait on something which can't
  be.

  These should make workqueue hangs a lot easier to debug."

* 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: simplify the apply_workqueue_attrs_locked()
  workqueue: implement lockup detector
  watchdog: introduce touch_softlockup_watchdog_sched()
  workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue
commit 0f8c790103
8 changed files with 261 additions and 15 deletions
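For orientation, a stall report from the new detector looks roughly like the following. This is a reconstructed example assembled from the pr_emerg()/pr_cont() format strings added to kernel/workqueue.c below; the pool attributes and duration are illustrative:

    BUG: workqueue lockup - pool cpus=0 node=0 flags=0x0 nice=0 stuck for 31s!
    Showing busy workqueues and worker pools:
    ...

The dump that follows the banner comes from show_workqueue_state().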
Documentation/kernel-parameters.txt
@@ -4140,6 +4140,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			or other driver-specific files in the
 			Documentation/watchdog/ directory.
 
+	workqueue.watchdog_thresh=
+			If CONFIG_WQ_WATCHDOG is configured, workqueue can
+			warn stall conditions and dump internal state to
+			help debugging.  0 disables workqueue stall
+			detection; otherwise, it's the stall threshold
+			duration in seconds.  The default value is 30 and
+			it can be updated at runtime by writing to the
+			corresponding sysfs file.
+
 	workqueue.disable_numa
 			By default, all work items queued to unbound
 			workqueues are affine to the NUMA nodes they're
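As a usage note: with CONFIG_WQ_WATCHDOG enabled, the threshold can be chosen at boot via the parameter documented above, e.g. (illustrative value):

    workqueue.watchdog_thresh=60

on the kernel command line sets a 60-second stall threshold, while a value of 0 disables detection entirely.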
include/linux/sched.h
@@ -377,6 +377,7 @@ extern void scheduler_tick(void);
 extern void sched_show_task(struct task_struct *p);
 
 #ifdef CONFIG_LOCKUP_DETECTOR
+extern void touch_softlockup_watchdog_sched(void);
 extern void touch_softlockup_watchdog(void);
 extern void touch_softlockup_watchdog_sync(void);
 extern void touch_all_softlockup_watchdogs(void);
@@ -387,6 +388,9 @@ extern unsigned int softlockup_panic;
 extern unsigned int hardlockup_panic;
 void lockup_detector_init(void);
 #else
+static inline void touch_softlockup_watchdog_sched(void)
+{
+}
 static inline void touch_softlockup_watchdog(void)
 {
 }
include/linux/workqueue.h
@@ -618,4 +618,10 @@ static inline int workqueue_sysfs_register(struct workqueue_struct *wq)
 { return 0; }
 #endif	/* CONFIG_SYSFS */
 
+#ifdef CONFIG_WQ_WATCHDOG
+void wq_watchdog_touch(int cpu);
+#else	/* CONFIG_WQ_WATCHDOG */
+static inline void wq_watchdog_touch(int cpu) { }
+#endif	/* CONFIG_WQ_WATCHDOG */
+
 #endif
kernel/sched/clock.c
@@ -354,7 +354,7 @@ void sched_clock_idle_wakeup_event(u64 delta_ns)
 		return;
 
 	sched_clock_tick();
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sched();
 }
 EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
 
kernel/time/tick-sched.c
@@ -143,7 +143,7 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
 	 * when we go busy again does not account too much ticks.
 	 */
 	if (ts->tick_stopped) {
-		touch_softlockup_watchdog();
+		touch_softlockup_watchdog_sched();
 		if (is_idle_task(current))
 			ts->idle_jiffies++;
 	}
@@ -430,7 +430,7 @@ static void tick_nohz_update_jiffies(ktime_t now)
 	tick_do_update_jiffies64(now);
 	local_irq_restore(flags);
 
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sched();
 }
 
 /*
@@ -717,7 +717,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
 	update_cpu_load_nohz(active);
 
 	calc_load_exit_idle();
-	touch_softlockup_watchdog();
+	touch_softlockup_watchdog_sched();
 	/*
 	 * Cancel the scheduled timer and restore the tick
 	 */
kernel/watchdog.c
@@ -20,6 +20,7 @@
 #include <linux/smpboot.h>
 #include <linux/sched/rt.h>
 #include <linux/tick.h>
+#include <linux/workqueue.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
@@ -225,7 +226,15 @@ static void __touch_watchdog(void)
 	__this_cpu_write(watchdog_touch_ts, get_timestamp());
 }
 
-void touch_softlockup_watchdog(void)
+/**
+ * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
+ *
+ * Call when the scheduler may have stalled for legitimate reasons
+ * preventing the watchdog task from executing - e.g. the scheduler
+ * entering idle state.  This should only be used for scheduler events.
+ * Use touch_softlockup_watchdog() for everything else.
+ */
+void touch_softlockup_watchdog_sched(void)
 {
 	/*
 	 * Preemption can be enabled.  It doesn't matter which CPU's timestamp
@@ -233,6 +242,12 @@ void touch_softlockup_watchdog(void)
 	 */
 	raw_cpu_write(watchdog_touch_ts, 0);
 }
+
+void touch_softlockup_watchdog(void)
+{
+	touch_softlockup_watchdog_sched();
+	wq_watchdog_touch(raw_smp_processor_id());
+}
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
 void touch_all_softlockup_watchdogs(void)
@@ -246,6 +261,7 @@ void touch_all_softlockup_watchdogs(void)
 	 */
 	for_each_watchdog_cpu(cpu)
 		per_cpu(watchdog_touch_ts, cpu) = 0;
+	wq_watchdog_touch(-1);
 }
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
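The split matters for callers: scheduler-internal paths that legitimately keep the watchdog task from running (tick stopped, idle) now call touch_softlockup_watchdog_sched(), while everything else keeps using touch_softlockup_watchdog(), which after this change also resets the workqueue watchdog for the local CPU. A minimal sketch of a non-scheduler caller; device_poll_busy() is a hypothetical helper:

    #include <linux/sched.h>	/* touch_softlockup_watchdog() */

    /* Hypothetical driver loop that hogs the CPU for a legitimate reason;
     * the touch now quiets both the soft-lockup detector and the new
     * workqueue watchdog on this CPU. */
    static void wait_for_device_ready(void)
    {
    	while (device_poll_busy())	/* hypothetical helper */
    		touch_softlockup_watchdog();
    }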
kernel/workqueue.c
@@ -148,6 +148,8 @@ struct worker_pool {
 	int			id;		/* I: pool ID */
 	unsigned int		flags;		/* X: flags */
 
+	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
+
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
 
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
 	trace_workqueue_activate_work(work);
+	if (list_empty(&pwq->pool->worklist))
+		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
 	pwq->nr_active++;
@@ -1385,6 +1389,8 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		trace_workqueue_activate_work(work);
 		pwq->nr_active++;
 		worklist = &pwq->pool->worklist;
+		if (list_empty(worklist))
+			pwq->pool->watchdog_ts = jiffies;
 	} else {
 		work_flags |= WORK_STRUCT_DELAYED;
 		worklist = &pwq->delayed_works;
@@ -2157,6 +2163,8 @@ static int worker_thread(void *__worker)
 			list_first_entry(&pool->worklist,
 					 struct work_struct, entry);
 
+		pool->watchdog_ts = jiffies;
+
 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
 			/* optimization path, not strictly necessary */
 			process_one_work(worker, work);
@@ -2240,6 +2248,7 @@ static int rescuer_thread(void *__rescuer)
 					struct pool_workqueue, mayday_node);
 		struct worker_pool *pool = pwq->pool;
 		struct work_struct *work, *n;
+		bool first = true;
 
 		__set_current_state(TASK_RUNNING);
 		list_del_init(&pwq->mayday_node);
@@ -2256,9 +2265,14 @@ static int rescuer_thread(void *__rescuer)
 		 * process'em.
 		 */
 		WARN_ON_ONCE(!list_empty(scheduled));
-		list_for_each_entry_safe(work, n, &pool->worklist, entry)
-			if (get_work_pwq(work) == pwq)
+		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+			if (get_work_pwq(work) == pwq) {
+				if (first)
+					pool->watchdog_ts = jiffies;
 				move_linked_works(work, scheduled, &n);
+			}
+			first = false;
+		}
 
 		if (!list_empty(scheduled)) {
 			process_scheduled_works(rescuer);
@@ -2316,6 +2330,37 @@ static int rescuer_thread(void *__rescuer)
 	goto repeat;
 }
 
+/**
+ * check_flush_dependency - check for flush dependency sanity
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL for workqueue flushes)
+ *
+ * %current is trying to flush the whole @target_wq or @target_work on it.
+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
+ * reclaiming memory or running on a workqueue which doesn't have
+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
+ * a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+				   struct work_struct *target_work)
+{
+	work_func_t target_func = target_work ? target_work->func : NULL;
+	struct worker *worker;
+
+	if (target_wq->flags & WQ_MEM_RECLAIM)
+		return;
+
+	worker = current_wq_worker();
+
+	WARN_ONCE(current->flags & PF_MEMALLOC,
+		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  current->pid, current->comm, target_wq->name, target_func);
+	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  worker->current_pwq->wq->name, worker->current_func,
+		  target_wq->name, target_func);
+}
+
 struct wq_barrier {
 	struct work_struct	work;
 	struct completion	done;
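To make the warning concrete, here is a hedged sketch (hypothetical module code, illustrative names) of the dependency the second WARN_ONCE catches: a work item running on a WQ_MEM_RECLAIM workqueue flushing a workqueue without the flag. Under memory pressure the plain workqueue has no rescuer guaranteeing forward progress, so the flush can wait forever:

    #include <linux/workqueue.h>

    static struct workqueue_struct *reclaim_wq;	/* alloc_workqueue("...", WQ_MEM_RECLAIM, 0) */
    static struct workqueue_struct *plain_wq;	/* alloc_workqueue("...", 0, 0) */

    static void reclaim_work_fn(struct work_struct *work)
    {
    	/*
    	 * reclaim_wq may be running this item as part of memory reclaim;
    	 * waiting on plain_wq can then block indefinitely.  With this
    	 * patch, check_flush_dependency() fires a WARN_ONCE here.
    	 */
    	flush_workqueue(plain_wq);
    }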
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
 		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
 	}
 
+	check_flush_dependency(wq, NULL);
+
 	mutex_unlock(&wq->mutex);
 
 	wait_for_completion(&this_flusher.done);
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 		pwq = worker->current_pwq;
 	}
 
+	check_flush_dependency(pwq->wq, work);
+
 	insert_wq_barrier(pwq, barr, work, worker);
 	spin_unlock_irq(&pool->lock);
 
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	pool->cpu = -1;
 	pool->node = NUMA_NO_NODE;
 	pool->flags |= POOL_DISASSOCIATED;
+	pool->watchdog_ts = jiffies;
 	INIT_LIST_HEAD(&pool->worklist);
 	INIT_LIST_HEAD(&pool->idle_list);
 	hash_init(pool->busy_hash);
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 					const struct workqueue_attrs *attrs)
 {
 	struct apply_wqattrs_ctx *ctx;
-	int ret = -ENOMEM;
 
 	/* only unbound workqueues can change attributes */
 	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 		return -EINVAL;
 
 	ctx = apply_wqattrs_prepare(wq, attrs);
+	if (!ctx)
+		return -ENOMEM;
 
 	/* the ctx has been prepared successfully, let's commit it */
-	if (ctx) {
-		apply_wqattrs_commit(ctx);
-		ret = 0;
-	}
-
+	apply_wqattrs_commit(ctx);
 	apply_wqattrs_cleanup(ctx);
 
-	return ret;
+	return 0;
 }
 
 /**
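For context, the simplified apply_workqueue_attrs_locked() sits under apply_workqueue_attrs(). A hedged sketch of a typical kernel-side caller (the function name is illustrative; alloc_workqueue_attrs() takes a gfp mask as of this kernel):

    #include <linux/workqueue.h>

    /* Retune an unbound workqueue; apply_workqueue_attrs() returns
     * -EINVAL for non-WQ_UNBOUND workqueues, per the WARN_ON above. */
    static int retune_unbound_wq(struct workqueue_struct *wq)
    {
    	struct workqueue_attrs *attrs;
    	int ret;

    	attrs = alloc_workqueue_attrs(GFP_KERNEL);
    	if (!attrs)
    		return -ENOMEM;

    	attrs->nice = -5;	/* run workers at higher priority */
    	ret = apply_workqueue_attrs(wq, attrs);
    	free_workqueue_attrs(attrs);
    	return ret;
    }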
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
 
 		pr_info("pool %d:", pool->id);
 		pr_cont_pool_info(pool);
-		pr_cont(" workers=%d", pool->nr_workers);
+		pr_cont(" hung=%us workers=%d",
+			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+			pool->nr_workers);
 		if (pool->manager)
 			pr_cont(" manager: %d",
 				task_pid_nr(pool->manager->task));
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
 #endif	/* CONFIG_SYSFS */
 
+/*
+ * Workqueue watchdog.
+ *
+ * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
+ * flush dependency, a concurrency managed work item which stays RUNNING
+ * indefinitely.  Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+	TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+	int cpu;
+
+	wq_watchdog_touched = jiffies;
+	for_each_possible_cpu(cpu)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+	bool lockup_detected = false;
+	struct worker_pool *pool;
+	int pi;
+
+	if (!thresh)
+		return;
+
+	rcu_read_lock();
+
+	for_each_pool(pool, pi) {
+		unsigned long pool_ts, touched, ts;
+
+		if (list_empty(&pool->worklist))
+			continue;
+
+		/* get the latest of pool and touched timestamps */
+		pool_ts = READ_ONCE(pool->watchdog_ts);
+		touched = READ_ONCE(wq_watchdog_touched);
+
+		if (time_after(pool_ts, touched))
+			ts = pool_ts;
+		else
+			ts = touched;
+
+		if (pool->cpu >= 0) {
+			unsigned long cpu_touched =
+				READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+						  pool->cpu));
+			if (time_after(cpu_touched, ts))
+				ts = cpu_touched;
+		}
+
+		/* did we stall? */
+		if (time_after(jiffies, ts + thresh)) {
+			lockup_detected = true;
+			pr_emerg("BUG: workqueue lockup - pool");
+			pr_cont_pool_info(pool);
+			pr_cont(" stuck for %us!\n",
+				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (lockup_detected)
+		show_workqueue_state();
+
+	wq_watchdog_reset_touched();
+	mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+	if (cpu >= 0)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+	else
+		wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+	wq_watchdog_thresh = 0;
+	del_timer_sync(&wq_watchdog_timer);
+
+	if (thresh) {
+		wq_watchdog_thresh = thresh;
+		wq_watchdog_reset_touched();
+		mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+	}
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+					const struct kernel_param *kp)
+{
+	unsigned long thresh;
+	int ret;
+
+	ret = kstrtoul(val, 0, &thresh);
+	if (ret)
+		return ret;
+
+	if (system_wq)
+		wq_watchdog_set_thresh(thresh);
+	else
+		wq_watchdog_thresh = thresh;
+
+	return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+	.set	= wq_watchdog_param_set_thresh,
+	.get	= param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+		0644);
+
+static void wq_watchdog_init(void)
+{
+	wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else	/* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif	/* CONFIG_WQ_WATCHDOG */
+
 static void __init wq_numa_init(void)
 {
 	cpumask_var_t *tbl;
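Since module_param_cb() above registers the parameter with 0644 permissions, the threshold is adjustable at runtime as the documentation hunk promises. A minimal userspace sketch (the sysfs path assumes workqueue.c is built in, which it always is):

    #include <stdio.h>

    /* Raise the workqueue watchdog threshold to 60 seconds. */
    int main(void)
    {
    	FILE *f = fopen("/sys/module/workqueue/parameters/watchdog_thresh", "w");

    	if (!f) {
    		perror("fopen");
    		return 1;
    	}
    	fprintf(f, "60\n");
    	return fclose(f) ? 1 : 0;
    }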
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
 	       !system_unbound_wq || !system_freezable_wq ||
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq);
+
+	wq_watchdog_init();
+
 	return 0;
 }
 early_initcall(init_workqueues);
lib/Kconfig.debug
@@ -812,6 +812,17 @@ config BOOTPARAM_HUNG_TASK_PANIC_VALUE
 	default 0 if !BOOTPARAM_HUNG_TASK_PANIC
 	default 1 if BOOTPARAM_HUNG_TASK_PANIC
 
+config WQ_WATCHDOG
+	bool "Detect Workqueue Stalls"
+	depends on DEBUG_KERNEL
+	help
+	  Say Y here to enable stall detection on workqueues.  If a
+	  worker pool doesn't make forward progress on a pending work
+	  item for over a given amount of time, 30s by default, a
+	  warning message is printed along with dump of workqueue
+	  state.  This can be configured through kernel parameter
+	  "workqueue.watchdog_thresh" and its sysfs counterpart.
+
 endmenu # "Debug lockups and hangs"
 
 config PANIC_ON_OOPS
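To try the detector, the new option goes in the kernel config alongside its dependency, e.g.:

    CONFIG_DEBUG_KERNEL=y
    CONFIG_WQ_WATCHDOG=y

optionally combined with the workqueue.watchdog_thresh= boot parameter documented above.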