kprobes: Cure hotplug lock ordering issues

Converting the cpu hotplug locking to a percpu rwsem unearthed hidden lock
ordering problems.

There is a wide range of locks involved in this: kprobe_mutex,
jump_label_mutex, ftrace_lock, text_mutex, event_mutex, module_mutex,
func_hash->regex_lock and a gazillion of lock order permutations with
nested get_online_cpus() calls.

Some of those permutations are potential deadlocks even with the current
nesting hotplug locking scheme, but they can't be discovered by lockdep.

The conversion of the hotplug locking to a percpu rwsem requires to prevent
nested locking, so it's required to take the hotplug rwsem early in the
call chain and establish a proper lock order.

After quite some analysis and going down the wrong road severa times the
following lock order has been chosen:

kprobe_mutex -> cpus_rwsem -> jump_label_mutex -> text_mutex

For kprobes which hook on an ftrace function trace point, it's required to
drop cpus_rwsem before calling into the ftrace code to avoid a deadlock on
the func_hash->regex_lock.

[ Steven: Ftrace interaction fixes ]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Link: http://lkml.kernel.org/r/20170524081549.104864779@linutronix.de
This commit is contained in:
Thomas Gleixner 2017-05-24 10:15:36 +02:00
parent f2545b2d4c
commit 2d1e38f566

View file

@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
*/ */
static void do_optimize_kprobes(void) static void do_optimize_kprobes(void)
{ {
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
return;
/* /*
* The optimization/unoptimization refers online_cpus via * The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus. * stop_machine() and cpu-hotplug modifies online_cpus.
@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
* This combination can cause a deadlock (cpu-hotplug try to lock * This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus * text_mutex but stop_machine can not be done because online_cpus
* has been changed) * has been changed)
* To avoid this deadlock, we need to call get_online_cpus() * To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking. * for preventing cpu-hotplug outside of text_mutex locking.
*/ */
get_online_cpus(); lockdep_assert_cpus_held();
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
return;
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list); arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
put_online_cpus();
} }
/* /*
@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
{ {
struct optimized_kprobe *op, *tmp; struct optimized_kprobe *op, *tmp;
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
/* Unoptimization must be done anytime */ /* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list)) if (list_empty(&unoptimizing_list))
return; return;
/* Ditto to do_optimize_kprobes */
get_online_cpus();
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list); arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */ /* Loop free_list for disarming */
@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
list_del_init(&op->list); list_del_init(&op->list);
} }
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
put_online_cpus();
} }
/* Reclaim all kprobes on the free_list */ /* Reclaim all kprobes on the free_list */
@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
static void kprobe_optimizer(struct work_struct *work) static void kprobe_optimizer(struct work_struct *work)
{ {
mutex_lock(&kprobe_mutex); mutex_lock(&kprobe_mutex);
cpus_read_lock();
/* Lock modules while optimizing kprobes */ /* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex); mutex_lock(&module_mutex);
@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work_struct *work)
do_free_cleaned_kprobes(); do_free_cleaned_kprobes();
mutex_unlock(&module_mutex); mutex_unlock(&module_mutex);
cpus_read_unlock();
mutex_unlock(&kprobe_mutex); mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */ /* Step 5: Kick optimizer again if needed */
@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprobe *p)
/* Short cut to direct unoptimizing */ /* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op) static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{ {
get_online_cpus(); lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op); arch_unoptimize_kprobe(op);
put_online_cpus();
if (kprobe_disabled(&op->kp)) if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp); arch_disarm_kprobe(&op->kp);
} }
@ -791,6 +792,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
return; return;
/* For preparing optimization, jump_label_text_reserved() is called */ /* For preparing optimization, jump_label_text_reserved() is called */
cpus_read_lock();
jump_label_lock(); jump_label_lock();
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
@ -812,6 +814,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
out: out:
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
jump_label_unlock(); jump_label_unlock();
cpus_read_unlock();
} }
#ifdef CONFIG_SYSCTL #ifdef CONFIG_SYSCTL
@ -826,6 +829,7 @@ static void optimize_all_kprobes(void)
if (kprobes_allow_optimization) if (kprobes_allow_optimization)
goto out; goto out;
cpus_read_lock();
kprobes_allow_optimization = true; kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) { for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i]; head = &kprobe_table[i];
@ -833,6 +837,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p)) if (!kprobe_disabled(p))
optimize_kprobe(p); optimize_kprobe(p);
} }
cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n"); printk(KERN_INFO "Kprobes globally optimized\n");
out: out:
mutex_unlock(&kprobe_mutex); mutex_unlock(&kprobe_mutex);
@ -851,6 +856,7 @@ static void unoptimize_all_kprobes(void)
return; return;
} }
cpus_read_lock();
kprobes_allow_optimization = false; kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) { for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i]; head = &kprobe_table[i];
@ -859,6 +865,7 @@ static void unoptimize_all_kprobes(void)
unoptimize_kprobe(p, false); unoptimize_kprobe(p, false);
} }
} }
cpus_read_unlock();
mutex_unlock(&kprobe_mutex); mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */ /* Wait for unoptimizing completion */
@ -1010,14 +1017,11 @@ static void arm_kprobe(struct kprobe *kp)
arm_kprobe_ftrace(kp); arm_kprobe_ftrace(kp);
return; return;
} }
/* cpus_read_lock();
* Here, since __arm_kprobe() doesn't use stop_machine(),
* this doesn't cause deadlock on text_mutex. So, we don't
* need get_online_cpus().
*/
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
__arm_kprobe(kp); __arm_kprobe(kp);
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
cpus_read_unlock();
} }
/* Disarm a kprobe with text_mutex */ /* Disarm a kprobe with text_mutex */
@ -1027,10 +1031,12 @@ static void disarm_kprobe(struct kprobe *kp, bool reopt)
disarm_kprobe_ftrace(kp); disarm_kprobe_ftrace(kp);
return; return;
} }
/* Ditto */
cpus_read_lock();
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt); __disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
cpus_read_unlock();
} }
/* /*
@ -1298,13 +1304,10 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
int ret = 0; int ret = 0;
struct kprobe *ap = orig_p; struct kprobe *ap = orig_p;
cpus_read_lock();
/* For preparing optimization, jump_label_text_reserved() is called */ /* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock(); jump_label_lock();
/*
* Get online CPUs to avoid text_mutex deadlock.with stop machine,
* which is invoked by unoptimize_kprobe() in add_new_kprobe()
*/
get_online_cpus();
mutex_lock(&text_mutex); mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) { if (!kprobe_aggrprobe(orig_p)) {
@ -1352,8 +1355,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
out: out:
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
put_online_cpus();
jump_label_unlock(); jump_label_unlock();
cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) { if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED; ap->flags &= ~KPROBE_FLAG_DISABLED;
@ -1555,9 +1558,12 @@ int register_kprobe(struct kprobe *p)
goto out; goto out;
} }
mutex_lock(&text_mutex); /* Avoiding text modification */ cpus_read_lock();
/* Prevent text modification */
mutex_lock(&text_mutex);
ret = prepare_kprobe(p); ret = prepare_kprobe(p);
mutex_unlock(&text_mutex); mutex_unlock(&text_mutex);
cpus_read_unlock();
if (ret) if (ret)
goto out; goto out;
@ -1570,7 +1576,6 @@ int register_kprobe(struct kprobe *p)
/* Try to optimize kprobe */ /* Try to optimize kprobe */
try_to_optimize_kprobe(p); try_to_optimize_kprobe(p);
out: out:
mutex_unlock(&kprobe_mutex); mutex_unlock(&kprobe_mutex);