locking/lockdep: Rework FS_RECLAIM annotation

A while ago someone, and I cannot find the email just now, asked if we
could not implement the RECLAIM_FS inversion stuff with a 'fake' lock
like we use for other things like workqueues etc. I think this should
be possible which allows reducing the 'irq' states and will reduce the
amount of __bfs() lookups we do.

Removing the 1 IRQ state results in 4 fewer __bfs() walks per
dependency, improving lockdep performance. And by moving this
annotation out of the lockdep code it becomes easier for the mm people
to extend.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Byungchul Park <byungchul.park@lge.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Nikolay Borisov <nborisov@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: akpm@linux-foundation.org
Cc: boqun.feng@gmail.com
Cc: iamjoonsoo.kim@lge.com
Cc: kernel-team@lge.com
Cc: kirill@shutemov.name
Cc: npiggin@gmail.com
Cc: walken@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Peter Zijlstra 2017-03-03 10:13:38 +01:00 committed by Ingo Molnar
parent a9668cd6ee
commit d92a8cfcb3
10 changed files with 75 additions and 120 deletions

View file

@ -28,6 +28,7 @@
#include <linux/debugfs.h>
#include <linux/sort.h>
#include <linux/sched/mm.h>
#include "intel_drv.h"
static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node)
@ -4331,7 +4332,7 @@ i915_drop_caches_set(void *data, u64 val)
mutex_unlock(&dev->struct_mutex);
}
lockdep_set_current_reclaim_state(GFP_KERNEL);
fs_reclaim_acquire(GFP_KERNEL);
if (val & DROP_BOUND)
i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND);
@ -4340,7 +4341,7 @@ i915_drop_caches_set(void *data, u64 val)
if (val & DROP_SHRINK_ALL)
i915_gem_shrink_all(dev_priv);
lockdep_clear_current_reclaim_state();
fs_reclaim_release(GFP_KERNEL);
if (val & DROP_FREED) {
synchronize_rcu();

View file

@ -29,7 +29,7 @@ extern int lock_stat;
* We'd rather not expose kernel/lockdep_states.h this wide, but we do need
* the total number of states... :-(
*/
#define XXX_LOCK_USAGE_STATES (1+3*4)
#define XXX_LOCK_USAGE_STATES (1+2*4)
/*
* NR_LOCKDEP_CACHING_CLASSES ... Number of classes
@ -363,10 +363,6 @@ static inline void lock_set_subclass(struct lockdep_map *lock,
extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip);
extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask);
extern void lockdep_clear_current_reclaim_state(void);
extern void lockdep_trace_alloc(gfp_t mask);
struct pin_cookie { unsigned int val; };
#define NIL_COOKIE (struct pin_cookie){ .val = 0U, }
@ -375,7 +371,7 @@ extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock);
extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie);
extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie);
# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0,
# define INIT_LOCKDEP .lockdep_recursion = 0,
#define lockdep_depth(tsk) (debug_locks ? (tsk)->lockdep_depth : 0)
@ -416,9 +412,6 @@ static inline void lockdep_on(void)
# define lock_downgrade(l, i) do { } while (0)
# define lock_set_class(l, n, k, s, i) do { } while (0)
# define lock_set_subclass(l, s, i) do { } while (0)
# define lockdep_set_current_reclaim_state(g) do { } while (0)
# define lockdep_clear_current_reclaim_state() do { } while (0)
# define lockdep_trace_alloc(g) do { } while (0)
# define lockdep_info() do { } while (0)
# define lockdep_init_map(lock, name, key, sub) \
do { (void)(name); (void)(key); } while (0)

View file

@ -846,7 +846,6 @@ struct task_struct {
int lockdep_depth;
unsigned int lockdep_recursion;
struct held_lock held_locks[MAX_LOCK_DEPTH];
gfp_t lockdep_reclaim_gfp;
#endif
#ifdef CONFIG_UBSAN

View file

@ -167,6 +167,14 @@ static inline gfp_t current_gfp_context(gfp_t flags)
return flags;
}
/*
 * fs_reclaim_acquire()/fs_reclaim_release() take the gfp mask of the
 * allocation or reclaim pass being annotated. They are out-of-line
 * functions only when CONFIG_LOCKDEP is set; otherwise they are empty
 * inline stubs, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_LOCKDEP
extern void fs_reclaim_acquire(gfp_t gfp_mask);
extern void fs_reclaim_release(gfp_t gfp_mask);
#else
static inline void fs_reclaim_acquire(gfp_t gfp_mask) { }
static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
static inline unsigned int memalloc_noio_save(void)
{
unsigned int flags = current->flags & PF_MEMALLOC_NOIO;

View file

@ -344,14 +344,12 @@ EXPORT_SYMBOL(lockdep_on);
#if VERBOSE
# define HARDIRQ_VERBOSE 1
# define SOFTIRQ_VERBOSE 1
# define RECLAIM_VERBOSE 1
#else
# define HARDIRQ_VERBOSE 0
# define SOFTIRQ_VERBOSE 0
# define RECLAIM_VERBOSE 0
#endif
#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE
#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE
/*
* Quick filtering for interesting events:
*/
@ -2567,14 +2565,6 @@ static int SOFTIRQ_verbose(struct lock_class *class)
return 0;
}
/*
 * Verbosity hook for the RECLAIM_FS state: yields class_filter(class)
 * when RECLAIM_VERBOSE is non-zero and 0 otherwise.
 */
static int RECLAIM_FS_verbose(struct lock_class *class)
{
	int verbose = 0;

#if RECLAIM_VERBOSE
	verbose = class_filter(class);
#endif
	return verbose;
}
#define STRICT_READ_CHECKS 1
static int (*state_verbose_f[])(struct lock_class *class) = {
@ -2870,57 +2860,6 @@ void trace_softirqs_off(unsigned long ip)
debug_atomic_inc(redundant_softirqs_off);
}
/*
 * Call mark_held_locks(current, RECLAIM_FS) for an allocation done with
 * @gfp_mask, unless one of the bail-out conditions below applies.
 *
 * @gfp_mask: allocation mask; filtered through current_gfp_context() first.
 * @flags:    saved IRQ flags of the caller, used for the IRQs-disabled check.
 */
static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
{
	struct task_struct *tsk = current;
	gfp_t mask;

	/* Nothing to do when debug_locks is clear. */
	if (unlikely(!debug_locks))
		return;

	mask = current_gfp_context(gfp_mask);

	/* Without direct reclaim the allocation never waits on reclaim. */
	if (!(mask & __GFP_DIRECT_RECLAIM))
		return;

	/* A PF_MEMALLOC task won't enter reclaim unless __GFP_NOMEMALLOC is set. */
	if (!(mask & __GFP_NOMEMALLOC) && (tsk->flags & PF_MEMALLOC))
		return;

	/* Only __GFP_FS allocations are tracked for now. */
	if ((tsk->flags & PF_MEMALLOC_NOFS) || !(mask & __GFP_FS))
		return;

	/* __GFP_FS allocations with IRQs disabled are a bug: warn and bail. */
	if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags)))
		return;

	/* The allocation explicitly asked for lockdep to be left out. */
	if (mask & __GFP_NOLOCKDEP)
		return;

	mark_held_locks(tsk, RECLAIM_FS);
}
static void check_flags(unsigned long flags);
/*
 * Entry point for annotating an allocation done with @gfp_mask.
 *
 * Returns immediately when the current task already has lockdep_recursion
 * set. Otherwise it runs __lockdep_trace_alloc() between
 * raw_local_irq_save() and raw_local_irq_restore(), with lockdep_recursion
 * raised for the duration of the call.
 */
void lockdep_trace_alloc(gfp_t gfp_mask)
{
unsigned long flags;
/* Already marked as being inside lockdep: do not recurse. */
if (unlikely(current->lockdep_recursion))
return;
raw_local_irq_save(flags);
check_flags(flags);
/* Raise the recursion marker around the actual work. */
current->lockdep_recursion = 1;
__lockdep_trace_alloc(gfp_mask, flags);
current->lockdep_recursion = 0;
raw_local_irq_restore(flags);
}
static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
{
/*
@ -2966,22 +2905,6 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock)
}
}
/*
* We reuse the irq context infrastructure more broadly as a general
* context checking code. This tests GFP_FS recursion (a lock taken
* during reclaim for a GFP_FS allocation is held over a GFP_FS
* allocation).
*/
if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) {
if (hlock->read) {
if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ))
return 0;
} else {
if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS))
return 0;
}
}
return 1;
}
@ -3040,10 +2963,6 @@ static inline int separate_irq_context(struct task_struct *curr,
return 0;
}
/*
 * Empty variant of lockdep_trace_alloc(): accepts @gfp_mask and does nothing.
 * NOTE(review): the #endif that follows suggests this is the build without
 * CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING; the matching #if/#else is
 * outside this view, so confirm.
 */
void lockdep_trace_alloc(gfp_t gfp_mask)
{
}
#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
/*
@ -3952,18 +3871,6 @@ void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie)
}
EXPORT_SYMBOL_GPL(lock_unpin_lock);
/*
 * Store in the current task's lockdep_reclaim_gfp the value of @gfp_mask
 * after it has been filtered through current_gfp_context().
 */
void lockdep_set_current_reclaim_state(gfp_t gfp_mask)
{
	gfp_t reclaim_gfp = current_gfp_context(gfp_mask);

	current->lockdep_reclaim_gfp = reclaim_gfp;
}
EXPORT_SYMBOL_GPL(lockdep_set_current_reclaim_state);
/*
 * Counterpart of lockdep_set_current_reclaim_state(): resets the current
 * task's lockdep_reclaim_gfp to 0.
 */
void lockdep_clear_current_reclaim_state(void)
{
current->lockdep_reclaim_gfp = 0;
}
EXPORT_SYMBOL_GPL(lockdep_clear_current_reclaim_state);
#ifdef CONFIG_LOCK_STAT
static int
print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,

View file

@ -6,4 +6,3 @@
*/
LOCKDEP_STATE(HARDIRQ)
LOCKDEP_STATE(SOFTIRQ)
LOCKDEP_STATE(RECLAIM_FS)

View file

@ -66,6 +66,7 @@
#include <linux/kthread.h>
#include <linux/memcontrol.h>
#include <linux/ftrace.h>
#include <linux/lockdep.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@ -3490,6 +3491,47 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
}
#endif /* CONFIG_COMPACTION */
#ifdef CONFIG_LOCKDEP
/*
 * Statically initialised lockdep map named "fs_reclaim". It is the map
 * that fs_reclaim_acquire() and fs_reclaim_release() acquire and release.
 */
struct lockdep_map __fs_reclaim_map =
STATIC_LOCKDEP_MAP_INIT("fs_reclaim", &__fs_reclaim_map);
/*
 * Decide whether an allocation done with @gfp_mask takes part in the
 * fs_reclaim annotation.
 *
 * The mask is first filtered through current_gfp_context(). Returns true
 * only if the result allows direct reclaim, the task is not a PF_MEMALLOC
 * task allocating without __GFP_NOMEMALLOC, __GFP_FS is set and
 * __GFP_NOLOCKDEP is not.
 */
static bool __need_fs_reclaim(gfp_t gfp_mask)
{
	const gfp_t mask = current_gfp_context(gfp_mask);

	/* Without direct reclaim the allocation never waits on reclaim. */
	if (!(mask & __GFP_DIRECT_RECLAIM))
		return false;

	/* A PF_MEMALLOC task won't enter reclaim unless __GFP_NOMEMALLOC is set. */
	if (!(mask & __GFP_NOMEMALLOC) && (current->flags & PF_MEMALLOC))
		return false;

	/*
	 * Only __GFP_FS allocations are of interest for now, and
	 * __GFP_NOLOCKDEP opts the allocation out explicitly.
	 */
	return (mask & __GFP_FS) && !(mask & __GFP_NOLOCKDEP);
}
/*
 * Acquire the "fs_reclaim" lockdep map for an allocation or reclaim pass
 * done with @gfp_mask. Does nothing when __need_fs_reclaim() rejects the
 * mask.
 */
void fs_reclaim_acquire(gfp_t gfp_mask)
{
	if (!__need_fs_reclaim(gfp_mask))
		return;

	lock_map_acquire(&__fs_reclaim_map);
}
EXPORT_SYMBOL_GPL(fs_reclaim_acquire);
/*
 * Release the "fs_reclaim" lockdep map for an allocation or reclaim pass
 * done with @gfp_mask. Does nothing when __need_fs_reclaim() rejects the
 * mask, mirroring the check made in fs_reclaim_acquire().
 */
void fs_reclaim_release(gfp_t gfp_mask)
{
	if (!__need_fs_reclaim(gfp_mask))
		return;

	lock_map_release(&__fs_reclaim_map);
}
EXPORT_SYMBOL_GPL(fs_reclaim_release);
#endif
/* Perform direct synchronous page reclaim */
static int
__perform_reclaim(gfp_t gfp_mask, unsigned int order,
@ -3504,7 +3546,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
/* We now go into synchronous reclaim */
cpuset_memory_pressure_bump();
noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(gfp_mask);
fs_reclaim_acquire(gfp_mask);
reclaim_state.reclaimed_slab = 0;
current->reclaim_state = &reclaim_state;
@ -3512,7 +3554,7 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order,
ac->nodemask);
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
fs_reclaim_release(gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
cond_resched();
@ -4041,7 +4083,8 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
*alloc_flags |= ALLOC_CPUSET;
}
lockdep_trace_alloc(gfp_mask);
fs_reclaim_acquire(gfp_mask);
fs_reclaim_release(gfp_mask);
might_sleep_if(gfp_mask & __GFP_DIRECT_RECLAIM);

View file

@ -43,6 +43,7 @@ struct kmem_cache {
#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/random.h>
#include <linux/sched/mm.h>
/*
* State of the slab allocator.
@ -412,7 +413,10 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
gfp_t flags)
{
flags &= gfp_allowed_mask;
lockdep_trace_alloc(flags);
fs_reclaim_acquire(flags);
fs_reclaim_release(flags);
might_sleep_if(gfpflags_allow_blocking(flags));
if (should_failslab(s, flags))

View file

@ -432,7 +432,8 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
gfp &= gfp_allowed_mask;
lockdep_trace_alloc(gfp);
fs_reclaim_acquire(gfp);
fs_reclaim_release(gfp);
if (size < PAGE_SIZE - align) {
if (!size)
@ -538,7 +539,8 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
flags &= gfp_allowed_mask;
lockdep_trace_alloc(flags);
fs_reclaim_acquire(flags);
fs_reclaim_release(flags);
if (c->size < PAGE_SIZE) {
b = slob_alloc(c->size, flags, c->align, node);

View file

@ -3525,8 +3525,6 @@ static int kswapd(void *p)
};
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
lockdep_set_current_reclaim_state(GFP_KERNEL);
if (!cpumask_empty(cpumask))
set_cpus_allowed_ptr(tsk, cpumask);
current->reclaim_state = &reclaim_state;
@ -3585,14 +3583,15 @@ static int kswapd(void *p)
*/
trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
alloc_order);
fs_reclaim_acquire(GFP_KERNEL);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
fs_reclaim_release(GFP_KERNEL);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
}
tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
current->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
return 0;
}
@ -3655,14 +3654,14 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim)
unsigned int noreclaim_flag;
noreclaim_flag = memalloc_noreclaim_save();
lockdep_set_current_reclaim_state(sc.gfp_mask);
fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
p->reclaim_state = NULL;
lockdep_clear_current_reclaim_state();
fs_reclaim_release(sc.gfp_mask);
memalloc_noreclaim_restore(noreclaim_flag);
return nr_reclaimed;
@ -3847,7 +3846,7 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
*/
noreclaim_flag = memalloc_noreclaim_save();
p->flags |= PF_SWAPWRITE;
lockdep_set_current_reclaim_state(sc.gfp_mask);
fs_reclaim_acquire(sc.gfp_mask);
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
@ -3862,9 +3861,9 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
}
p->reclaim_state = NULL;
fs_reclaim_release(gfp_mask);
current->flags &= ~PF_SWAPWRITE;
memalloc_noreclaim_restore(noreclaim_flag);
lockdep_clear_current_reclaim_state();
return sc.nr_reclaimed >= nr_pages;
}