d50d82faa0
In kernel 4.17 I removed some code from dm-bufio that did slab cache
merging (commit 21bb132767: "dm bufio: remove code that merges slab
caches") - both slab and slub support merging caches with identical
attributes, so dm-bufio now just calls kmem_cache_create and relies on
implicit merging.
This uncovered a bug in the slub subsystem - if we delete a cache and
immediately create another cache with the same attributes, it fails
because of duplicate filename in /sys/kernel/slab/. The slub subsystem
offloads freeing the cache to a workqueue - and if we create the new
cache before the workqueue runs, it complains because of duplicate
filename in sysfs.
This patch fixes the bug by moving the call of kobject_del from
sysfs_slab_remove_workfn to shutdown_cache. kobject_del must be called
while we hold slab_mutex - so that the sysfs entry is deleted before a
cache with the same attributes could be created.
Running device-mapper-test-suite with:
dmtest run --suite thin-provisioning -n /commit_failure_causes_fallback/
triggered:
Buffer I/O error on dev dm-0, logical block 1572848, async page read
device-mapper: thin: 253:1: metadata operation 'dm_pool_alloc_data_block' failed: error = -5
device-mapper: thin: 253:1: aborting current metadata transaction
sysfs: cannot create duplicate filename '/kernel/slab/:a-0000144'
CPU: 2 PID: 1037 Comm: kworker/u48:1 Not tainted 4.17.0.snitm+ #25
Hardware name: Supermicro SYS-1029P-WTR/X11DDW-L, BIOS 2.0a 12/06/2017
Workqueue: dm-thin do_worker [dm_thin_pool]
Call Trace:
dump_stack+0x5a/0x73
sysfs_warn_dup+0x58/0x70
sysfs_create_dir_ns+0x77/0x80
kobject_add_internal+0xba/0x2e0
kobject_init_and_add+0x70/0xb0
sysfs_slab_add+0xb1/0x250
__kmem_cache_create+0x116/0x150
create_cache+0xd9/0x1f0
kmem_cache_create_usercopy+0x1c1/0x250
kmem_cache_create+0x18/0x20
dm_bufio_client_create+0x1ae/0x410 [dm_bufio]
dm_block_manager_create+0x5e/0x90 [dm_persistent_data]
__create_persistent_data_objects+0x38/0x940 [dm_thin_pool]
dm_pool_abort_metadata+0x64/0x90 [dm_thin_pool]
metadata_operation_failed+0x59/0x100 [dm_thin_pool]
alloc_data_block.isra.53+0x86/0x180 [dm_thin_pool]
process_cell+0x2a3/0x550 [dm_thin_pool]
do_worker+0x28d/0x8f0 [dm_thin_pool]
process_one_work+0x171/0x370
worker_thread+0x49/0x3f0
kthread+0xf8/0x130
ret_from_fork+0x35/0x40
kobject_add_internal failed for :a-0000144 with -EEXIST, don't try to register things with the same name in the same directory.
kmem_cache_create(dm_bufio_buffer-16) failed with error -17
Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1806151817130.6333@file01.intranet.prod.int.rdu2.redhat.com
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reported-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
185 lines
5.6 KiB
C
185 lines
5.6 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_SLUB_DEF_H
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
/*
 * SLUB : A Slab allocator without object queues.
 *
 * (C) 2007 SGI, Christoph Lameter
 */
|
|
#include <linux/kobject.h>
|
|
|
|
/*
 * Identifiers for the SLUB event counters.
 *
 * The enumerators are used as array indices, so their order defines their
 * numeric value. NR_SLUB_STAT_ITEMS is not an event: it is the number of
 * enumerators that precede it and sizes the stat[] array in
 * struct kmem_cache_cpu. New events must be added before it.
 */
enum stat_item {
	ALLOC_FASTPATH,		/* Allocation from cpu slab */
	ALLOC_SLOWPATH,		/* Allocation by getting a new cpu slab */
	FREE_FASTPATH,		/* Free to cpu slab */
	FREE_SLOWPATH,		/* Freeing not to cpu slab */
	FREE_FROZEN,		/* Freeing to frozen slab */
	FREE_ADD_PARTIAL,	/* Freeing moves slab to partial list */
	FREE_REMOVE_PARTIAL,	/* Freeing removes last object */
	ALLOC_FROM_PARTIAL,	/* Cpu slab acquired from node partial list */
	ALLOC_SLAB,		/* Cpu slab acquired from page allocator */
	ALLOC_REFILL,		/* Refill cpu slab from slab freelist */
	ALLOC_NODE_MISMATCH,	/* Switching cpu slab */
	FREE_SLAB,		/* Slab freed to the page allocator */
	CPUSLAB_FLUSH,		/* Abandoning of the cpu slab */
	DEACTIVATE_FULL,	/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,	/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,	/* Cpu slab was moved to the head of partials */
	DEACTIVATE_TO_TAIL,	/* Cpu slab was moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,/* Slab contained remotely freed objects */
	DEACTIVATE_BYPASS,	/* Implicit deactivation */
	ORDER_FALLBACK,		/* Number of times fallback was necessary */
	CMPXCHG_DOUBLE_CPU_FAIL,/* Failure of this_cpu_cmpxchg_double */
	CMPXCHG_DOUBLE_FAIL,	/* Number of times that cmpxchg double did not match */
	CPU_PARTIAL_ALLOC,	/* Used cpu partial on alloc */
	CPU_PARTIAL_FREE,	/* Refill cpu partial on free */
	CPU_PARTIAL_NODE,	/* Refill cpu partial from node partial */
	CPU_PARTIAL_DRAIN,	/* Drain cpu partial to node partial */
	NR_SLUB_STAT_ITEMS	/* Count of the items above; keep last */
};
|
|
|
|
/*
 * Per-cpu slab state. struct kmem_cache reaches it through its __percpu
 * cpu_slab pointer.
 *
 * The partial list and the statistics array only exist when the
 * corresponding config option is enabled, so code outside this header
 * should go through the slub_percpu_partial*() accessors rather than
 * touching ->partial directly.
 */
struct kmem_cache_cpu {
	void **freelist;	/* Pointer to next available object */
	unsigned long tid;	/* Globally unique transaction id */
	struct page *page;	/* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
	struct page *partial;	/* Partially allocated frozen slabs */
#endif
#ifdef CONFIG_SLUB_STATS
	/* One counter slot per enum stat_item value */
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
|
|
|
|
/*
 * Accessors for the per-cpu partial slot (struct kmem_cache_cpu ->partial).
 *
 * With CONFIG_SLUB_CPU_PARTIAL:
 *   slub_percpu_partial(c)            - lvalue for (c)->partial
 *   slub_set_percpu_partial(c, p)     - stores (p)->next in that slot
 *   slub_percpu_partial_read_once(c)  - READ_ONCE() load of the slot
 *
 * Without it the field does not exist: both readers evaluate to NULL and
 * the setter expands to nothing, so its arguments are not evaluated.
 */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_percpu_partial(c)		((c)->partial)

/* Note: the value stored is (p)->next, not (p) itself. */
#define slub_set_percpu_partial(c, p)		\
({						\
	slub_percpu_partial(c) = (p)->next;	\
})

#define slub_percpu_partial_read_once(c)     READ_ONCE(slub_percpu_partial(c))
#else
#define slub_percpu_partial(c)			NULL

#define slub_set_percpu_partial(c, p)

#define slub_percpu_partial_read_once(c)	NULL
#endif /* CONFIG_SLUB_CPU_PARTIAL */
|
|
|
|
/*
 * Word size structure that can be atomically updated or read and that
 * contains both the order and the number of objects that a slab of the
 * given order would contain.
 *
 * Both values are packed into the single member x; the packing itself is
 * not defined in this header.
 */
struct kmem_cache_order_objects {
	unsigned int x;
};
|
|
|
|
/*
|
|
* Slab cache management.
|
|
*/
|
|
struct kmem_cache {
|
|
struct kmem_cache_cpu __percpu *cpu_slab;
|
|
/* Used for retriving partial slabs etc */
|
|
slab_flags_t flags;
|
|
unsigned long min_partial;
|
|
unsigned int size; /* The size of an object including meta data */
|
|
unsigned int object_size;/* The size of an object without meta data */
|
|
unsigned int offset; /* Free pointer offset. */
|
|
#ifdef CONFIG_SLUB_CPU_PARTIAL
|
|
/* Number of per cpu partial objects to keep around */
|
|
unsigned int cpu_partial;
|
|
#endif
|
|
struct kmem_cache_order_objects oo;
|
|
|
|
/* Allocation and freeing of slabs */
|
|
struct kmem_cache_order_objects max;
|
|
struct kmem_cache_order_objects min;
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
|
int refcount; /* Refcount for slab cache destroy */
|
|
void (*ctor)(void *);
|
|
unsigned int inuse; /* Offset to metadata */
|
|
unsigned int align; /* Alignment */
|
|
unsigned int red_left_pad; /* Left redzone padding size */
|
|
const char *name; /* Name (only for display!) */
|
|
struct list_head list; /* List of slab caches */
|
|
#ifdef CONFIG_SYSFS
|
|
struct kobject kobj; /* For sysfs */
|
|
struct work_struct kobj_remove_work;
|
|
#endif
|
|
#ifdef CONFIG_MEMCG
|
|
struct memcg_cache_params memcg_params;
|
|
/* for propagation, maximum size of a stored attr */
|
|
unsigned int max_attr_size;
|
|
#ifdef CONFIG_SYSFS
|
|
struct kset *memcg_kset;
|
|
#endif
|
|
#endif
|
|
|
|
#ifdef CONFIG_SLAB_FREELIST_HARDENED
|
|
unsigned long random;
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
|
|
* Defragmentation by allocating from a remote node.
|
|
*/
|
|
unsigned int remote_node_defrag_ratio;
|
|
#endif
|
|
|
|
#ifdef CONFIG_SLAB_FREELIST_RANDOM
|
|
unsigned int *random_seq;
|
|
#endif
|
|
|
|
#ifdef CONFIG_KASAN
|
|
struct kasan_cache kasan_info;
|
|
#endif
|
|
|
|
unsigned int useroffset; /* Usercopy region offset */
|
|
unsigned int usersize; /* Usercopy region size */
|
|
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
|
};
|
|
|
|
/*
 * Accessors for struct kmem_cache ->cpu_partial.
 *
 * With CONFIG_SLUB_CPU_PARTIAL the getter is an lvalue for the field and
 * the setter assigns (n) to it. Without the option the field does not
 * exist: the getter evaluates to 0 and the setter expands to nothing, so
 * its arguments are not evaluated.
 */
#ifdef CONFIG_SLUB_CPU_PARTIAL
#define slub_cpu_partial(s)		((s)->cpu_partial)
#define slub_set_cpu_partial(s, n)		\
({						\
	slub_cpu_partial(s) = (n);		\
})
#else
#define slub_cpu_partial(s)		(0)
#define slub_set_cpu_partial(s, n)
#endif /* CONFIG_SLUB_CPU_PARTIAL */
|
|
|
|
/*
 * sysfs hooks for a slab cache.
 *
 * With CONFIG_SYSFS, SLAB_SUPPORTS_SYSFS is defined and the two functions
 * are implemented outside this header. Without it they are empty inline
 * stubs, so callers need no #ifdef of their own.
 */
#ifdef CONFIG_SYSFS
#define SLAB_SUPPORTS_SYSFS
void sysfs_slab_unlink(struct kmem_cache *s);
void sysfs_slab_release(struct kmem_cache *s);
#else
static inline void sysfs_slab_unlink(struct kmem_cache *s)
{
}
static inline void sysfs_slab_release(struct kmem_cache *s)
{
}
#endif
|
|
|
|
void object_err(struct kmem_cache *s, struct page *page,
|
|
u8 *object, char *reason);
|
|
|
|
/*
 * Implemented outside this header; nearest_obj() passes its result pointer
 * through this function before returning it.
 * NOTE(review): presumably adjusts @p for the left red zone of cache @s
 * (cf. kmem_cache.red_left_pad) - confirm against the definition.
 */
void *fixup_red_left(struct kmem_cache *s, void *p);
|
|
|
|
static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
|
|
void *x) {
|
|
void *object = x - (x - page_address(page)) % cache->size;
|
|
void *last_object = page_address(page) +
|
|
(page->objects - 1) * cache->size;
|
|
void *result = (unlikely(object > last_object)) ? last_object : object;
|
|
|
|
result = fixup_red_left(cache, result);
|
|
return result;
|
|
}
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|