From 6adc5cac538b5d9162d8bcdb6145319592afc0d6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:33 -0400 Subject: [PATCH 01/21] percpu: disallow archs from overriding SHIFT_PERCPU_PTR() It has been about half a decade since all archs started using the dynamic percpu allocator and thus the same SHIFT_PERCPU_PTR() implementation. There's no benefit in overriding SHIFT_PERCPU_PTR() anymore. Remove #ifndef around it to clarify that this is identical regardless of the arch. This patch doesn't cause any functional difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 0703aa75b5e8..63d2b68c826e 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -36,17 +36,14 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #endif /* - * Add a offset to a pointer but keep the pointer as is. - * - * Only S390 provides its own means of moving the pointer. + * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() + * to prevent the compiler from making incorrect assumptions about the + * pointer value. The weird cast keeps both GCC and sparse happy. */ -#ifndef SHIFT_PERCPU_PTR -/* Weird cast keeps both GCC and sparse happy. */ #define SHIFT_PERCPU_PTR(__p, __offset) ({ \ __verify_pcpu_ptr((__p)); \ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ }) -#endif /* * A percpu variable may point to a discarded regions. The following are From bbc344e1e3aef3034a0edc79f7f64a912517926b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:34 -0400 Subject: [PATCH 02/21] percpu: introduce arch_raw_cpu_ptr() Currently, archs can override raw_cpu_ptr() directly; however, we wanna build a layer of indirection in the generic part of percpu so that we can implement generic features there without affecting archs. Introduce arch_raw_cpu_ptr() which is used to define raw_cpu_ptr() by generic percpu code. The two are identical for now. x86 is currently the only arch which overrides raw_cpu_ptr() and is converted to define arch_raw_cpu_ptr() instead. This doesn't introduce any functional difference. Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/include/asm/percpu.h | 2 +- include/asm-generic/percpu.h | 11 +++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 851bcdc5db04..9bc23f18a6fa 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -52,7 +52,7 @@ * Compared to the generic __my_cpu_offset version, the following * saves one instruction and avoids clobbering a temp register. */ -#define raw_cpu_ptr(ptr) \ +#define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ __verify_pcpu_ptr(ptr); \ diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 63d2b68c826e..a247d80b6630 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -53,9 +53,16 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define per_cpu(var, cpu) \ (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) -#ifndef raw_cpu_ptr -#define raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) +/* + * Arch may define arch_raw_cpu_ptr() to provide more efficient address + * translations for raw_cpu_ptr(). + */ +#ifndef arch_raw_cpu_ptr +#define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif + +#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) + #ifdef CONFIG_DEBUG_PREEMPT #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) #else From 62fde54123fb64879326c8b71c3f92cc5db1c452 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:34 -0400 Subject: [PATCH 03/21] percpu: include/asm-generic/percpu.h should contain only arch-overridable parts The roles of the various percpu header files has become unclear. There are four header files involved. include/linux/percpu-defs.h include/linux/percpu.h include/asm-generic/percpu.h arch/*/include/asm/percpu.h The original intention for include/asm-generic/percpu.h is providing generic definitions for arch-overridable parts; however, it now hosts various stuff which can't be overridden by archs. Also, include/linux/percpu-defs.h was initially added to contain section and percpu variable definition macros so that arch header files can make use of them without worrying about introducing cyclic inclusion dependency by including include/linux/percpu.h; however, arch headers sometimes need to access percpu variables too and this is one of the reasons why some accessors were implemented in include/linux/asm-generic/percpu.h. Let's clear up the situation by making include/asm-generic/percpu.h contain only arch-overridable parts and moving accessors and operations into include/linux/percpu-defs. Note that this patch only moves things from include/asm-generic/percpu.h. include/linux/percpu.h will be taken care of by later patches. This patch moves the followings. * SHIFT_PERCPU_PTR() / VERIFY_PERCPU_PTR() * per_cpu() * raw_cpu_ptr() * this_cpu_ptr() * __get_cpu_var() * __raw_get_cpu_var() * __this_cpu_ptr() * PER_CPU_[SHARED_]ALIGNED_SECTION * PER_CPU_[SHARED_]ALIGNED_SECTION * PER_CPU_FIRST_SECTION This patch is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 64 -------------------------- include/linux/percpu-defs.h | 89 ++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 64 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index a247d80b6630..e5ace4d49084 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -35,24 +35,6 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define my_cpu_offset __my_cpu_offset #endif -/* - * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() - * to prevent the compiler from making incorrect assumptions about the - * pointer value. The weird cast keeps both GCC and sparse happy. - */ -#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ - __verify_pcpu_ptr((__p)); \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ -}) - -/* - * A percpu variable may point to a discarded regions. The following are - * established ways to produce a usable pointer from the percpu variable - * offset. - */ -#define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) - /* * Arch may define arch_raw_cpu_ptr() to provide more efficient address * translations for raw_cpu_ptr(). @@ -61,34 +43,10 @@ extern unsigned long __per_cpu_offset[NR_CPUS]; #define arch_raw_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, __my_cpu_offset) #endif -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) - -#ifdef CONFIG_DEBUG_PREEMPT -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) -#else -#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) -#endif - -#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) - #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA extern void setup_per_cpu_areas(void); #endif -#else /* ! SMP */ - -#define VERIFY_PERCPU_PTR(__p) ({ \ - __verify_pcpu_ptr((__p)); \ - (typeof(*(__p)) __kernel __force *)(__p); \ -}) - -#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) -#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) - #endif /* SMP */ #ifndef PER_CPU_BASE_SECTION @@ -99,25 +57,6 @@ extern void setup_per_cpu_areas(void); #endif #endif -#ifdef CONFIG_SMP - -#ifdef MODULE -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_ALIGNED_SECTION "" -#else -#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#endif -#define PER_CPU_FIRST_SECTION "..first" - -#else - -#define PER_CPU_SHARED_ALIGNED_SECTION "" -#define PER_CPU_ALIGNED_SECTION "..shared_aligned" -#define PER_CPU_FIRST_SECTION "" - -#endif - #ifndef PER_CPU_ATTRIBUTES #define PER_CPU_ATTRIBUTES #endif @@ -126,7 +65,4 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif -/* Keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr raw_cpu_ptr - #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index a5fc7d01aad6..1a1af3e06a71 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -1,6 +1,40 @@ +/* + * linux/percpu-defs.h - basic definitions for percpu areas + * + * DO NOT INCLUDE DIRECTLY OUTSIDE PERCPU IMPLEMENTATION PROPER. + * + * This file is separate from linux/percpu.h to avoid cyclic inclusion + * dependency from arch header files. Only to be included from + * asm/percpu.h. + * + * This file includes macros necessary to declare percpu sections and + * variables, and definitions of percpu accessors and operations. It + * should provide enough percpu features to arch header files even when + * they can only include asm/percpu.h to avoid cyclic inclusion dependency. + */ + #ifndef _LINUX_PERCPU_DEFS_H #define _LINUX_PERCPU_DEFS_H +#ifdef CONFIG_SMP + +#ifdef MODULE +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_ALIGNED_SECTION "" +#else +#define PER_CPU_SHARED_ALIGNED_SECTION "..shared_aligned" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" +#endif +#define PER_CPU_FIRST_SECTION "..first" + +#else + +#define PER_CPU_SHARED_ALIGNED_SECTION "" +#define PER_CPU_ALIGNED_SECTION "..shared_aligned" +#define PER_CPU_FIRST_SECTION "" + +#endif + /* * Base implementations of per-CPU variable declarations and definitions, where * the section in which the variable is to be placed is provided by the @@ -164,4 +198,59 @@ #define EXPORT_PER_CPU_SYMBOL_GPL(var) #endif +/* + * Accessors and operations. + */ +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_SMP + +/* + * Add an offset to a pointer but keep the pointer as-is. Use RELOC_HIDE() + * to prevent the compiler from making incorrect assumptions about the + * pointer value. The weird cast keeps both GCC and sparse happy. + */ +#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ + __verify_pcpu_ptr((__p)); \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ +}) + +/* + * A percpu variable may point to a discarded regions. The following are + * established ways to produce a usable pointer from the percpu variable + * offset. + */ +#define per_cpu(var, cpu) \ + (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) + +#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) + +#ifdef CONFIG_DEBUG_PREEMPT +#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#else +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) +#endif + +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) +#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) + +#else /* CONFIG_SMP */ + +#define VERIFY_PERCPU_PTR(__p) ({ \ + __verify_pcpu_ptr((__p)); \ + (typeof(*(__p)) __kernel __force *)(__p); \ +}) + +#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) +#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) +#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) + +#endif /* CONFIG_SMP */ + +/* keep until we have removed all uses of __this_cpu_ptr */ +#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) + +#endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ From 9defda18f913181debfe7cdc8c0a752f707ac861 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:34 -0400 Subject: [PATCH 04/21] percpu: move accessors from include/linux/percpu.h to percpu-defs.h include/linux/percpu-defs.h is gonna host all accessors and operations so that arch headers can make use of them too without worrying about circular dependency through include/linux/percpu.h. This patch moves the following accessors from include/linux/percpu.h to include/linux/percpu-defs.h. * get/put_cpu_var() * get/put_cpu_ptr() * per_cpu_ptr() This is pure reorgniazation. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 32 ++++++++++++++++++++++++++++++++ include/linux/percpu.h | 37 ------------------------------------- 2 files changed, 32 insertions(+), 37 deletions(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 1a1af3e06a71..f782f98004db 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -252,5 +252,37 @@ /* keep until we have removed all uses of __this_cpu_ptr */ #define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) +/* + * Must be an lvalue. Since @var must be a simple identifier, + * we force a syntax error here if it isn't. + */ +#define get_cpu_var(var) (*({ \ + preempt_disable(); \ + this_cpu_ptr(&var); })) + +/* + * The weird & is necessary because sparse considers (void)(var) to be + * a direct dereference of percpu variable (var). + */ +#define put_cpu_var(var) do { \ + (void)&(var); \ + preempt_enable(); \ +} while (0) + +#define get_cpu_ptr(var) ({ \ + preempt_disable(); \ + this_cpu_ptr(var); }) + +#define put_cpu_ptr(var) do { \ + (void)(var); \ + preempt_enable(); \ +} while (0) + +#ifdef CONFIG_SMP +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#else +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#endif + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 8419053d0f2e..97b207990c45 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -23,32 +23,6 @@ PERCPU_MODULE_RESERVE) #endif -/* - * Must be an lvalue. Since @var must be a simple identifier, - * we force a syntax error here if it isn't. - */ -#define get_cpu_var(var) (*({ \ - preempt_disable(); \ - this_cpu_ptr(&var); })) - -/* - * The weird & is necessary because sparse considers (void)(var) to be - * a direct dereference of percpu variable (var). - */ -#define put_cpu_var(var) do { \ - (void)&(var); \ - preempt_enable(); \ -} while (0) - -#define get_cpu_ptr(var) ({ \ - preempt_disable(); \ - this_cpu_ptr(var); }) - -#define put_cpu_ptr(var) do { \ - (void)(var); \ - preempt_enable(); \ -} while (0) - /* minimum unit size, also is the maximum supported allocation size */ #define PCPU_MIN_UNIT_SIZE PFN_ALIGN(32 << 10) @@ -140,17 +114,6 @@ extern int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_populate_pte_fn_t populate_pte_fn); #endif -/* - * Use this to get to a cpu's version of the per-cpu object - * dynamically allocated. Non-atomic access to the current CPU's - * version should probably be combined with get_cpu()/put_cpu(). - */ -#ifdef CONFIG_SMP -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -#else -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) -#endif - extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); extern bool is_kernel_percpu_address(unsigned long addr); From 3b8ed91d6463f48ab180f5ebedc9663eddfa0587 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:37 -0400 Subject: [PATCH 05/21] percpu: reorganize include/linux/percpu-defs.h Reorganize for better readability. * Accessor definitions are collected into one place and SMP and UP now define them in the same order. * Definitions are layered when possible - e.g. per_cpu() is now defined in terms of this_cpu_ptr(). * Rather pointless comment dropped. * per_cpu(), __raw_get_cpu_var() and __get_cpu_var() are defined in a way which can be shared between SMP and UP and moved out of CONFIG_SMP blocks. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Cc: Christoph Lameter --- include/linux/percpu-defs.h | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index f782f98004db..94cd90afadac 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -215,15 +215,8 @@ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ }) -/* - * A percpu variable may point to a discarded regions. The following are - * established ways to produce a usable pointer from the percpu variable - * offset. - */ -#define per_cpu(var, cpu) \ - (*SHIFT_PERCPU_PTR(&(var), per_cpu_offset(cpu))) - -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) #ifdef CONFIG_DEBUG_PREEMPT #define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) @@ -231,9 +224,6 @@ #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif -#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) - #else /* CONFIG_SMP */ #define VERIFY_PERCPU_PTR(__p) ({ \ @@ -241,14 +231,16 @@ (typeof(*(__p)) __kernel __force *)(__p); \ }) -#define per_cpu(var, cpu) (*((void)(cpu), VERIFY_PERCPU_PTR(&(var)))) -#define __get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define __raw_get_cpu_var(var) (*VERIFY_PERCPU_PTR(&(var))) -#define this_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) -#define raw_cpu_ptr(ptr) this_cpu_ptr(ptr) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif /* CONFIG_SMP */ +#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu)) +#define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) +#define __get_cpu_var(var) (*this_cpu_ptr(&(var))) + /* keep until we have removed all uses of __this_cpu_ptr */ #define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) @@ -278,11 +270,5 @@ preempt_enable(); \ } while (0) -#ifdef CONFIG_SMP -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -#else -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) -#endif - #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ From dcba4333683c3a0642fd575e475c6c740122a037 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: [PATCH 06/21] percpu: only allow sized arch overrides for {raw|this}_cpu_*() ops Currently, percpu allows two separate methods for overriding {raw|this}_cpu_*() ops - for a given operation, an arch can provide whole replacement or sized sub operations to override specific parts of it. e.g. arch either can provide this_cpu_add() or this_cpu_add_4() to override only the 4 byte operation. While quite flexible on a glance, the dual-overriding scheme complicates the code path for no actual gain. It compilcates the already complex operation definitions and if an arch wants to override all sizes, it can easily provide all variants anyway. In fact, no arch is actually making use of whole operation override. Another oddity is that __this_cpu_*() operations are defined in the same way as raw_cpu_*() but ignores full overrides of the raw_cpu_*() and doesn't allow full operation override, so if an arch provides whole overrides for raw_cpu_*() operations __this_cpu_*() ends up using the generic implementations. More importantly, it takes away the layering between arch-specific and generic parts making it impossible for the generic part to implement arch-independent features on top of arch-specific overrides. This patch removes the support for whole operation overrides. As no arch is using it, this doesn't cause any actual difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu.h | 94 +++--------------------------------------- 1 file changed, 5 insertions(+), 89 deletions(-) diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 97b207990c45..95d380e5d246 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -226,17 +226,11 @@ do { \ * safe. Interrupts may occur. If the interrupt modifies the variable * too then RMW actions will not be reliable. * - * The arch code can provide optimized functions in two ways: - * - * 1. Override the function completely. F.e. define this_cpu_add(). - * The arch must then ensure that the various scalar format passed - * are handled correctly. - * - * 2. Provide functions for certain scalar sizes. F.e. provide - * this_cpu_add_2() to provide per cpu atomic operations for 2 byte - * sized RMW actions. If arch code does not provide operations for - * a scalar size then the fallback in the generic code will be - * used. + * The arch code can provide optimized implementation by defining macros + * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per + * cpu atomic operations for 2 byte sized RMW actions. If arch code does + * not provide operations for a scalar size then the fallback in the + * generic code will be used. */ #define _this_cpu_generic_read(pcp) \ @@ -247,7 +241,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_read # ifndef this_cpu_read_1 # define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) # endif @@ -261,7 +254,6 @@ do { \ # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif # define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -#endif #define _this_cpu_generic_to_op(pcp, val, op) \ do { \ @@ -271,7 +263,6 @@ do { \ raw_local_irq_restore(flags); \ } while (0) -#ifndef this_cpu_write # ifndef this_cpu_write_1 # define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif @@ -285,9 +276,7 @@ do { \ # define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif # define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -#endif -#ifndef this_cpu_add # ifndef this_cpu_add_1 # define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif @@ -301,21 +290,11 @@ do { \ # define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) # endif # define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -#endif -#ifndef this_cpu_sub # define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -#endif - -#ifndef this_cpu_inc # define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -#endif - -#ifndef this_cpu_dec # define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -#endif -#ifndef this_cpu_and # ifndef this_cpu_and_1 # define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif @@ -329,9 +308,7 @@ do { \ # define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) # endif # define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -#endif -#ifndef this_cpu_or # ifndef this_cpu_or_1 # define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif @@ -345,7 +322,6 @@ do { \ # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif # define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -#endif #define _this_cpu_generic_add_return(pcp, val) \ ({ \ @@ -358,7 +334,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_add_return # ifndef this_cpu_add_return_1 # define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif @@ -372,7 +347,6 @@ do { \ # define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) -#endif #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) @@ -388,7 +362,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_xchg # ifndef this_cpu_xchg_1 # define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif @@ -403,7 +376,6 @@ do { \ # endif # define this_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -#endif #define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ @@ -417,7 +389,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_cmpxchg # ifndef this_cpu_cmpxchg_1 # define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -432,7 +403,6 @@ do { \ # endif # define this_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) -#endif /* * cmpxchg_double replaces two adjacent scalars at once. The first @@ -453,7 +423,6 @@ do { \ ret__; \ }) -#ifndef this_cpu_cmpxchg_double # ifndef this_cpu_cmpxchg_double_1 # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -472,7 +441,6 @@ do { \ # endif # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) -#endif /* * Generic percpu operations for contexts where we do not want to do @@ -484,7 +452,6 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -#ifndef raw_cpu_read # ifndef raw_cpu_read_1 # define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif @@ -498,15 +465,12 @@ do { \ # define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif # define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -#endif #define raw_cpu_generic_to_op(pcp, val, op) \ do { \ *raw_cpu_ptr(&(pcp)) op val; \ } while (0) - -#ifndef raw_cpu_write # ifndef raw_cpu_write_1 # define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif @@ -520,9 +484,7 @@ do { \ # define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif # define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -#endif -#ifndef raw_cpu_add # ifndef raw_cpu_add_1 # define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif @@ -536,21 +498,13 @@ do { \ # define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif # define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -#endif -#ifndef raw_cpu_sub # define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -#endif -#ifndef raw_cpu_inc # define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -#endif -#ifndef raw_cpu_dec # define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -#endif -#ifndef raw_cpu_and # ifndef raw_cpu_and_1 # define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif @@ -564,9 +518,7 @@ do { \ # define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif # define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -#endif -#ifndef raw_cpu_or # ifndef raw_cpu_or_1 # define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif @@ -580,7 +532,6 @@ do { \ # define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif # define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -#endif #define raw_cpu_generic_add_return(pcp, val) \ ({ \ @@ -588,7 +539,6 @@ do { \ raw_cpu_read(pcp); \ }) -#ifndef raw_cpu_add_return # ifndef raw_cpu_add_return_1 # define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif @@ -603,7 +553,6 @@ do { \ # endif # define raw_cpu_add_return(pcp, val) \ __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#endif #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) @@ -616,7 +565,6 @@ do { \ ret__; \ }) -#ifndef raw_cpu_xchg # ifndef raw_cpu_xchg_1 # define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif @@ -631,7 +579,6 @@ do { \ # endif # define raw_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -#endif #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ @@ -642,7 +589,6 @@ do { \ ret__; \ }) -#ifndef raw_cpu_cmpxchg # ifndef raw_cpu_cmpxchg_1 # define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -657,7 +603,6 @@ do { \ # endif # define raw_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -#endif #define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ @@ -671,7 +616,6 @@ do { \ (__ret); \ }) -#ifndef raw_cpu_cmpxchg_double # ifndef raw_cpu_cmpxchg_double_1 # define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -690,79 +634,51 @@ do { \ # endif # define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) -#endif /* * Generic percpu operations for context that are safe from preemption/interrupts. */ -#ifndef __this_cpu_read # define __this_cpu_read(pcp) \ (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) -#endif -#ifndef __this_cpu_write # define __this_cpu_write(pcp, val) \ do { __this_cpu_preempt_check("write"); \ __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_add # define __this_cpu_add(pcp, val) \ do { __this_cpu_preempt_check("add"); \ __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_sub # define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -#endif - -#ifndef __this_cpu_inc # define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -#endif - -#ifndef __this_cpu_dec # define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) -#endif -#ifndef __this_cpu_and # define __this_cpu_and(pcp, val) \ do { __this_cpu_preempt_check("and"); \ __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ } while (0) -#endif - -#ifndef __this_cpu_or # define __this_cpu_or(pcp, val) \ do { __this_cpu_preempt_check("or"); \ __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ } while (0) -#endif -#ifndef __this_cpu_add_return # define __this_cpu_add_return(pcp, val) \ (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) -#endif #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) -#ifndef __this_cpu_xchg # define __this_cpu_xchg(pcp, nval) \ (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) -#endif -#ifndef __this_cpu_cmpxchg # define __this_cpu_cmpxchg(pcp, oval, nval) \ (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) -#endif -#ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) -#endif #endif /* __LINUX_PERCPU_H */ From 47b69ad673d9aa53c1d6032a6a522fc0ce8d6fc1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: [PATCH 07/21] percpu: move generic {raw|this}_cpu_*_N() definitions to include/asm-generic/percpu.h {raw|this}_cpu_*_N() operations are expected to be provided by archs and the generic definitions are provided as fallbacks. As such, these firmly belong to include/asm-generic/percpu.h. Move the generic definitions to include/asm-generic/percpu.h. The code is moved mostly verbatim; however, raw_cpu_*_N() are placed above this_cpu_*_N() which is more conventional as the raw operations may be used to defined other variants. This is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 341 ++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 344 ----------------------------------- 2 files changed, 341 insertions(+), 344 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index e5ace4d49084..932ce602128f 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -65,4 +65,345 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif +# ifndef raw_cpu_read_1 +# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_2 +# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_4 +# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) +# endif +# ifndef raw_cpu_read_8 +# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) +# endif + +#define raw_cpu_generic_to_op(pcp, val, op) \ +do { \ + *raw_cpu_ptr(&(pcp)) op val; \ +} while (0) + +# ifndef raw_cpu_write_1 +# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_2 +# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_4 +# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef raw_cpu_write_8 +# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) +# endif + +# ifndef raw_cpu_add_1 +# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_2 +# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_4 +# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef raw_cpu_add_8 +# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) +# endif + +# ifndef raw_cpu_and_1 +# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_2 +# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_4 +# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef raw_cpu_and_8 +# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) +# endif + +# ifndef raw_cpu_or_1 +# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_2 +# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_4 +# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef raw_cpu_or_8 +# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) +# endif + +#define raw_cpu_generic_add_return(pcp, val) \ +({ \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ +}) + +# ifndef raw_cpu_add_return_1 +# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_2 +# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_4 +# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif +# ifndef raw_cpu_add_return_8 +# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) +# endif + +#define raw_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +# ifndef raw_cpu_xchg_1 +# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_2 +# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_4 +# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif +# ifndef raw_cpu_xchg_8 +# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +# endif + +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +# ifndef raw_cpu_cmpxchg_1 +# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_2 +# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_4 +# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef raw_cpu_cmpxchg_8 +# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) +# endif + +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int __ret = 0; \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ + __ret = 1; \ + } \ + (__ret); \ +}) + +# ifndef raw_cpu_cmpxchg_double_1 +# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_2 +# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_4 +# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef raw_cpu_cmpxchg_double_8 +# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif + +#define _this_cpu_generic_read(pcp) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = *this_cpu_ptr(&(pcp)); \ + preempt_enable(); \ + ret__; \ +}) + +# ifndef this_cpu_read_1 +# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_2 +# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_4 +# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) +# endif +# ifndef this_cpu_read_8 +# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) +# endif + +#define _this_cpu_generic_to_op(pcp, val, op) \ +do { \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + *raw_cpu_ptr(&(pcp)) op val; \ + raw_local_irq_restore(flags); \ +} while (0) + +# ifndef this_cpu_write_1 +# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_2 +# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_4 +# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif +# ifndef this_cpu_write_8 +# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) +# endif + +# ifndef this_cpu_add_1 +# define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_2 +# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_4 +# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif +# ifndef this_cpu_add_8 +# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) +# endif + +# ifndef this_cpu_and_1 +# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_2 +# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_4 +# define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif +# ifndef this_cpu_and_8 +# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) +# endif + +# ifndef this_cpu_or_1 +# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_2 +# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_4 +# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif +# ifndef this_cpu_or_8 +# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) +# endif + +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_add_return_1 +# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_2 +# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_4 +# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif +# ifndef this_cpu_add_return_8 +# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) +# endif + +#define _this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_xchg_1 +# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_2 +# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_4 +# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif +# ifndef this_cpu_xchg_8 +# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) +# endif + +#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_cmpxchg_1 +# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_2 +# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_4 +# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif +# ifndef this_cpu_cmpxchg_8 +# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) +# endif + +#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +# ifndef this_cpu_cmpxchg_double_1 +# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_2 +# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_4 +# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif +# ifndef this_cpu_cmpxchg_double_8 +# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# endif + #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 95d380e5d246..20b953532596 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -233,174 +233,20 @@ do { \ * generic code will be used. */ -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ - preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ - preempt_enable(); \ - ret__; \ -}) - -# ifndef this_cpu_read_1 -# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_2 -# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_4 -# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_8 -# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) -# endif # define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) - -#define _this_cpu_generic_to_op(pcp, val, op) \ -do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ -} while (0) - -# ifndef this_cpu_write_1 -# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_2 -# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_4 -# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_8 -# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif # define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) - -# ifndef this_cpu_add_1 -# define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_2 -# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_4 -# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_8 -# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif # define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) - # define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) # define this_cpu_inc(pcp) this_cpu_add((pcp), 1) # define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) - -# ifndef this_cpu_and_1 -# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_2 -# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_4 -# define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_8 -# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif # define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) - -# ifndef this_cpu_or_1 -# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_2 -# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_4 -# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_8 -# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif # define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) - -#define _this_cpu_generic_add_return(pcp, val) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_add_return_1 -# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_2 -# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_4 -# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_8 -# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif # define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) - #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) - -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_xchg_1 -# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_2 -# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_4 -# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_8 -# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif # define this_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) - -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_cmpxchg_1 -# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_2 -# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_4 -# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_8 -# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif # define this_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) @@ -412,33 +258,6 @@ do { \ * very limited hardware support for these operations, so only certain * sizes may work. */ -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ - oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - -# ifndef this_cpu_cmpxchg_double_1 -# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_2 -# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_4 -# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_8 -# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif # define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) @@ -452,186 +271,23 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -# ifndef raw_cpu_read_1 -# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_2 -# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_4 -# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_8 -# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) -# endif # define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) - -#define raw_cpu_generic_to_op(pcp, val, op) \ -do { \ - *raw_cpu_ptr(&(pcp)) op val; \ -} while (0) - -# ifndef raw_cpu_write_1 -# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_2 -# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_4 -# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_8 -# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif # define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) - -# ifndef raw_cpu_add_1 -# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_2 -# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_4 -# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_8 -# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif # define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) - # define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) - # define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) - # define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) - -# ifndef raw_cpu_and_1 -# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_2 -# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_4 -# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_8 -# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif # define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) - -# ifndef raw_cpu_or_1 -# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_2 -# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_4 -# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_8 -# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif # define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) - -#define raw_cpu_generic_add_return(pcp, val) \ -({ \ - raw_cpu_add(pcp, val); \ - raw_cpu_read(pcp); \ -}) - -# ifndef raw_cpu_add_return_1 -# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_2 -# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_4 -# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_8 -# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif # define raw_cpu_add_return(pcp, val) \ __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) - #define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) #define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) #define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) - -#define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - -# ifndef raw_cpu_xchg_1 -# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_2 -# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_4 -# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_8 -# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif # define raw_cpu_xchg(pcp, nval) \ __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) - -#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - -# ifndef raw_cpu_cmpxchg_1 -# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_2 -# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_4 -# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_8 -# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif # define raw_cpu_cmpxchg(pcp, oval, nval) \ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) - -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int __ret = 0; \ - if (raw_cpu_read(pcp1) == (oval1) && \ - raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ - __ret = 1; \ - } \ - (__ret); \ -}) - -# ifndef raw_cpu_cmpxchg_double_1 -# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_2 -# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_4 -# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_8 -# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif # define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) From a32f8d8eda8bd49017ac5f88e2b859f1f582557f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: [PATCH 08/21] percpu: move {raw|this}_cpu_*() definitions to include/linux/percpu-defs.h We're in the process of moving all percpu accessors and operations to include/linux/percpu-defs.h so that they're available to arch headers without having to include full include/linux/percpu.h which may cause cyclic inclusion dependency. This patch moves {raw|this}_cpu_*() definitions from include/linux/percpu.h to include/linux/percpu-defs.h. The code is moved mostly verbatim; however, raw_cpu_*() are placed above this_cpu_*() which is more conventional as the raw operations may be used to defined other variants. This is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 209 ++++++++++++++++++++++++++++++++++++ include/linux/percpu.h | 208 ----------------------------------- 2 files changed, 209 insertions(+), 208 deletions(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 94cd90afadac..6710eb9555fa 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -270,5 +270,214 @@ preempt_enable(); \ } while (0) +/* + * Branching function to split up a function into a set of functions that + * are called for different scalar sizes of the objects handled. + */ + +extern void __bad_size_call_parameter(void); + +#ifdef CONFIG_DEBUG_PREEMPT +extern void __this_cpu_preempt_check(const char *op); +#else +static inline void __this_cpu_preempt_check(const char *op) { } +#endif + +#define __pcpu_size_call_return(stem, variable) \ +({ typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr_ret__ = stem##1(variable);break; \ + case 2: pscr_ret__ = stem##2(variable);break; \ + case 4: pscr_ret__ = stem##4(variable);break; \ + case 8: pscr_ret__ = stem##8(variable);break; \ + default: \ + __bad_size_call_parameter();break; \ + } \ + pscr_ret__; \ +}) + +#define __pcpu_size_call_return2(stem, variable, ...) \ +({ \ + typeof(variable) pscr2_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr2_ret__; \ +}) + +/* + * Special handling for cmpxchg_double. cmpxchg_double is passed two + * percpu variables. The first has to be aligned to a double word + * boundary and the second has to follow directly thereafter. + * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. + */ +#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ +({ \ + bool pdcrb_ret__; \ + __verify_pcpu_ptr(&pcp1); \ + BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ + VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ + VM_BUG_ON((unsigned long)(&pcp2) != \ + (unsigned long)(&pcp1) + sizeof(pcp1)); \ + switch(sizeof(pcp1)) { \ + case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ + case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ + case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ + case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pdcrb_ret__; \ +}) + +#define __pcpu_size_call(stem, variable, ...) \ +do { \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: stem##1(variable, __VA_ARGS__);break; \ + case 2: stem##2(variable, __VA_ARGS__);break; \ + case 4: stem##4(variable, __VA_ARGS__);break; \ + case 8: stem##8(variable, __VA_ARGS__);break; \ + default: \ + __bad_size_call_parameter();break; \ + } \ +} while (0) + +/* + * this_cpu operations (C) 2008-2013 Christoph Lameter + * + * Optimized manipulation for memory allocated through the per cpu + * allocator or for addresses of per cpu variables. + * + * These operation guarantee exclusivity of access for other operations + * on the *same* processor. The assumption is that per cpu data is only + * accessed by a single processor instance (the current one). + * + * The arch code can provide optimized implementation by defining macros + * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per + * cpu atomic operations for 2 byte sized RMW actions. If arch code does + * not provide operations for a scalar size then the fallback in the + * generic code will be used. + */ + +/* + * Generic percpu operations for contexts where we do not want to do + * any checks for preemptiosn. + * + * If there is no other protection through preempt disable and/or + * disabling interupts then one of these RMW operations can show unexpected + * behavior because the execution thread was rescheduled on another processor + * or an interrupt occurred and the same percpu variable was modified from + * the interrupt context. + */ +# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) +# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) +# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) +# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +# define raw_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) +# define raw_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) +# define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + +/* + * Generic percpu operations for context that are safe from preemption/interrupts. + */ +# define __this_cpu_read(pcp) \ + (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) + +# define __this_cpu_write(pcp, val) \ +do { __this_cpu_preempt_check("write"); \ + __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_add(pcp, val) \ +do { __this_cpu_preempt_check("add"); \ + __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) + +# define __this_cpu_and(pcp, val) \ +do { __this_cpu_preempt_check("and"); \ + __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_or(pcp, val) \ +do { __this_cpu_preempt_check("or"); \ + __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ +} while (0) + +# define __this_cpu_add_return(pcp, val) \ + (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) + +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) + +# define __this_cpu_xchg(pcp, nval) \ + (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) + +# define __this_cpu_cmpxchg(pcp, oval, nval) \ + (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) + +# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) + +/* + * this_cpu_*() operations are used for accesses that must be done in a + * preemption safe way since we know that the context is not preempt + * safe. Interrupts may occur. If the interrupt modifies the variable too + * then RMW actions will not be reliable. + */ +# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) +# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) +# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) +# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) +# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) +# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) +# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) +# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) +# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) +# define this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) +# define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) + +/* + * cmpxchg_double replaces two adjacent scalars at once. The first + * two parameters are per cpu variables which have to be of the same + * size. A truth value is returned to indicate success or failure + * (since a double register result is difficult to handle). There is + * very limited hardware support for these operations, so only certain + * sizes may work. + */ +# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 20b953532596..6f61b61b7996 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -129,212 +129,4 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); #define alloc_percpu(type) \ (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) -/* - * Branching function to split up a function into a set of functions that - * are called for different scalar sizes of the objects handled. - */ - -extern void __bad_size_call_parameter(void); - -#ifdef CONFIG_DEBUG_PREEMPT -extern void __this_cpu_preempt_check(const char *op); -#else -static inline void __this_cpu_preempt_check(const char *op) { } -#endif - -#define __pcpu_size_call_return(stem, variable) \ -({ typeof(variable) pscr_ret__; \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: pscr_ret__ = stem##1(variable);break; \ - case 2: pscr_ret__ = stem##2(variable);break; \ - case 4: pscr_ret__ = stem##4(variable);break; \ - case 8: pscr_ret__ = stem##8(variable);break; \ - default: \ - __bad_size_call_parameter();break; \ - } \ - pscr_ret__; \ -}) - -#define __pcpu_size_call_return2(stem, variable, ...) \ -({ \ - typeof(variable) pscr2_ret__; \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ - case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ - case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ - case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ - default: \ - __bad_size_call_parameter(); break; \ - } \ - pscr2_ret__; \ -}) - -/* - * Special handling for cmpxchg_double. cmpxchg_double is passed two - * percpu variables. The first has to be aligned to a double word - * boundary and the second has to follow directly thereafter. - * We enforce this on all architectures even if they don't support - * a double cmpxchg instruction, since it's a cheap requirement, and it - * avoids breaking the requirement for architectures with the instruction. - */ -#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ -({ \ - bool pdcrb_ret__; \ - __verify_pcpu_ptr(&pcp1); \ - BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ - VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ - VM_BUG_ON((unsigned long)(&pcp2) != \ - (unsigned long)(&pcp1) + sizeof(pcp1)); \ - switch(sizeof(pcp1)) { \ - case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ - case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ - case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \ - case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \ - default: \ - __bad_size_call_parameter(); break; \ - } \ - pdcrb_ret__; \ -}) - -#define __pcpu_size_call(stem, variable, ...) \ -do { \ - __verify_pcpu_ptr(&(variable)); \ - switch(sizeof(variable)) { \ - case 1: stem##1(variable, __VA_ARGS__);break; \ - case 2: stem##2(variable, __VA_ARGS__);break; \ - case 4: stem##4(variable, __VA_ARGS__);break; \ - case 8: stem##8(variable, __VA_ARGS__);break; \ - default: \ - __bad_size_call_parameter();break; \ - } \ -} while (0) - -/* - * this_cpu operations (C) 2008-2013 Christoph Lameter - * - * Optimized manipulation for memory allocated through the per cpu - * allocator or for addresses of per cpu variables. - * - * These operation guarantee exclusivity of access for other operations - * on the *same* processor. The assumption is that per cpu data is only - * accessed by a single processor instance (the current one). - * - * The first group is used for accesses that must be done in a - * preemption safe way since we know that the context is not preempt - * safe. Interrupts may occur. If the interrupt modifies the variable - * too then RMW actions will not be reliable. - * - * The arch code can provide optimized implementation by defining macros - * for certain scalar sizes. F.e. provide this_cpu_add_2() to provide per - * cpu atomic operations for 2 byte sized RMW actions. If arch code does - * not provide operations for a scalar size then the fallback in the - * generic code will be used. - */ - -# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) -#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) -#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) -# define this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -# define this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) - -/* - * cmpxchg_double replaces two adjacent scalars at once. The first - * two parameters are per cpu variables which have to be of the same - * size. A truth value is returned to indicate success or failure - * (since a double register result is difficult to handle). There is - * very limited hardware support for these operations, so only certain - * sizes may work. - */ -# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) - -/* - * Generic percpu operations for contexts where we do not want to do - * any checks for preemptiosn. - * - * If there is no other protection through preempt disable and/or - * disabling interupts then one of these RMW operations can show unexpected - * behavior because the execution thread was rescheduled on another processor - * or an interrupt occurred and the same percpu variable was modified from - * the interrupt context. - */ -# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -# define raw_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) -#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -# define raw_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -# define raw_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) - -/* - * Generic percpu operations for context that are safe from preemption/interrupts. - */ -# define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) - -# define __this_cpu_write(pcp, val) \ -do { __this_cpu_preempt_check("write"); \ - __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_add(pcp, val) \ -do { __this_cpu_preempt_check("add"); \ - __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) - -# define __this_cpu_and(pcp, val) \ -do { __this_cpu_preempt_check("and"); \ - __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_or(pcp, val) \ -do { __this_cpu_preempt_check("or"); \ - __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ -} while (0) - -# define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) - -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) - -# define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) - -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) - -# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) - #endif /* __LINUX_PERCPU_H */ From 9c28278a24c01c0073fb89e53c1d2a605ab9587d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: [PATCH 09/21] percpu: reorder macros in percpu header files * In include/asm-generic/percpu.h, collect {raw|_this}_cpu_generic*() macros into one place. They were dispersed through {raw|this}_cpu_*_N() definitions and the visiual inconsistency was making following the code unnecessarily difficult. * In include/linux/percpu-defs.h, move __verify_pcpu_ptr() later in the file so that it's right above accessor definitions where it's actually used. This is pure reorganization. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 198 +++++++++++++++++------------------ include/linux/percpu-defs.h | 26 ++--- 2 files changed, 112 insertions(+), 112 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 932ce602128f..2300d989087b 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -65,6 +65,105 @@ extern void setup_per_cpu_areas(void); #define PER_CPU_DEF_ATTRIBUTES #endif +#define raw_cpu_generic_to_op(pcp, val, op) \ +do { \ + *raw_cpu_ptr(&(pcp)) op val; \ +} while (0) + +#define raw_cpu_generic_add_return(pcp, val) \ +({ \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ +}) + +#define raw_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + ret__; \ +}) + +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int __ret = 0; \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ + __ret = 1; \ + } \ + (__ret); \ +}) + +#define _this_cpu_generic_read(pcp) \ +({ typeof(pcp) ret__; \ + preempt_disable(); \ + ret__ = *this_cpu_ptr(&(pcp)); \ + preempt_enable(); \ + ret__; \ +}) + +#define _this_cpu_generic_to_op(pcp, val, op) \ +do { \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + *raw_cpu_ptr(&(pcp)) op val; \ + raw_local_irq_restore(flags); \ +} while (0) + +#define _this_cpu_generic_add_return(pcp, val) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_xchg(pcp, nval) \ +({ typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +({ \ + typeof(pcp) ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_read(pcp); \ + if (ret__ == (oval)) \ + raw_cpu_write(pcp, nval); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + +#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ \ + int ret__; \ + unsigned long flags; \ + raw_local_irq_save(flags); \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + oval1, oval2, nval1, nval2); \ + raw_local_irq_restore(flags); \ + ret__; \ +}) + # ifndef raw_cpu_read_1 # define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif @@ -78,11 +177,6 @@ extern void setup_per_cpu_areas(void); # define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif -#define raw_cpu_generic_to_op(pcp, val, op) \ -do { \ - *raw_cpu_ptr(&(pcp)) op val; \ -} while (0) - # ifndef raw_cpu_write_1 # define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif @@ -135,12 +229,6 @@ do { \ # define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -#define raw_cpu_generic_add_return(pcp, val) \ -({ \ - raw_cpu_add(pcp, val); \ - raw_cpu_read(pcp); \ -}) - # ifndef raw_cpu_add_return_1 # define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif @@ -154,13 +242,6 @@ do { \ # define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -#define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - # ifndef raw_cpu_xchg_1 # define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif @@ -174,15 +255,6 @@ do { \ # define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - ret__; \ -}) - # ifndef raw_cpu_cmpxchg_1 # define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -196,18 +268,6 @@ do { \ # define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int __ret = 0; \ - if (raw_cpu_read(pcp1) == (oval1) && \ - raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ - __ret = 1; \ - } \ - (__ret); \ -}) - # ifndef raw_cpu_cmpxchg_double_1 # define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) @@ -225,14 +285,6 @@ do { \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ - preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ - preempt_enable(); \ - ret__; \ -}) - # ifndef this_cpu_read_1 # define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) # endif @@ -246,14 +298,6 @@ do { \ # define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) # endif -#define _this_cpu_generic_to_op(pcp, val, op) \ -do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ -} while (0) - # ifndef this_cpu_write_1 # define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) # endif @@ -306,17 +350,6 @@ do { \ # define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) # endif -#define _this_cpu_generic_add_return(pcp, val) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_add_return_1 # define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif @@ -330,16 +363,6 @@ do { \ # define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) # endif -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_xchg_1 # define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif @@ -353,18 +376,6 @@ do { \ # define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) # endif -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ -({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ - raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_cmpxchg_1 # define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif @@ -378,17 +389,6 @@ do { \ # define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) # endif -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ -({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ - oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ -}) - # ifndef this_cpu_cmpxchg_double_1 # define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 6710eb9555fa..fd0b9ee19ec8 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -52,19 +52,6 @@ #define __PCPU_DUMMY_ATTRS \ __attribute__((section(".discard"), unused)) -/* - * Macro which verifies @ptr is a percpu pointer without evaluating - * @ptr. This is to be used in percpu accessors to verify that the - * input parameter is a percpu pointer. - * - * + 0 is required in order to convert the pointer type from a - * potential array type to a pointer to a single item of the array. - */ -#define __verify_pcpu_ptr(ptr) do { \ - const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ - (void)__vpp_verify; \ -} while (0) - /* * s390 and alpha modules require percpu variables to be defined as * weak to force the compiler to generate GOT based external @@ -203,6 +190,19 @@ */ #ifndef __ASSEMBLY__ +/* + * Macro which verifies @ptr is a percpu pointer without evaluating + * @ptr. This is to be used in percpu accessors to verify that the + * input parameter is a percpu pointer. + * + * + 0 is required in order to convert the pointer type from a + * potential array type to a pointer to a single item of the array. + */ +#define __verify_pcpu_ptr(ptr) do { \ + const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ + (void)__vpp_verify; \ +} while (0) + #ifdef CONFIG_SMP /* From cadb1c4db2d33e0a818f645cd1963a479dab91e2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:39 -0400 Subject: [PATCH 10/21] percpu: use raw_cpu_*() to define __this_cpu_*() __this_cpu_*() operations are the same as raw_cpu_*() operations except for the added __this_cpu_preempt_check(). Curiously, these were defined using __pcu_size_call_*() instead of being layered on top of raw_cpu_*(). Let's layer them so that __this_cpu_*() are defined in terms of raw_cpu_*(). It's simpler and less error-prone this way. This patch doesn't introduce any functional difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/linux/percpu-defs.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index fd0b9ee19ec8..215917e9a176 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -403,16 +403,16 @@ do { \ * Generic percpu operations for context that are safe from preemption/interrupts. */ # define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) + (__this_cpu_preempt_check("read"),raw_cpu_read(pcp)) # define __this_cpu_write(pcp, val) \ do { __this_cpu_preempt_check("write"); \ - __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ + raw_cpu_write(pcp, val); \ } while (0) # define __this_cpu_add(pcp, val) \ do { __this_cpu_preempt_check("add"); \ - __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ + raw_cpu_add(pcp, val); \ } while (0) # define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) @@ -421,29 +421,29 @@ do { __this_cpu_preempt_check("add"); \ # define __this_cpu_and(pcp, val) \ do { __this_cpu_preempt_check("and"); \ - __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ + raw_cpu_and(pcp, val); \ } while (0) # define __this_cpu_or(pcp, val) \ do { __this_cpu_preempt_check("or"); \ - __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ + raw_cpu_or(pcp, val); \ } while (0) # define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) + (__this_cpu_preempt_check("add_return"),raw_cpu_add_return(pcp, val)) #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) # define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) + (__this_cpu_preempt_check("xchg"),raw_cpu_xchg(pcp, nval)) # define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) + (__this_cpu_preempt_check("cmpxchg"),raw_cpu_cmpxchg(pcp, oval, nval)) # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) + (__this_cpu_preempt_check("cmpxchg_double"),raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)) /* * this_cpu_*() operations are used for accesses that must be done in a From eba117889ac444bea6e8270049cbaeed48169889 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:40 -0400 Subject: [PATCH 11/21] percpu: preffity percpu header files percpu macros are difficult to read. It's partly because they're fairly complex but also because they simply lack visual and conventional consistency to an unusual degree. The preceding patches tried to organize macro definitions consistently by their roles. This patch makes the following cosmetic changes to improve overall readability. * Use consistent convention for multi-line macro definitions - "do {" or "({" are now put on their own lines and the line continuing '\' are all put on the same column. * Temp variables used inside macro are consistently given "__" prefix. * When a macro argument is passed to another macro or a function, putting extra parenthses around it doesn't help anything. Don't put them. * _this_cpu_generic_*() are renamed to this_cpu_generic_*() so that they're consistent with raw_cpu_generic_*(). * Reorganize raw_cpu_*() and this_cpu_*() definitions so that trivial wrappers are collected in one place after actual operation definitions. * Other misc cleanups including reorganizing comments. All changes in this patch are cosmetic and cause no functional difference. Signed-off-by: Tejun Heo Acked-by: Christoph Lameter --- include/asm-generic/percpu.h | 547 ++++++++++++++++++----------------- include/linux/percpu-defs.h | 261 +++++++++-------- 2 files changed, 422 insertions(+), 386 deletions(-) diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h index 2300d989087b..4d9f233c4ba8 100644 --- a/include/asm-generic/percpu.h +++ b/include/asm-generic/percpu.h @@ -77,333 +77,344 @@ do { \ }) #define raw_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ +({ \ + typeof(pcp) __ret; \ + __ret = raw_cpu_read(pcp); \ raw_cpu_write(pcp, nval); \ - ret__; \ + __ret; \ }) #define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) ret__; \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ + typeof(pcp) __ret; \ + __ret = raw_cpu_read(pcp); \ + if (__ret == (oval)) \ raw_cpu_write(pcp, nval); \ - ret__; \ + __ret; \ }) -#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ int __ret = 0; \ if (raw_cpu_read(pcp1) == (oval1) && \ raw_cpu_read(pcp2) == (oval2)) { \ - raw_cpu_write(pcp1, (nval1)); \ - raw_cpu_write(pcp2, (nval2)); \ + raw_cpu_write(pcp1, nval1); \ + raw_cpu_write(pcp2, nval2); \ __ret = 1; \ } \ (__ret); \ }) -#define _this_cpu_generic_read(pcp) \ -({ typeof(pcp) ret__; \ +#define this_cpu_generic_read(pcp) \ +({ \ + typeof(pcp) __ret; \ preempt_disable(); \ - ret__ = *this_cpu_ptr(&(pcp)); \ + __ret = *this_cpu_ptr(&(pcp)); \ preempt_enable(); \ - ret__; \ + __ret; \ }) -#define _this_cpu_generic_to_op(pcp, val, op) \ +#define this_cpu_generic_to_op(pcp, val, op) \ do { \ - unsigned long flags; \ - raw_local_irq_save(flags); \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ *raw_cpu_ptr(&(pcp)) op val; \ - raw_local_irq_restore(flags); \ + raw_local_irq_restore(__flags); \ } while (0) -#define _this_cpu_generic_add_return(pcp, val) \ +#define this_cpu_generic_add_return(pcp, val) \ ({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - raw_cpu_add(pcp, val); \ - ret__ = raw_cpu_read(pcp); \ - raw_local_irq_restore(flags); \ - ret__; \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + raw_cpu_add(pcp, val); \ + __ret = raw_cpu_read(pcp); \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_xchg(pcp, nval) \ -({ typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ +#define this_cpu_generic_xchg(pcp, nval) \ +({ \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_read(pcp); \ raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_cmpxchg(pcp, oval, nval) \ +#define this_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ - typeof(pcp) ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_read(pcp); \ - if (ret__ == (oval)) \ + typeof(pcp) __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_read(pcp); \ + if (__ret == (oval)) \ raw_cpu_write(pcp, nval); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -#define _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ - int ret__; \ - unsigned long flags; \ - raw_local_irq_save(flags); \ - ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + int __ret; \ + unsigned long __flags; \ + raw_local_irq_save(__flags); \ + __ret = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ - raw_local_irq_restore(flags); \ - ret__; \ + raw_local_irq_restore(__flags); \ + __ret; \ }) -# ifndef raw_cpu_read_1 -# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_2 -# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_4 -# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) -# endif -# ifndef raw_cpu_read_8 -# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) -# endif +#ifndef raw_cpu_read_1 +#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_2 +#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_4 +#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) +#endif +#ifndef raw_cpu_read_8 +#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) +#endif -# ifndef raw_cpu_write_1 -# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_2 -# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_4 -# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef raw_cpu_write_8 -# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) -# endif +#ifndef raw_cpu_write_1 +#define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_2 +#define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_4 +#define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef raw_cpu_write_8 +#define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op(pcp, val, =) +#endif -# ifndef raw_cpu_add_1 -# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_2 -# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_4 -# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef raw_cpu_add_8 -# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) -# endif +#ifndef raw_cpu_add_1 +#define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_2 +#define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_4 +#define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef raw_cpu_add_8 +#define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op(pcp, val, +=) +#endif -# ifndef raw_cpu_and_1 -# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_2 -# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_4 -# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef raw_cpu_and_8 -# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) -# endif +#ifndef raw_cpu_and_1 +#define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_2 +#define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_4 +#define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef raw_cpu_and_8 +#define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op(pcp, val, &=) +#endif -# ifndef raw_cpu_or_1 -# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_2 -# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_4 -# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef raw_cpu_or_8 -# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) -# endif +#ifndef raw_cpu_or_1 +#define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_2 +#define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_4 +#define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef raw_cpu_or_8 +#define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op(pcp, val, |=) +#endif -# ifndef raw_cpu_add_return_1 -# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_2 -# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_4 -# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif -# ifndef raw_cpu_add_return_8 -# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) -# endif +#ifndef raw_cpu_add_return_1 +#define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_2 +#define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_4 +#define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif +#ifndef raw_cpu_add_return_8 +#define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) +#endif -# ifndef raw_cpu_xchg_1 -# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_2 -# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_4 -# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif -# ifndef raw_cpu_xchg_8 -# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) -# endif +#ifndef raw_cpu_xchg_1 +#define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_2 +#define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_4 +#define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif +#ifndef raw_cpu_xchg_8 +#define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) +#endif -# ifndef raw_cpu_cmpxchg_1 -# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_2 -# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_4 -# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef raw_cpu_cmpxchg_8 -# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) -# endif +#ifndef raw_cpu_cmpxchg_1 +#define raw_cpu_cmpxchg_1(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_2 +#define raw_cpu_cmpxchg_2(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_4 +#define raw_cpu_cmpxchg_4(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef raw_cpu_cmpxchg_8 +#define raw_cpu_cmpxchg_8(pcp, oval, nval) \ + raw_cpu_generic_cmpxchg(pcp, oval, nval) +#endif -# ifndef raw_cpu_cmpxchg_double_1 -# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#ifndef raw_cpu_cmpxchg_double_1 +#define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_2 -# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_2 +#define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_4 -# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_4 +#define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef raw_cpu_cmpxchg_double_8 -# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#endif +#ifndef raw_cpu_cmpxchg_double_8 +#define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif +#endif -# ifndef this_cpu_read_1 -# define this_cpu_read_1(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_2 -# define this_cpu_read_2(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_4 -# define this_cpu_read_4(pcp) _this_cpu_generic_read(pcp) -# endif -# ifndef this_cpu_read_8 -# define this_cpu_read_8(pcp) _this_cpu_generic_read(pcp) -# endif +#ifndef this_cpu_read_1 +#define this_cpu_read_1(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_2 +#define this_cpu_read_2(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_4 +#define this_cpu_read_4(pcp) this_cpu_generic_read(pcp) +#endif +#ifndef this_cpu_read_8 +#define this_cpu_read_8(pcp) this_cpu_generic_read(pcp) +#endif -# ifndef this_cpu_write_1 -# define this_cpu_write_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_2 -# define this_cpu_write_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_4 -# define this_cpu_write_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif -# ifndef this_cpu_write_8 -# define this_cpu_write_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), =) -# endif +#ifndef this_cpu_write_1 +#define this_cpu_write_1(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_2 +#define this_cpu_write_2(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_4 +#define this_cpu_write_4(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif +#ifndef this_cpu_write_8 +#define this_cpu_write_8(pcp, val) this_cpu_generic_to_op(pcp, val, =) +#endif -# ifndef this_cpu_add_1 -# define this_cpu_add_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_2 -# define this_cpu_add_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_4 -# define this_cpu_add_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif -# ifndef this_cpu_add_8 -# define this_cpu_add_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), +=) -# endif +#ifndef this_cpu_add_1 +#define this_cpu_add_1(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_2 +#define this_cpu_add_2(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_4 +#define this_cpu_add_4(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif +#ifndef this_cpu_add_8 +#define this_cpu_add_8(pcp, val) this_cpu_generic_to_op(pcp, val, +=) +#endif -# ifndef this_cpu_and_1 -# define this_cpu_and_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_2 -# define this_cpu_and_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_4 -# define this_cpu_and_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif -# ifndef this_cpu_and_8 -# define this_cpu_and_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), &=) -# endif +#ifndef this_cpu_and_1 +#define this_cpu_and_1(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_2 +#define this_cpu_and_2(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_4 +#define this_cpu_and_4(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif +#ifndef this_cpu_and_8 +#define this_cpu_and_8(pcp, val) this_cpu_generic_to_op(pcp, val, &=) +#endif -# ifndef this_cpu_or_1 -# define this_cpu_or_1(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_2 -# define this_cpu_or_2(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_4 -# define this_cpu_or_4(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif -# ifndef this_cpu_or_8 -# define this_cpu_or_8(pcp, val) _this_cpu_generic_to_op((pcp), (val), |=) -# endif +#ifndef this_cpu_or_1 +#define this_cpu_or_1(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_2 +#define this_cpu_or_2(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_4 +#define this_cpu_or_4(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif +#ifndef this_cpu_or_8 +#define this_cpu_or_8(pcp, val) this_cpu_generic_to_op(pcp, val, |=) +#endif -# ifndef this_cpu_add_return_1 -# define this_cpu_add_return_1(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_2 -# define this_cpu_add_return_2(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_4 -# define this_cpu_add_return_4(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif -# ifndef this_cpu_add_return_8 -# define this_cpu_add_return_8(pcp, val) _this_cpu_generic_add_return(pcp, val) -# endif +#ifndef this_cpu_add_return_1 +#define this_cpu_add_return_1(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_2 +#define this_cpu_add_return_2(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_4 +#define this_cpu_add_return_4(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif +#ifndef this_cpu_add_return_8 +#define this_cpu_add_return_8(pcp, val) this_cpu_generic_add_return(pcp, val) +#endif -# ifndef this_cpu_xchg_1 -# define this_cpu_xchg_1(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_2 -# define this_cpu_xchg_2(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_4 -# define this_cpu_xchg_4(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif -# ifndef this_cpu_xchg_8 -# define this_cpu_xchg_8(pcp, nval) _this_cpu_generic_xchg(pcp, nval) -# endif +#ifndef this_cpu_xchg_1 +#define this_cpu_xchg_1(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_2 +#define this_cpu_xchg_2(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_4 +#define this_cpu_xchg_4(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif +#ifndef this_cpu_xchg_8 +#define this_cpu_xchg_8(pcp, nval) this_cpu_generic_xchg(pcp, nval) +#endif -# ifndef this_cpu_cmpxchg_1 -# define this_cpu_cmpxchg_1(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_2 -# define this_cpu_cmpxchg_2(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_4 -# define this_cpu_cmpxchg_4(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif -# ifndef this_cpu_cmpxchg_8 -# define this_cpu_cmpxchg_8(pcp, oval, nval) _this_cpu_generic_cmpxchg(pcp, oval, nval) -# endif +#ifndef this_cpu_cmpxchg_1 +#define this_cpu_cmpxchg_1(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_2 +#define this_cpu_cmpxchg_2(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_4 +#define this_cpu_cmpxchg_4(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif +#ifndef this_cpu_cmpxchg_8 +#define this_cpu_cmpxchg_8(pcp, oval, nval) \ + this_cpu_generic_cmpxchg(pcp, oval, nval) +#endif -# ifndef this_cpu_cmpxchg_double_1 -# define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_2 -# define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_4 -# define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif -# ifndef this_cpu_cmpxchg_double_8 -# define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - _this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) -# endif +#ifndef this_cpu_cmpxchg_double_1 +#define this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_2 +#define this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_4 +#define this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif +#ifndef this_cpu_cmpxchg_double_8 +#define this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#endif #endif /* _ASM_GENERIC_PERCPU_H_ */ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 215917e9a176..d8bb6e001c6a 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -198,7 +198,8 @@ * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array. */ -#define __verify_pcpu_ptr(ptr) do { \ +#define __verify_pcpu_ptr(ptr) \ +do { \ const void __percpu *__vpp_verify = (typeof((ptr) + 0))NULL; \ (void)__vpp_verify; \ } while (0) @@ -210,12 +211,13 @@ * to prevent the compiler from making incorrect assumptions about the * pointer value. The weird cast keeps both GCC and sparse happy. */ -#define SHIFT_PERCPU_PTR(__p, __offset) ({ \ - __verify_pcpu_ptr((__p)); \ +#define SHIFT_PERCPU_PTR(__p, __offset) \ +({ \ + __verify_pcpu_ptr(__p); \ RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ }) -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) +#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR(ptr, per_cpu_offset(cpu)) #define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) #ifdef CONFIG_DEBUG_PREEMPT @@ -226,12 +228,13 @@ #else /* CONFIG_SMP */ -#define VERIFY_PERCPU_PTR(__p) ({ \ - __verify_pcpu_ptr((__p)); \ - (typeof(*(__p)) __kernel __force *)(__p); \ +#define VERIFY_PERCPU_PTR(__p) \ +({ \ + __verify_pcpu_ptr(__p); \ + (typeof(*(__p)) __kernel __force *)(__p); \ }) -#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR((ptr)); }) +#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); VERIFY_PERCPU_PTR(ptr); }) #define raw_cpu_ptr(ptr) per_cpu_ptr(ptr, 0) #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) @@ -248,26 +251,32 @@ * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. */ -#define get_cpu_var(var) (*({ \ - preempt_disable(); \ - this_cpu_ptr(&var); })) +#define get_cpu_var(var) \ +(*({ \ + preempt_disable(); \ + this_cpu_ptr(&var); \ +})) /* * The weird & is necessary because sparse considers (void)(var) to be * a direct dereference of percpu variable (var). */ -#define put_cpu_var(var) do { \ - (void)&(var); \ - preempt_enable(); \ +#define put_cpu_var(var) \ +do { \ + (void)&(var); \ + preempt_enable(); \ } while (0) -#define get_cpu_ptr(var) ({ \ - preempt_disable(); \ - this_cpu_ptr(var); }) +#define get_cpu_ptr(var) \ +({ \ + preempt_disable(); \ + this_cpu_ptr(var); \ +}) -#define put_cpu_ptr(var) do { \ - (void)(var); \ - preempt_enable(); \ +#define put_cpu_ptr(var) \ +do { \ + (void)(var); \ + preempt_enable(); \ } while (0) /* @@ -284,15 +293,16 @@ static inline void __this_cpu_preempt_check(const char *op) { } #endif #define __pcpu_size_call_return(stem, variable) \ -({ typeof(variable) pscr_ret__; \ +({ \ + typeof(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ switch(sizeof(variable)) { \ - case 1: pscr_ret__ = stem##1(variable);break; \ - case 2: pscr_ret__ = stem##2(variable);break; \ - case 4: pscr_ret__ = stem##4(variable);break; \ - case 8: pscr_ret__ = stem##8(variable);break; \ + case 1: pscr_ret__ = stem##1(variable); break; \ + case 2: pscr_ret__ = stem##2(variable); break; \ + case 4: pscr_ret__ = stem##4(variable); break; \ + case 8: pscr_ret__ = stem##8(variable); break; \ default: \ - __bad_size_call_parameter();break; \ + __bad_size_call_parameter(); break; \ } \ pscr_ret__; \ }) @@ -323,11 +333,11 @@ static inline void __this_cpu_preempt_check(const char *op) { } #define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) \ ({ \ bool pdcrb_ret__; \ - __verify_pcpu_ptr(&pcp1); \ + __verify_pcpu_ptr(&(pcp1)); \ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \ - VM_BUG_ON((unsigned long)(&pcp1) % (2 * sizeof(pcp1))); \ - VM_BUG_ON((unsigned long)(&pcp2) != \ - (unsigned long)(&pcp1) + sizeof(pcp1)); \ + VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \ + VM_BUG_ON((unsigned long)(&(pcp2)) != \ + (unsigned long)(&(pcp1)) + sizeof(pcp1)); \ switch(sizeof(pcp1)) { \ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \ @@ -367,117 +377,132 @@ do { \ * cpu atomic operations for 2 byte sized RMW actions. If arch code does * not provide operations for a scalar size then the fallback in the * generic code will be used. - */ - -/* - * Generic percpu operations for contexts where we do not want to do - * any checks for preemptiosn. * - * If there is no other protection through preempt disable and/or - * disabling interupts then one of these RMW operations can show unexpected - * behavior because the execution thread was rescheduled on another processor - * or an interrupt occurred and the same percpu variable was modified from - * the interrupt context. + * cmpxchg_double replaces two adjacent scalars at once. The first two + * parameters are per cpu variables which have to be of the same size. A + * truth value is returned to indicate success or failure (since a double + * register result is difficult to handle). There is very limited hardware + * support for these operations, so only certain sizes may work. */ -# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) -# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) -# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) -# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) -# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) -# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) -# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) -# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) -# define raw_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) -#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) -#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -# define raw_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) -# define raw_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) -# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) /* - * Generic percpu operations for context that are safe from preemption/interrupts. + * Operations for contexts where we do not want to do any checks for + * preemptions. Unless strictly necessary, always use [__]this_cpu_*() + * instead. + * + * If there is no other protection through preempt disable and/or disabling + * interupts then one of these RMW operations can show unexpected behavior + * because the execution thread was rescheduled on another processor or an + * interrupt occurred and the same percpu variable was modified from the + * interrupt context. */ -# define __this_cpu_read(pcp) \ - (__this_cpu_preempt_check("read"),raw_cpu_read(pcp)) +#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp) +#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val) +#define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val) +#define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val) +#define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val) +#define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval) +#define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) -# define __this_cpu_write(pcp, val) \ -do { __this_cpu_preempt_check("write"); \ - raw_cpu_write(pcp, val); \ -} while (0) +#define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val)) +#define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1) +#define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -# define __this_cpu_add(pcp, val) \ -do { __this_cpu_preempt_check("add"); \ +/* + * Operations for contexts that are safe from preemption/interrupts. These + * operations verify that preemption is disabled. + */ +#define __this_cpu_read(pcp) \ +({ \ + __this_cpu_preempt_check("read"); \ + raw_cpu_read(pcp); \ +}) + +#define __this_cpu_write(pcp, val) \ +({ \ + __this_cpu_preempt_check("write"); \ + raw_cpu_write(pcp, val); \ +}) + +#define __this_cpu_add(pcp, val) \ +({ \ + __this_cpu_preempt_check("add"); \ raw_cpu_add(pcp, val); \ -} while (0) +}) -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) - -# define __this_cpu_and(pcp, val) \ -do { __this_cpu_preempt_check("and"); \ +#define __this_cpu_and(pcp, val) \ +({ \ + __this_cpu_preempt_check("and"); \ raw_cpu_and(pcp, val); \ -} while (0) +}) -# define __this_cpu_or(pcp, val) \ -do { __this_cpu_preempt_check("or"); \ +#define __this_cpu_or(pcp, val) \ +({ \ + __this_cpu_preempt_check("or"); \ raw_cpu_or(pcp, val); \ -} while (0) +}) -# define __this_cpu_add_return(pcp, val) \ - (__this_cpu_preempt_check("add_return"),raw_cpu_add_return(pcp, val)) +#define __this_cpu_add_return(pcp, val) \ +({ \ + __this_cpu_preempt_check("add_return"); \ + raw_cpu_add_return(pcp, val); \ +}) +#define __this_cpu_xchg(pcp, nval) \ +({ \ + __this_cpu_preempt_check("xchg"); \ + raw_cpu_xchg(pcp, nval); \ +}) + +#define __this_cpu_cmpxchg(pcp, oval, nval) \ +({ \ + __this_cpu_preempt_check("cmpxchg"); \ + raw_cpu_cmpxchg(pcp, oval, nval); \ +}) + +#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +({ __this_cpu_preempt_check("cmpxchg_double"); \ + raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \ +}) + +#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) +#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) #define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) -# define __this_cpu_xchg(pcp, nval) \ - (__this_cpu_preempt_check("xchg"),raw_cpu_xchg(pcp, nval)) - -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - (__this_cpu_preempt_check("cmpxchg"),raw_cpu_cmpxchg(pcp, oval, nval)) - -# define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - (__this_cpu_preempt_check("cmpxchg_double"),raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)) - /* - * this_cpu_*() operations are used for accesses that must be done in a - * preemption safe way since we know that the context is not preempt - * safe. Interrupts may occur. If the interrupt modifies the variable too - * then RMW actions will not be reliable. + * Operations with implied preemption protection. These operations can be + * used without worrying about preemption. Note that interrupts may still + * occur while an operation is in progress and if the interrupt modifies + * the variable too then RMW actions may not be reliable. */ -# define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, (pcp)) -# define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, (pcp), (val)) -# define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, (pcp), (val)) -# define this_cpu_sub(pcp, val) this_cpu_add((pcp), -(typeof(pcp))(val)) -# define this_cpu_inc(pcp) this_cpu_add((pcp), 1) -# define this_cpu_dec(pcp) this_cpu_sub((pcp), 1) -# define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, (pcp), (val)) -# define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, (pcp), (val)) -# define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_read(pcp) __pcpu_size_call_return(this_cpu_read_, pcp) +#define this_cpu_write(pcp, val) __pcpu_size_call(this_cpu_write_, pcp, val) +#define this_cpu_add(pcp, val) __pcpu_size_call(this_cpu_add_, pcp, val) +#define this_cpu_and(pcp, val) __pcpu_size_call(this_cpu_and_, pcp, val) +#define this_cpu_or(pcp, val) __pcpu_size_call(this_cpu_or_, pcp, val) +#define this_cpu_add_return(pcp, val) __pcpu_size_call_return2(this_cpu_add_return_, pcp, val) +#define this_cpu_xchg(pcp, nval) __pcpu_size_call_return2(this_cpu_xchg_, pcp, nval) +#define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) +#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc(pcp) this_cpu_add(pcp, 1) +#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) #define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) #define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) #define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) -# define this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(this_cpu_xchg_, (pcp), nval) -# define this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) - -/* - * cmpxchg_double replaces two adjacent scalars at once. The first - * two parameters are per cpu variables which have to be of the same - * size. A truth value is returned to indicate success or failure - * (since a double register result is difficult to handle). There is - * very limited hardware support for these operations, so only certain - * sizes may work. - */ -# define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif /* __ASSEMBLY__ */ #endif /* _LINUX_PERCPU_DEFS_H */ From 6fbc07bbe2b5a898532f970c5a397f8789ace0d5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 17 Jun 2014 19:12:40 -0400 Subject: [PATCH 12/21] percpu: invoke __verify_pcpu_ptr() from the generic part of accessors and operations __verify_pcpu_ptr() is used to verify that a specified parameter is actually an percpu pointer by percpu accessor and operation implementations. Currently, where it's called isn't clearly defined and we just ensure that it's invoked at least once for all accessors and operations. The lack of clarity on when it should be called isn't nice and given that this is a completely generic issue, there's no reason to make archs worry about it. This patch updates __verify_pcpu_ptr() invocations such that it's always invoked from the final generic wrapper once per access or operation. As this is already the case for {raw|this}_cpu_*() definitions through __pcpu_size_*(), only the {raw|per|this}_cpu_ptr() accessors need to be updated. This change makes it unnecessary for archs to worry about __verify_pcpu_ptr(). x86's arch_raw_cpu_ptr() is updated accordingly. Signed-off-by: Tejun Heo Cc: Christoph Lameter Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" --- arch/x86/include/asm/percpu.h | 1 - include/linux/percpu-defs.h | 29 +++++++++++++++++++++-------- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 9bc23f18a6fa..fd472181a1d0 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -55,7 +55,6 @@ #define arch_raw_cpu_ptr(ptr) \ ({ \ unsigned long tcp_ptr__; \ - __verify_pcpu_ptr(ptr); \ asm volatile("add " __percpu_arg(1) ", %0" \ : "=r" (tcp_ptr__) \ : "m" (this_cpu_off), "0" (ptr)); \ diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index d8bb6e001c6a..c93fff16776c 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -191,9 +191,12 @@ #ifndef __ASSEMBLY__ /* - * Macro which verifies @ptr is a percpu pointer without evaluating - * @ptr. This is to be used in percpu accessors to verify that the - * input parameter is a percpu pointer. + * __verify_pcpu_ptr() verifies @ptr is a percpu pointer without evaluating + * @ptr and is invoked once before a percpu area is accessed by all + * accessors and operations. This is performed in the generic part of + * percpu and arch overrides don't need to worry about it; however, if an + * arch wants to implement an arch-specific percpu accessor or operation, + * it may use __verify_pcpu_ptr() to verify the parameters. * * + 0 is required in order to convert the pointer type from a * potential array type to a pointer to a single item of the array. @@ -212,16 +215,26 @@ do { \ * pointer value. The weird cast keeps both GCC and sparse happy. */ #define SHIFT_PERCPU_PTR(__p, __offset) \ + RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)) + +#define per_cpu_ptr(ptr, cpu) \ ({ \ - __verify_pcpu_ptr(__p); \ - RELOC_HIDE((typeof(*(__p)) __kernel __force *)(__p), (__offset)); \ + __verify_pcpu_ptr(ptr); \ + SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))); \ }) -#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR(ptr, per_cpu_offset(cpu)) -#define raw_cpu_ptr(ptr) arch_raw_cpu_ptr(ptr) +#define raw_cpu_ptr(ptr) \ +({ \ + __verify_pcpu_ptr(ptr); \ + arch_raw_cpu_ptr(ptr); \ +}) #ifdef CONFIG_DEBUG_PREEMPT -#define this_cpu_ptr(ptr) SHIFT_PERCPU_PTR(ptr, my_cpu_offset) +#define this_cpu_ptr(ptr) \ +({ \ + __verify_pcpu_ptr(ptr); \ + SHIFT_PERCPU_PTR(ptr, my_cpu_offset); \ +}) #else #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) #endif From fb009e3a998e48fb33436bf9b563c82801a7329c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 19 Jun 2014 09:59:18 -0500 Subject: [PATCH 13/21] percpu: Use ALIGN macro instead of hand coding alignment calculation Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- mm/percpu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mm/percpu.c b/mm/percpu.c index 2ddf9a990dbd..2139e30a4b44 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -720,8 +720,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) if (unlikely(align < 2)) align = 2; - if (unlikely(size & 1)) - size++; + size = ALIGN(size, 2); if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { WARN(true, "illegal size (%zu) or align (%zu) for " From f05b558d7e0368e0511a098c4beb9654e41246c4 Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Jun 2014 15:33:27 +0800 Subject: [PATCH 14/21] workqueue: clear POOL_DISASSOCIATED in rebind_workers() The commit a9ab775bcadf ("workqueue: directly restore CPU affinity of workers from CPU_ONLINE") moved the pool->lock into rebind_workers() without also moving "pool->flags &= ~POOL_DISASSOCIATED". There is nothing wrong with "pool->flags &= ~POOL_DISASSOCIATED" not being moved together, but there isn't any benefit either. We move it into rebind_workers() and achieve these benefits: 1) Better readability. POOL_DISASSOCIATED is cleared in rebind_workers() as expected. 2) When POOL_DISASSOCIATED is cleared, we can ensure that all the running workers of the pool are on the local CPU (pool->cpu). tj: Cosmetic updates to the code and description. Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 6203d2900877..129e4c3452ee 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4542,6 +4542,7 @@ static void rebind_workers(struct worker_pool *pool) pool->attrs->cpumask) < 0); spin_lock_irq(&pool->lock); + pool->flags &= ~POOL_DISASSOCIATED; for_each_pool_worker(worker, pool) { unsigned int worker_flags = worker->flags; @@ -4643,15 +4644,10 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, for_each_pool(pool, pi) { mutex_lock(&pool->attach_mutex); - if (pool->cpu == cpu) { - spin_lock_irq(&pool->lock); - pool->flags &= ~POOL_DISASSOCIATED; - spin_unlock_irq(&pool->lock); - + if (pool->cpu == cpu) rebind_workers(pool); - } else if (pool->cpu < 0) { + else if (pool->cpu < 0) restore_unbound_workers_cpumask(pool, cpu); - } mutex_unlock(&pool->attach_mutex); } From 807407c0a29b1e9fe35565f5a671ef3a9dd9f00c Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 3 Jun 2014 15:33:28 +0800 Subject: [PATCH 15/21] workqueue: stronger test in process_one_work() After the recent changes, when POOL_DISASSOCIATED is cleared, the running worker's local CPU should be the same as pool->cpu without any exception even during cpu-hotplug. Update the sanity check in process_one_work() accordingly. This patch changes "(proposition_A && proposition_B && proposition_C)" to "(proposition_B && proposition_C)", so if the old compound proposition is true, the new one must be true too. so this will not hide any possible bug which can be caught by the old test. tj: Minor updates to the description. CC: Jason J. Herne CC: Sasha Levin Signed-off-by: Lai Jiangshan Signed-off-by: Tejun Heo --- kernel/workqueue.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 129e4c3452ee..a3021d63f62d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2020,13 +2020,8 @@ __acquires(&pool->lock) lockdep_copy_map(&lockdep_map, &work->lockdep_map); #endif - /* - * Ensure we're on the correct CPU. DISASSOCIATED test is - * necessary to avoid spurious warnings from rescuers servicing the - * unbound or a disassociated pool. - */ - WARN_ON_ONCE(!(worker->flags & WORKER_UNBOUND) && - !(pool->flags & POOL_DISASSOCIATED) && + /* ensure we're on the correct CPU */ + WARN_ON_ONCE(!(pool->flags & POOL_DISASSOCIATED) && raw_smp_processor_id() != pool->cpu); /* From 55c6c814ae0aa896781d3c51e4608de542624f64 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:12 -0400 Subject: [PATCH 16/21] percpu-refcount, aio: use percpu_ref_cancel_init() in ioctx_alloc() ioctx_alloc() reaches inside percpu_ref and directly frees ->pcpu_count in its failure path, which is quite gross. percpu_ref has been providing a proper interface to do this, percpu_ref_cancel_init(), for quite some time now. Let's use that instead. This patch doesn't introduce any behavior changes. Signed-off-by: Tejun Heo Acked-by: Benjamin LaHaise Cc: Kent Overstreet --- fs/aio.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 4f078c054b41..5e0d7f9cb693 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -715,8 +715,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err: mutex_unlock(&ctx->ring_lock); free_percpu(ctx->cpu); - free_percpu(ctx->reqs.pcpu_count); - free_percpu(ctx->users.pcpu_count); + percpu_ref_cancel_init(&ctx->reqs); + percpu_ref_cancel_init(&ctx->users); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); From d630dc4c9adb41e5bd1e06df2dbeaf622469ddd5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:12 -0400 Subject: [PATCH 17/21] percpu-refcount: one bit is enough for REF_STATUS percpu-refcount currently reserves two lowest bits of its percpu pointer to indicate its state; however, only one bit is used for PCPU_REF_DEAD. Simplify it by removing PCPU_STATUS_BITS/MASK and testing PCPU_REF_DEAD directly. This also allows the compiler to choose a more efficient instruction depending on the architecture. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 4 +--- lib/percpu-refcount.c | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 5d8920e23073..bfdeb0d48e21 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -88,12 +88,10 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define PCPU_STATUS_BITS 2 -#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1) #define PCPU_REF_PTR 0 #define PCPU_REF_DEAD 1 -#define REF_STATUS(count) (((unsigned long) count) & PCPU_STATUS_MASK) +#define REF_STATUS(count) (((unsigned long) count) & PCPU_REF_DEAD) /** * percpu_ref_get - increment a percpu refcount diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 963b7034a51b..17bce2bccc14 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -96,7 +96,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) /* Mask out PCPU_REF_DEAD */ pcpu_count = (unsigned __percpu *) - (((unsigned long) pcpu_count) & ~PCPU_STATUS_MASK); + (((unsigned long) pcpu_count) & ~PCPU_REF_DEAD); for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); From eae7975ddf031b3084f4a5f7d88f698aefad96fb Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:13 -0400 Subject: [PATCH 18/21] percpu-refcount: add helpers for ->percpu_count accesses * All four percpu_ref_*() operations implemented in the header file perform the same operation to determine whether the percpu_ref is alive and extract the percpu pointer. Factor out the common logic into __pcpu_ref_alive(). This doesn't change the generated code. * There are a couple places in percpu-refcount.c which masks out PCPU_REF_DEAD to obtain the percpu pointer. Factor it out into pcpu_count_ptr(). * The above changes make the WARN_ON_ONCE() conditional at the top of percpu_ref_kill_and_confirm() the only user of REF_STATUS(). Test PCPU_REF_DEAD directly and remove REF_STATUS(). This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 35 ++++++++++++++++++++------------- lib/percpu-refcount.c | 17 ++++++++-------- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index bfdeb0d48e21..b62a4ee6d6ad 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -88,10 +88,25 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) return percpu_ref_kill_and_confirm(ref, NULL); } -#define PCPU_REF_PTR 0 #define PCPU_REF_DEAD 1 -#define REF_STATUS(count) (((unsigned long) count) & PCPU_REF_DEAD) +/* + * Internal helper. Don't use outside percpu-refcount proper. The + * function doesn't return the pointer and let the caller test it for NULL + * because doing so forces the compiler to generate two conditional + * branches as it can't assume that @ref->pcpu_count is not NULL. + */ +static inline bool __pcpu_ref_alive(struct percpu_ref *ref, + unsigned __percpu **pcpu_countp) +{ + unsigned long pcpu_ptr = (unsigned long)ACCESS_ONCE(ref->pcpu_count); + + if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) + return false; + + *pcpu_countp = (unsigned __percpu *)pcpu_ptr; + return true; +} /** * percpu_ref_get - increment a percpu refcount @@ -105,9 +120,7 @@ static inline void percpu_ref_get(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_inc(*pcpu_count); else atomic_inc(&ref->count); @@ -131,9 +144,7 @@ static inline bool percpu_ref_tryget(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + if (__pcpu_ref_alive(ref, &pcpu_count)) { this_cpu_inc(*pcpu_count); ret = true; } else { @@ -166,9 +177,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) { + if (__pcpu_ref_alive(ref, &pcpu_count)) { this_cpu_inc(*pcpu_count); ret = true; } @@ -191,9 +200,7 @@ static inline void percpu_ref_put(struct percpu_ref *ref) rcu_read_lock_sched(); - pcpu_count = ACCESS_ONCE(ref->pcpu_count); - - if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR)) + if (__pcpu_ref_alive(ref, &pcpu_count)) this_cpu_dec(*pcpu_count); else if (unlikely(atomic_dec_and_test(&ref->count))) ref->release(ref); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 17bce2bccc14..087f1a04f9bc 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -31,6 +31,11 @@ #define PCPU_COUNT_BIAS (1U << 31) +static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) +{ + return (unsigned __percpu *)((unsigned long)ref->pcpu_count & ~PCPU_REF_DEAD); +} + /** * percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize @@ -74,7 +79,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_init); */ void percpu_ref_cancel_init(struct percpu_ref *ref) { - unsigned __percpu *pcpu_count = ref->pcpu_count; + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); int cpu; WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); @@ -82,7 +87,7 @@ void percpu_ref_cancel_init(struct percpu_ref *ref) if (pcpu_count) { for_each_possible_cpu(cpu) WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); - free_percpu(ref->pcpu_count); + free_percpu(pcpu_count); } } EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); @@ -90,14 +95,10 @@ EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu); - unsigned __percpu *pcpu_count = ref->pcpu_count; + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); unsigned count = 0; int cpu; - /* Mask out PCPU_REF_DEAD */ - pcpu_count = (unsigned __percpu *) - (((unsigned long) pcpu_count) & ~PCPU_REF_DEAD); - for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); @@ -152,7 +153,7 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE(REF_STATUS(ref->pcpu_count) == PCPU_REF_DEAD, + WARN_ONCE((unsigned long)ref->pcpu_count & PCPU_REF_DEAD, "percpu_ref_kill() called more than once!\n"); ref->pcpu_count = (unsigned __percpu *) From 7d742075120deb831c7b94c268ca20d409e91d60 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:13 -0400 Subject: [PATCH 19/21] percpu-refcount: use unsigned long for pcpu_count pointer percpu_ref->pcpu_count is a percpu pointer with a status flag in its lowest bit. As such, it always goes through arithmetic operations which is very cumbersome to do on a pointer. It has to be first casted to unsigned long and then back. Let's just make the field unsigned long so that we can skip the first casts. While at it, rename it to pcpu_counter_ptr to clarify that it's a pointer value. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter --- include/linux/percpu-refcount.h | 4 ++-- lib/percpu-refcount.c | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b62a4ee6d6ad..6f8cd4c0546c 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -61,7 +61,7 @@ struct percpu_ref { * hack because we need to keep the pointer around for * percpu_ref_kill_rcu()) */ - unsigned __percpu *pcpu_count; + unsigned long pcpu_count_ptr; percpu_ref_func_t *release; percpu_ref_func_t *confirm_kill; struct rcu_head rcu; @@ -99,7 +99,7 @@ static inline void percpu_ref_kill(struct percpu_ref *ref) static inline bool __pcpu_ref_alive(struct percpu_ref *ref, unsigned __percpu **pcpu_countp) { - unsigned long pcpu_ptr = (unsigned long)ACCESS_ONCE(ref->pcpu_count); + unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) return false; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 087f1a04f9bc..94e5b624de64 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -33,7 +33,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) { - return (unsigned __percpu *)((unsigned long)ref->pcpu_count & ~PCPU_REF_DEAD); + return (unsigned __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD); } /** @@ -51,8 +51,8 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count = alloc_percpu(unsigned); - if (!ref->pcpu_count) + ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + if (!ref->pcpu_count_ptr) return -ENOMEM; ref->release = release; @@ -153,11 +153,10 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill) { - WARN_ONCE((unsigned long)ref->pcpu_count & PCPU_REF_DEAD, + WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, "percpu_ref_kill() called more than once!\n"); - ref->pcpu_count = (unsigned __percpu *) - (((unsigned long) ref->pcpu_count)|PCPU_REF_DEAD); + ref->pcpu_count_ptr |= PCPU_REF_DEAD; ref->confirm_kill = confirm_kill; call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); From 9a1049da9bd2cd83fe11d46433e603c193aa9c71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:14 -0400 Subject: [PATCH 20/21] percpu-refcount: require percpu_ref to be exited explicitly Currently, a percpu_ref undoes percpu_ref_init() automatically by freeing the allocated percpu area when the percpu_ref is killed. While seemingly convenient, this has the following niggles. * It's impossible to re-init a released reference counter without going through re-allocation. * In the similar vein, it's impossible to initialize a percpu_ref count with static percpu variables. * We need and have an explicit destructor anyway for failure paths - percpu_ref_cancel_init(). This patch removes the automatic percpu counter freeing in percpu_ref_kill_rcu() and repurposes percpu_ref_cancel_init() into a generic destructor now named percpu_ref_exit(). percpu_ref_destroy() is considered but it gets confusing with percpu_ref_kill() while "exit" clearly indicates that it's the counterpart of percpu_ref_init(). All percpu_ref_cancel_init() users are updated to invoke percpu_ref_exit() instead and explicit percpu_ref_exit() calls are added to the destruction path of all percpu_ref users. Signed-off-by: Tejun Heo Acked-by: Benjamin LaHaise Cc: Kent Overstreet Cc: Christoph Lameter Cc: Benjamin LaHaise Cc: Nicholas A. Bellinger Cc: Li Zefan --- drivers/target/target_core_tpg.c | 4 +++- fs/aio.c | 6 ++++-- include/linux/percpu-refcount.h | 6 ++---- kernel/cgroup.c | 8 +++++--- lib/percpu-refcount.c | 33 ++++++++++---------------------- 5 files changed, 24 insertions(+), 33 deletions(-) diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c036595b17cf..fddfae61222f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -825,7 +825,7 @@ int core_tpg_add_lun( ret = core_dev_export(dev, tpg, lun); if (ret < 0) { - percpu_ref_cancel_init(&lun->lun_ref); + percpu_ref_exit(&lun->lun_ref); return ret; } @@ -880,5 +880,7 @@ int core_tpg_post_dellun( lun->lun_status = TRANSPORT_LUN_STATUS_FREE; spin_unlock(&tpg->tpg_lun_lock); + percpu_ref_exit(&lun->lun_ref); + return 0; } diff --git a/fs/aio.c b/fs/aio.c index 5e0d7f9cb693..ea1bc2e8f4f3 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -506,6 +506,8 @@ static void free_ioctx(struct work_struct *work) aio_free_ring(ctx); free_percpu(ctx->cpu); + percpu_ref_exit(&ctx->reqs); + percpu_ref_exit(&ctx->users); kmem_cache_free(kioctx_cachep, ctx); } @@ -715,8 +717,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) err: mutex_unlock(&ctx->ring_lock); free_percpu(ctx->cpu); - percpu_ref_cancel_init(&ctx->reqs); - percpu_ref_cancel_init(&ctx->users); + percpu_ref_exit(&ctx->reqs); + percpu_ref_exit(&ctx->users); kmem_cache_free(kioctx_cachep, ctx); pr_debug("error allocating ioctx %d\n", err); return ERR_PTR(err); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 6f8cd4c0546c..0ddd2839ca84 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -57,9 +57,7 @@ struct percpu_ref { atomic_t count; /* * The low bit of the pointer indicates whether the ref is in percpu - * mode; if set, then get/put will manipulate the atomic_t (this is a - * hack because we need to keep the pointer around for - * percpu_ref_kill_rcu()) + * mode; if set, then get/put will manipulate the atomic_t. */ unsigned long pcpu_count_ptr; percpu_ref_func_t *release; @@ -69,7 +67,7 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); -void percpu_ref_cancel_init(struct percpu_ref *ref); +void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7868fc3c0bc5..c06aa5e257a8 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1638,7 +1638,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) exit_root_id: cgroup_exit_root_id(root); cancel_ref: - percpu_ref_cancel_init(&root_cgrp->self.refcnt); + percpu_ref_exit(&root_cgrp->self.refcnt); out: free_cgrp_cset_links(&tmp_links); return ret; @@ -4133,6 +4133,8 @@ static void css_free_work_fn(struct work_struct *work) container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup *cgrp = css->cgroup; + percpu_ref_exit(&css->refcnt); + if (css->ss) { /* css free path */ if (css->parent) @@ -4330,7 +4332,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss) err_free_id: cgroup_idr_remove(&ss->css_idr, css->id); err_free_percpu_ref: - percpu_ref_cancel_init(&css->refcnt); + percpu_ref_exit(&css->refcnt); err_free_css: call_rcu(&css->rcu_head, css_free_rcu_fn); return err; @@ -4441,7 +4443,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, out_free_id: cgroup_idr_remove(&root->cgroup_idr, cgrp->id); out_cancel_ref: - percpu_ref_cancel_init(&cgrp->self.refcnt); + percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: kfree(cgrp); out_unlock: diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 94e5b624de64..ac4299120087 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -61,36 +61,25 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) EXPORT_SYMBOL_GPL(percpu_ref_init); /** - * percpu_ref_cancel_init - cancel percpu_ref_init() - * @ref: percpu_ref to cancel init for + * percpu_ref_exit - undo percpu_ref_init() + * @ref: percpu_ref to exit * - * Once a percpu_ref is initialized, its destruction is initiated by - * percpu_ref_kill() and completes asynchronously, which can be painful to - * do when destroying a half-constructed object in init failure path. - * - * This function destroys @ref without invoking @ref->release and the - * memory area containing it can be freed immediately on return. To - * prevent accidental misuse, it's required that @ref has finished - * percpu_ref_init(), whether successful or not, but never used. - * - * The weird name and usage restriction are to prevent people from using - * this function by mistake for normal shutdown instead of - * percpu_ref_kill(). + * This function exits @ref. The caller is responsible for ensuring that + * @ref is no longer in active use. The usual places to invoke this + * function from are the @ref->release() callback or in init failure path + * where percpu_ref_init() succeeded but other parts of the initialization + * of the embedding object failed. */ -void percpu_ref_cancel_init(struct percpu_ref *ref) +void percpu_ref_exit(struct percpu_ref *ref) { unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); - int cpu; - - WARN_ON_ONCE(atomic_read(&ref->count) != 1 + PCPU_COUNT_BIAS); if (pcpu_count) { - for_each_possible_cpu(cpu) - WARN_ON_ONCE(*per_cpu_ptr(pcpu_count, cpu)); free_percpu(pcpu_count); + ref->pcpu_count_ptr = PCPU_REF_DEAD; } } -EXPORT_SYMBOL_GPL(percpu_ref_cancel_init); +EXPORT_SYMBOL_GPL(percpu_ref_exit); static void percpu_ref_kill_rcu(struct rcu_head *rcu) { @@ -102,8 +91,6 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) for_each_possible_cpu(cpu) count += *per_cpu_ptr(pcpu_count, cpu); - free_percpu(pcpu_count); - pr_debug("global %i pcpu %i", atomic_read(&ref->count), (int) count); /* From 2d7227828e1475c7b272e55bd70c4cec8eea219a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sat, 28 Jun 2014 08:10:14 -0400 Subject: [PATCH 21/21] percpu-refcount: implement percpu_ref_reinit() and percpu_ref_is_zero() Now that explicit invocation of percpu_ref_exit() is necessary to free the percpu counter, we can implement percpu_ref_reinit() which reinitializes a released percpu_ref. This can be used implement scalable gating switch which can be drained and then re-opened without worrying about memory allocation failures. percpu_ref_is_zero() is added to be used in a sanity check in percpu_ref_exit(). As this function will be useful for other purposes too, make it a public interface. v2: Use smp_read_barrier_depends() instead of smp_load_acquire(). We only need data dep barrier and smp_load_acquire() is stronger and heavier on some archs. Spotted by Lai Jiangshan. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Christoph Lameter Cc: Lai Jiangshan --- include/linux/percpu-refcount.h | 19 ++++++++++++++++++ lib/percpu-refcount.c | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 0ddd2839ca84..3dfbf237cd8f 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -67,6 +67,7 @@ struct percpu_ref { int __must_check percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release); +void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); @@ -99,6 +100,9 @@ static inline bool __pcpu_ref_alive(struct percpu_ref *ref, { unsigned long pcpu_ptr = ACCESS_ONCE(ref->pcpu_count_ptr); + /* paired with smp_store_release() in percpu_ref_reinit() */ + smp_read_barrier_depends(); + if (unlikely(pcpu_ptr & PCPU_REF_DEAD)) return false; @@ -206,4 +210,19 @@ static inline void percpu_ref_put(struct percpu_ref *ref) rcu_read_unlock_sched(); } +/** + * percpu_ref_is_zero - test whether a percpu refcount reached zero + * @ref: percpu_ref to test + * + * Returns %true if @ref reached zero. + */ +static inline bool percpu_ref_is_zero(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count; + + if (__pcpu_ref_alive(ref, &pcpu_count)) + return false; + return !atomic_read(&ref->count); +} + #endif diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index ac4299120087..fe5a3342e960 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -60,6 +60,41 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) } EXPORT_SYMBOL_GPL(percpu_ref_init); +/** + * percpu_ref_reinit - re-initialize a percpu refcount + * @ref: perpcu_ref to re-initialize + * + * Re-initialize @ref so that it's in the same state as when it finished + * percpu_ref_init(). @ref must have been initialized successfully, killed + * and reached 0 but not exited. + * + * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while + * this function is in progress. + */ +void percpu_ref_reinit(struct percpu_ref *ref) +{ + unsigned __percpu *pcpu_count = pcpu_count_ptr(ref); + int cpu; + + BUG_ON(!pcpu_count); + WARN_ON(!percpu_ref_is_zero(ref)); + + atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); + + /* + * Restore per-cpu operation. smp_store_release() is paired with + * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees + * that the zeroing is visible to all percpu accesses which can see + * the following PCPU_REF_DEAD clearing. + */ + for_each_possible_cpu(cpu) + *per_cpu_ptr(pcpu_count, cpu) = 0; + + smp_store_release(&ref->pcpu_count_ptr, + ref->pcpu_count_ptr & ~PCPU_REF_DEAD); +} +EXPORT_SYMBOL_GPL(percpu_ref_reinit); + /** * percpu_ref_exit - undo percpu_ref_init() * @ref: percpu_ref to exit