kernel-fxtec-pro1x/kernel/up.c
Ying Huang 966a967116 smp: Avoid using two cache lines for struct call_single_data
struct call_single_data is used in IPIs to transfer information between
CPUs.  Its size is bigger than sizeof(unsigned long) and less than
cache line size.  Currently it is not allocated with any explicit alignment
requirements.  This makes it possible for allocated call_single_data to
cross two cache lines, which results in double the number of the cache lines
that need to be transferred among CPUs.

This can be fixed by requiring call_single_data to be aligned with the
size of call_single_data. Currently the size of call_single_data is the
power of 2.  If we add new fields to call_single_data, we may need to
add padding to make sure the size of new definition is the power of 2
as well.

Fortunately, this is enforced by GCC, which will report bad sizes.

To set alignment requirements of call_single_data to the size of
call_single_data, a struct definition and a typedef is used.

To test the effect of the patch, I used the vm-scalability multiple
thread swap test case (swap-w-seq-mt).  The test will create multiple
threads and each thread will eat memory until all RAM and part of swap
is used, so that huge number of IPIs are triggered when unmapping
memory.  In the test, the throughput of memory writing improves ~5%
compared with misaligned call_single_data, because of faster IPIs.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Huang, Ying <ying.huang@intel.com>
[ Add call_single_data_t and align with size of call_single_data. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/87bmnqd6lz.fsf@yhuang-mobile.sh.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-08-29 15:14:38 +02:00

102 lines
2 KiB
C

/*
* Uniprocessor-only support functions. The counterpart to kernel/smp.c
*/
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/smp.h>
#include <linux/hypervisor.h>
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int wait)
{
unsigned long flags;
WARN_ON(cpu != 0);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
int smp_call_function_single_async(int cpu, call_single_data_t *csd)
{
unsigned long flags;
local_irq_save(flags);
csd->func(csd->info);
local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL(smp_call_function_single_async);
int on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
return 0;
}
EXPORT_SYMBOL(on_each_cpu);
/*
* Note we still need to test the mask even for UP
* because we actually can get an empty mask from
* code that on SMP might call us without the local
* CPU in the mask.
*/
void on_each_cpu_mask(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
{
unsigned long flags;
if (cpumask_test_cpu(0, mask)) {
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
}
EXPORT_SYMBOL(on_each_cpu_mask);
/*
* Preemption is disabled here to make sure the cond_func is called under the
* same condtions in UP and SMP.
*/
void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
smp_call_func_t func, void *info, bool wait,
gfp_t gfp_flags)
{
unsigned long flags;
preempt_disable();
if (cond_func(0, info)) {
local_irq_save(flags);
func(info);
local_irq_restore(flags);
}
preempt_enable();
}
EXPORT_SYMBOL(on_each_cpu_cond);
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys)
{
int ret;
if (cpu != 0)
return -ENXIO;
if (phys)
hypervisor_pin_vcpu(0);
ret = func(par);
if (phys)
hypervisor_pin_vcpu(-1);
return ret;
}
EXPORT_SYMBOL_GPL(smp_call_on_cpu);