percpu: align percpu readmostly subsection to cacheline
Currently the percpu readmostly subsection may share cachelines with other percpu subsections, which may result in unnecessary cacheline bouncing and performance degradation. This patch adds a @cacheline parameter to the PERCPU() and PERCPU_VADDR() linker macros, makes each arch linker script specify its cacheline size, and uses that size to align the percpu subsections. This is based on Shaohua's x86 only patch. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Shaohua Li <shaohua.li@intel.com>
This commit is contained in:
parent
c723fdab8a
commit
19df0c2fef
19 changed files with 41 additions and 32 deletions
|
@ -38,7 +38,7 @@ SECTIONS
|
||||||
__init_begin = ALIGN(PAGE_SIZE);
|
__init_begin = ALIGN(PAGE_SIZE);
|
||||||
INIT_TEXT_SECTION(PAGE_SIZE)
|
INIT_TEXT_SECTION(PAGE_SIZE)
|
||||||
INIT_DATA_SECTION(16)
|
INIT_DATA_SECTION(16)
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(64, PAGE_SIZE)
|
||||||
/* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
|
/* Align to THREAD_SIZE rather than PAGE_SIZE here so any padding page
|
||||||
needed for the THREAD_SIZE aligned init_task gets freed after init */
|
needed for the THREAD_SIZE aligned init_task gets freed after init */
|
||||||
. = ALIGN(THREAD_SIZE);
|
. = ALIGN(THREAD_SIZE);
|
||||||
|
|
|
@ -70,7 +70,7 @@ SECTIONS
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(32, PAGE_SIZE)
|
||||||
|
|
||||||
#ifndef CONFIG_XIP_KERNEL
|
#ifndef CONFIG_XIP_KERNEL
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
|
|
|
@ -136,7 +136,7 @@ SECTIONS
|
||||||
|
|
||||||
. = ALIGN(16);
|
. = ALIGN(16);
|
||||||
INIT_DATA_SECTION(16)
|
INIT_DATA_SECTION(16)
|
||||||
PERCPU(4)
|
PERCPU(32, 4)
|
||||||
|
|
||||||
.exit.data :
|
.exit.data :
|
||||||
{
|
{
|
||||||
|
|
|
@ -107,7 +107,7 @@ SECTIONS
|
||||||
#endif
|
#endif
|
||||||
__vmlinux_end = .; /* Last address of the physical file. */
|
__vmlinux_end = .; /* Last address of the physical file. */
|
||||||
#ifdef CONFIG_ETRAX_ARCH_V32
|
#ifdef CONFIG_ETRAX_ARCH_V32
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(32, PAGE_SIZE)
|
||||||
|
|
||||||
.init.ramfs : {
|
.init.ramfs : {
|
||||||
INIT_RAM_FS
|
INIT_RAM_FS
|
||||||
|
|
|
@ -37,7 +37,7 @@ SECTIONS
|
||||||
_einittext = .;
|
_einittext = .;
|
||||||
|
|
||||||
INIT_DATA_SECTION(8)
|
INIT_DATA_SECTION(8)
|
||||||
PERCPU(4096)
|
PERCPU(L1_CACHE_BYTES, 4096)
|
||||||
|
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
|
|
|
@ -198,7 +198,7 @@ SECTIONS {
|
||||||
|
|
||||||
/* Per-cpu data: */
|
/* Per-cpu data: */
|
||||||
. = ALIGN(PERCPU_PAGE_SIZE);
|
. = ALIGN(PERCPU_PAGE_SIZE);
|
||||||
PERCPU_VADDR(PERCPU_ADDR, :percpu)
|
PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu)
|
||||||
__phys_per_cpu_start = __per_cpu_load;
|
__phys_per_cpu_start = __per_cpu_load;
|
||||||
/*
|
/*
|
||||||
* ensure percpu data fits
|
* ensure percpu data fits
|
||||||
|
|
|
@ -53,7 +53,7 @@ SECTIONS
|
||||||
__init_begin = .;
|
__init_begin = .;
|
||||||
INIT_TEXT_SECTION(PAGE_SIZE)
|
INIT_TEXT_SECTION(PAGE_SIZE)
|
||||||
INIT_DATA_SECTION(16)
|
INIT_DATA_SECTION(16)
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(32, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
/* freed after init ends here */
|
/* freed after init ends here */
|
||||||
|
|
|
@ -115,7 +115,7 @@ SECTIONS
|
||||||
EXIT_DATA
|
EXIT_DATA
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(1 << CONFIG_MIPS_L1_CACHE_SHIFT, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
/* freed after init ends here */
|
/* freed after init ends here */
|
||||||
|
|
|
@ -70,7 +70,7 @@ SECTIONS
|
||||||
.exit.text : { EXIT_TEXT; }
|
.exit.text : { EXIT_TEXT; }
|
||||||
.exit.data : { EXIT_DATA; }
|
.exit.data : { EXIT_DATA; }
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(32, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
/* freed after init ends here */
|
/* freed after init ends here */
|
||||||
|
|
|
@ -145,7 +145,7 @@ SECTIONS
|
||||||
EXIT_DATA
|
EXIT_DATA
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
/* freed after init ends here */
|
/* freed after init ends here */
|
||||||
|
|
|
@ -160,7 +160,7 @@ SECTIONS
|
||||||
INIT_RAM_FS
|
INIT_RAM_FS
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
|
||||||
|
|
||||||
. = ALIGN(8);
|
. = ALIGN(8);
|
||||||
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
|
.machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
|
||||||
|
|
|
@ -77,7 +77,7 @@ SECTIONS
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
INIT_DATA_SECTION(0x100)
|
INIT_DATA_SECTION(0x100)
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(0x100, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .; /* freed after init ends here */
|
__init_end = .; /* freed after init ends here */
|
||||||
|
|
||||||
|
|
|
@ -66,7 +66,7 @@ SECTIONS
|
||||||
__machvec_end = .;
|
__machvec_end = .;
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(L1_CACHE_BYTES, PAGE_SIZE)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* .exit.text is discarded at runtime, not link time, to deal with
|
* .exit.text is discarded at runtime, not link time, to deal with
|
||||||
|
|
|
@ -108,7 +108,7 @@ SECTIONS
|
||||||
__sun4v_2insn_patch_end = .;
|
__sun4v_2insn_patch_end = .;
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(SMP_CACHE_BYTES, PAGE_SIZE)
|
||||||
|
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
__init_end = .;
|
__init_end = .;
|
||||||
|
|
|
@ -63,7 +63,7 @@ SECTIONS
|
||||||
*(.init.page)
|
*(.init.page)
|
||||||
} :data =0
|
} :data =0
|
||||||
INIT_DATA_SECTION(16)
|
INIT_DATA_SECTION(16)
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(L2_CACHE_BYTES, PAGE_SIZE)
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
VMLINUX_SYMBOL(_einitdata) = .;
|
VMLINUX_SYMBOL(_einitdata) = .;
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
INIT_SETUP(0)
|
INIT_SETUP(0)
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(32)
|
PERCPU(32, 32)
|
||||||
|
|
||||||
.initcall.init : {
|
.initcall.init : {
|
||||||
INIT_CALLS
|
INIT_CALLS
|
||||||
|
|
|
@ -230,7 +230,7 @@ SECTIONS
|
||||||
* output PHDR, so the next output section - .init.text - should
|
* output PHDR, so the next output section - .init.text - should
|
||||||
* start another segment - init.
|
* start another segment - init.
|
||||||
*/
|
*/
|
||||||
PERCPU_VADDR(0, :percpu)
|
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
INIT_TEXT_SECTION(PAGE_SIZE)
|
INIT_TEXT_SECTION(PAGE_SIZE)
|
||||||
|
@ -305,7 +305,7 @@ SECTIONS
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
|
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
|
||||||
PERCPU(THREAD_SIZE)
|
PERCPU(INTERNODE_CACHE_BYTES, THREAD_SIZE)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
. = ALIGN(PAGE_SIZE);
|
. = ALIGN(PAGE_SIZE);
|
||||||
|
|
|
@ -155,7 +155,7 @@ SECTIONS
|
||||||
INIT_RAM_FS
|
INIT_RAM_FS
|
||||||
}
|
}
|
||||||
|
|
||||||
PERCPU(PAGE_SIZE)
|
PERCPU(XCHAL_ICACHE_LINESIZE, PAGE_SIZE)
|
||||||
|
|
||||||
/* We need this dummy segment here */
|
/* We need this dummy segment here */
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
* HEAD_TEXT_SECTION
|
* HEAD_TEXT_SECTION
|
||||||
* INIT_TEXT_SECTION(PAGE_SIZE)
|
* INIT_TEXT_SECTION(PAGE_SIZE)
|
||||||
* INIT_DATA_SECTION(...)
|
* INIT_DATA_SECTION(...)
|
||||||
* PERCPU(PAGE_SIZE)
|
* PERCPU(CACHELINE_SIZE, PAGE_SIZE)
|
||||||
* __init_end = .;
|
* __init_end = .;
|
||||||
*
|
*
|
||||||
* _stext = .;
|
* _stext = .;
|
||||||
|
@ -683,13 +683,18 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PERCPU_VADDR - define output section for percpu area
|
* PERCPU_VADDR - define output section for percpu area
|
||||||
|
* @cacheline: cacheline size
|
||||||
* @vaddr: explicit base address (optional)
|
* @vaddr: explicit base address (optional)
|
||||||
* @phdr: destination PHDR (optional)
|
* @phdr: destination PHDR (optional)
|
||||||
*
|
*
|
||||||
* Macro which expands to output section for percpu area. If @vaddr
|
* Macro which expands to output section for percpu area.
|
||||||
* is not blank, it specifies explicit base address and all percpu
|
*
|
||||||
* symbols will be offset from the given address. If blank, @vaddr
|
* @cacheline is used to align subsections to avoid false cacheline
|
||||||
* always equals @laddr + LOAD_OFFSET.
|
* sharing between subsections for different purposes.
|
||||||
|
*
|
||||||
|
* If @vaddr is not blank, it specifies explicit base address and all
|
||||||
|
* percpu symbols will be offset from the given address. If blank,
|
||||||
|
* @vaddr always equals @laddr + LOAD_OFFSET.
|
||||||
*
|
*
|
||||||
* @phdr defines the output PHDR to use if not blank. Be warned that
|
* @phdr defines the output PHDR to use if not blank. Be warned that
|
||||||
* output PHDR is sticky. If @phdr is specified, the next output
|
* output PHDR is sticky. If @phdr is specified, the next output
|
||||||
|
@ -700,7 +705,7 @@
|
||||||
* If there is no need to put the percpu section at a predetermined
|
* If there is no need to put the percpu section at a predetermined
|
||||||
* address, use PERCPU().
|
* address, use PERCPU().
|
||||||
*/
|
*/
|
||||||
#define PERCPU_VADDR(vaddr, phdr) \
|
#define PERCPU_VADDR(cacheline, vaddr, phdr) \
|
||||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||||
.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
|
.data..percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
|
||||||
- LOAD_OFFSET) { \
|
- LOAD_OFFSET) { \
|
||||||
|
@ -708,7 +713,9 @@
|
||||||
*(.data..percpu..first) \
|
*(.data..percpu..first) \
|
||||||
. = ALIGN(PAGE_SIZE); \
|
. = ALIGN(PAGE_SIZE); \
|
||||||
*(.data..percpu..page_aligned) \
|
*(.data..percpu..page_aligned) \
|
||||||
|
. = ALIGN(cacheline); \
|
||||||
*(.data..percpu..readmostly) \
|
*(.data..percpu..readmostly) \
|
||||||
|
. = ALIGN(cacheline); \
|
||||||
*(.data..percpu) \
|
*(.data..percpu) \
|
||||||
*(.data..percpu..shared_aligned) \
|
*(.data..percpu..shared_aligned) \
|
||||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||||
|
@ -717,18 +724,18 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PERCPU - define output section for percpu area, simple version
|
* PERCPU - define output section for percpu area, simple version
|
||||||
|
* @cacheline: cacheline size
|
||||||
* @align: required alignment
|
* @align: required alignment
|
||||||
*
|
*
|
||||||
* Align to @align and outputs output section for percpu area. This
|
* Align to @align and outputs output section for percpu area. This macro
|
||||||
* macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and
|
* doesn't manipulate @vaddr or @phdr and __per_cpu_load and
|
||||||
* __per_cpu_start will be identical.
|
* __per_cpu_start will be identical.
|
||||||
*
|
*
|
||||||
* This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except
|
* This macro is equivalent to ALIGN(@align); PERCPU_VADDR(@cacheline,,)
|
||||||
* that __per_cpu_load is defined as a relative symbol against
|
* except that __per_cpu_load is defined as a relative symbol against
|
||||||
* .data..percpu which is required for relocatable x86_32
|
* .data..percpu which is required for relocatable x86_32 configuration.
|
||||||
* configuration.
|
|
||||||
*/
|
*/
|
||||||
#define PERCPU(align) \
|
#define PERCPU(cacheline, align) \
|
||||||
. = ALIGN(align); \
|
. = ALIGN(align); \
|
||||||
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
|
.data..percpu : AT(ADDR(.data..percpu) - LOAD_OFFSET) { \
|
||||||
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
VMLINUX_SYMBOL(__per_cpu_load) = .; \
|
||||||
|
@ -736,7 +743,9 @@
|
||||||
*(.data..percpu..first) \
|
*(.data..percpu..first) \
|
||||||
. = ALIGN(PAGE_SIZE); \
|
. = ALIGN(PAGE_SIZE); \
|
||||||
*(.data..percpu..page_aligned) \
|
*(.data..percpu..page_aligned) \
|
||||||
|
. = ALIGN(cacheline); \
|
||||||
*(.data..percpu..readmostly) \
|
*(.data..percpu..readmostly) \
|
||||||
|
. = ALIGN(cacheline); \
|
||||||
*(.data..percpu) \
|
*(.data..percpu) \
|
||||||
*(.data..percpu..shared_aligned) \
|
*(.data..percpu..shared_aligned) \
|
||||||
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
VMLINUX_SYMBOL(__per_cpu_end) = .; \
|
||||||
|
|
Loading…
Reference in a new issue