Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (231 commits)
  [PATCH] i386: Don't delete cpu_devs data to identify different x86 types in late_initcall
  [PATCH] i386: type may be unused
  [PATCH] i386: Some additional chipset register values validation.
  [PATCH] i386: Add missing !X86_PAE dependincy to the 2G/2G split.
  [PATCH] x86-64: Don't exclude asm-offsets.c in Documentation/dontdiff
  [PATCH] i386: avoid redundant preempt_disable in __unlazy_fpu
  [PATCH] i386: white space fixes in i387.h
  [PATCH] i386: Drop noisy e820 debugging printks
  [PATCH] x86-64: Fix allnoconfig error in genapic_flat.c
  [PATCH] x86-64: Shut up warnings for vfat compat ioctls on other file systems
  [PATCH] x86-64: Share identical video.S between i386 and x86-64
  [PATCH] x86-64: Remove CONFIG_REORDER
  [PATCH] x86-64: Print type and size correctly for unknown compat ioctls
  [PATCH] i386: Remove copy_*_user BUG_ONs for (size < 0)
  [PATCH] i386: Little cleanups in smpboot.c
  [PATCH] x86-64: Don't enable NUMA for a single node in K8 NUMA scanning
  [PATCH] x86: Use RDTSCP for synchronous get_cycles if possible
  [PATCH] i386: Add X86_FEATURE_RDTSCP
  [PATCH] i386: Implement X86_FEATURE_SYNC_RDTSC on i386
  [PATCH] i386: Implement alternative_io for i386
  ...

Fix up trivial conflict in include/linux/highmem.h manually.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit ea62ccd00f
319 changed files with 7641 additions and 10477 deletions
CREDITS (5 lines changed)

@@ -1745,8 +1745,9 @@ S: D-64295
S: Germany

N: Andi Kleen
E: ak@muc.de
D: network hacker, syncookies
E: andi@firstfloor.org
U: http://www.halobates.de
D: network, x86, NUMA, various hacks
S: Schwalbenstr. 96
S: 85551 Ottobrunn
S: Germany
@@ -55,8 +55,8 @@ aic7*seq.h*
aicasm
aicdb.h*
asm
asm-offsets.*
asm_offsets.*
asm-offsets.h
asm_offsets.h
autoconf.h*
bbootsect
bin2c
@@ -2,7 +2,7 @@
----------------------------

		H. Peter Anvin <hpa@zytor.com>
		Last update 2007-01-26
		Last update 2007-03-06

On the i386 platform, the Linux kernel uses a rather complicated boot
convention.  This has evolved partially due to historical aspects, as

@@ -35,9 +35,13 @@ Protocol 2.03:	(Kernel 2.4.18-pre1) Explicitly makes the highest possible
	initrd address available to the bootloader.

Protocol 2.04:	(Kernel 2.6.14) Extend the syssize field to four bytes.

Protocol 2.05:	(Kernel 2.6.20) Make protected mode kernel relocatable.
		Introduce relocatable_kernel and kernel_alignment fields.

Protocol 2.06:	(Kernel 2.6.22) Added a field that contains the size of
		the boot command line

**** MEMORY LAYOUT

@@ -133,6 +137,8 @@ Offset	Proto	Name		Meaning
022C/4	2.03+	initrd_addr_max		Highest legal initrd address
0230/4	2.05+	kernel_alignment	Physical addr alignment required for kernel
0234/1	2.05+	relocatable_kernel	Whether kernel is relocatable or not
0235/3	N/A	pad2			Unused
0238/4	2.06+	cmdline_size		Maximum size of the kernel command line

(1) For backwards compatibility, if the setup_sects field contains 0, the
    real value is 4.

@@ -233,6 +239,12 @@ filled out, however:
	if your ramdisk is exactly 131072 bytes long and this field is
	0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)

cmdline_size:
	The maximum size of the command line without the terminating
	zero. This means that the command line can contain at most
	cmdline_size characters. With protocol version 2.05 and
	earlier, the maximum size was 255.

**** THE KERNEL COMMAND LINE

@@ -241,11 +253,10 @@ loader to communicate with the kernel. Some of its options are also
relevant to the boot loader itself, see "special command line options"
below.

The kernel command line is a null-terminated string currently up to
255 characters long, plus the final null. A string that is too long
will be automatically truncated by the kernel, a boot loader may allow
a longer command line to be passed to permit future kernels to extend
this limit.
The kernel command line is a null-terminated string. The maximum
length can be retrieved from the field cmdline_size. Before protocol
version 2.06, the maximum was 255 characters. A string that is too
long will be automatically truncated by the kernel.

If the boot protocol version is 2.02 or later, the address of the
kernel command line is given by the header field cmd_line_ptr (see
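For a quick sanity check of the header fields listed above, cmdline_size can be read straight out of a built image; a minimal sketch, assuming the usual bzImage output path (the header offsets are counted from the start of the image):

	# cmdline_size lives at offset 0x238 (= 568 decimal), 4 bytes, protocol 2.06+
	od -A d -t u4 -j 568 -N 4 arch/i386/boot/bzImage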
@@ -64,6 +64,7 @@ parameter is applicable:
	GENERIC_TIME	The generic timeofday code is enabled.
	NFS	Appropriate NFS support is enabled.
	OSS	OSS sound support is enabled.
	PV_OPS	A paravirtualized kernel
	PARIDE	The ParIDE subsystem is enabled.
	PARISC	The PA-RISC architecture is enabled.
	PCI	PCI bus support is enabled.

@@ -695,8 +696,15 @@ and is between 256 and 4096 characters. It is defined in the file
	idebus=		[HW]	(E)IDE subsystem - VLB/PCI bus speed
			See Documentation/ide.txt.

	idle=		[HW]
			Format: idle=poll or idle=halt
	idle=		[X86]
			Format: idle=poll or idle=mwait
			Poll forces a polling idle loop that can slightly improve
			the performance of waking up an idle CPU, but will use a
			lot of power and make the system run hot. Not recommended.
			idle=mwait: on systems which support MONITOR/MWAIT but where
			the kernel chose not to use it because it doesn't save as
			much power as a normal idle loop, use the MONITOR/MWAIT idle
			loop anyway. Performance should be the same as idle=poll.

	ignore_loglevel	[KNL]
			Ignore loglevel setting - this will print /all/

@@ -1157,6 +1165,11 @@ and is between 256 and 4096 characters. It is defined in the file
	nomce		[IA-32] Machine Check Exception

	noreplace-paravirt	[IA-32,PV_OPS] Don't patch paravirt_ops

	noreplace-smp	[IA-32,SMP] Don't replace SMP instructions
			with UP alternatives

	noresidual	[PPC] Don't use residual data on PReP machines.

	noresume	[SWSUSP] Disables resume and restores original swap

@@ -1562,6 +1575,9 @@ and is between 256 and 4096 characters. It is defined in the file
	smart2=		[HW]
			Format: <io1>[,<io2>[,...,<io8>]]

	smp-alt-once	[IA-32,SMP] On a hotplug CPU system, only
			attempt to substitute SMP alternatives once at boot.

	snd-ad1816a=	[HW,ALSA]

	snd-ad1848=	[HW,ALSA]

@@ -1820,6 +1836,7 @@ and is between 256 and 4096 characters. It is defined in the file
			[USBHID] The interval which mice are to be polled at.

	vdso=		[IA-32,SH]
			vdso=2: enable compat VDSO (default with COMPAT_VDSO)
			vdso=1: enable VDSO (default)
			vdso=0: disable VDSO mapping
@@ -149,7 +149,19 @@ NUMA

  numa=noacpi	Don't parse the SRAT table for NUMA setup

  numa=fake=X	Fake X nodes and ignore NUMA setup of the actual machine.
  numa=fake=CMDLINE
		If a number, fakes CMDLINE nodes and ignores NUMA setup of the
		actual machine. Otherwise, system memory is configured
		depending on the sizes and coefficients listed. For example:
			numa=fake=2*512,1024,4*256,*128
		gives two 512M nodes, a 1024M node, four 256M nodes, and the
		rest split into 128M chunks. If the last character of CMDLINE
		is a *, the remaining memory is divided up equally among its
		coefficient:
			numa=fake=2*512,2*
		gives two 512M nodes and the rest split into two nodes.
		Otherwise, the remaining system RAM is allocated to an
		additional node.

  numa=hotadd=percent
		Only allow hotadd memory to preallocate page structures up to
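To make the coefficient syntax above concrete, here is one worked example for a hypothetical machine with 4096M of RAM (the option string is illustrative, not taken from the patch):

	numa=fake=2*512,1024,2*
	# 2*512 -> nodes 0 and 1, 512M each
	# 1024  -> node 2, 1024M
	# 2*    -> the remaining 2048M is split equally into two further 1024M nodes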
Documentation/x86_64/fake-numa-for-cpusets (new file, 66 lines)

@@ -0,0 +1,66 @@
Using numa=fake and CPUSets for Resource Management
Written by David Rientjes <rientjes@cs.washington.edu>

This document describes how the numa=fake x86_64 command-line option can be used
in conjunction with cpusets for coarse memory management.  Using this feature,
you can create fake NUMA nodes that represent contiguous chunks of memory and
assign them to cpusets and their attached tasks.  This is a way of limiting the
amount of system memory that is available to a certain class of tasks.

For more information on the features of cpusets, see Documentation/cpusets.txt.
There are a number of different configurations you can use for your needs.  For
more information on the numa=fake command line option and its various ways of
configuring fake nodes, see Documentation/x86_64/boot-options.txt.

For the purposes of this introduction, we'll assume a very primitive NUMA
emulation setup of "numa=fake=4*512,".  This will split our system memory into
four equal chunks of 512M each that we can now use to assign to cpusets.  As
you become more familiar with using this combination for resource control,
you'll determine a better setup to minimize the number of nodes you have to
deal with.

A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:

	Faking node 0 at 0000000000000000-0000000020000000 (512MB)
	Faking node 1 at 0000000020000000-0000000040000000 (512MB)
	Faking node 2 at 0000000040000000-0000000060000000 (512MB)
	Faking node 3 at 0000000060000000-0000000080000000 (512MB)
	...
	On node 0 totalpages: 130975
	On node 1 totalpages: 131072
	On node 2 totalpages: 131072
	On node 3 totalpages: 131072

Now following the instructions for mounting the cpusets filesystem from
Documentation/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
address spaces) to individual cpusets:

	[root@xroads /]# mkdir exampleset
	[root@xroads /]# mount -t cpuset none exampleset
	[root@xroads /]# mkdir exampleset/ddset
	[root@xroads /]# cd exampleset/ddset
	[root@xroads /exampleset/ddset]# echo 0-1 > cpus
	[root@xroads /exampleset/ddset]# echo 0-1 > mems

Now this cpuset, 'ddset', will only be allowed access to fake nodes 0 and 1 for
memory allocations (1G).

You can now assign tasks to these cpusets to limit the memory resources
available to them according to the fake nodes assigned as mems:

	[root@xroads /exampleset/ddset]# echo $$ > tasks
	[root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
	[1] 13425

Notice the difference between the system memory usage as reported by
/proc/meminfo between the restricted cpuset case above and the unrestricted
case (i.e. running the same 'dd' command without assigning it to a fake NUMA
cpuset):

			Unrestricted	Restricted
	MemTotal:	3091900 kB	3091900 kB
	MemFree:	  42113 kB	1513236 kB

This allows for coarse memory management for the tasks you assign to particular
cpusets.  Since cpusets can form a hierarchy, you can create some pretty
interesting combinations of use-cases for various classes of tasks for your
memory management needs.
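The steps in the new document lend themselves to a short script; a minimal sketch assuming the same node assignment as the example above (the mount point, cpuset name, and workload are placeholders, not part of the patch):

	#!/bin/sh
	# Confine the current shell (and its children) to fake nodes 0-1.
	mkdir -p /dev/cpuset
	mount -t cpuset none /dev/cpuset
	mkdir -p /dev/cpuset/ddset
	echo 0-1 > /dev/cpuset/ddset/cpus
	echo 0-1 > /dev/cpuset/ddset/mems
	echo $$  > /dev/cpuset/ddset/tasks
	# Anything started from here on is limited to roughly 1G of memory.
	dd if=/dev/zero of=/tmp/bigfile bs=1024 count=1048576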
@@ -36,7 +36,12 @@ between all CPUs.

check_interval
	How often to poll for corrected machine check errors, in seconds
	(Note output is hexadecimal). Default 5 minutes.
	(Note output is hexadecimal). Default 5 minutes.  When the poller
	finds MCEs it triggers an exponential speedup (poll more often) on
	the polling interval.  When the poller stops finding MCEs, it
	triggers an exponential backoff (poll less often) on the polling
	interval.  The check_interval variable is both the initial and
	maximum polling interval.

tolerant
	Tolerance level. When a machine check exception occurs for a non
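For reference, the poller described above can be inspected and tuned from userspace; a sketch assuming the machinecheck sysfs location used by the x86-64 MCE driver (the path itself is not quoted in this hunk):

	# current interval (printed in hexadecimal, as noted above)
	cat /sys/devices/system/machinecheck/machinecheck0/check_interval
	# poll every 60 seconds instead of the 5-minute default
	echo 60 > /sys/devices/system/machinecheck/machinecheck0/check_interval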
MAINTAINERS (24 lines changed)

@@ -1617,7 +1617,7 @@ S:	Maintained

HPET:	x86_64
P:	Andi Kleen and Vojtech Pavlik
M:	ak@muc.de and vojtech@suse.cz
M:	andi@firstfloor.org and vojtech@suse.cz
S:	Maintained

HPET:	ACPI hpet.c

@@ -2652,6 +2652,19 @@ T:	git kernel.org:/pub/scm/linux/kernel/git/kyle/parisc-2.6.git
T:	cvs cvs.parisc-linux.org:/var/cvs/linux-2.6
S:	Maintained

PARAVIRT_OPS INTERFACE
P:	Jeremy Fitzhardinge
M:	jeremy@xensource.com
P:	Chris Wright
M:	chrisw@sous-sol.org
P:	Zachary Amsden
M:	zach@vmware.com
P:	Rusty Russell
M:	rusty@rustcorp.com.au
L:	virtualization@lists.osdl.org
L:	linux-kernel@vger.kernel.org
S:	Supported

PC87360 HARDWARE MONITORING DRIVER
P:	Jim Cromie
M:	jim.cromie@gmail.com

@@ -3876,6 +3889,15 @@ M:	eis@baty.hanse.de
L:	linux-x25@vger.kernel.org
S:	Maintained

XEN HYPERVISOR INTERFACE
P:	Jeremy Fitzhardinge
M:	jeremy@xensource.com
P:	Chris Wright
M:	chrisw@sous-sol.org
L:	virtualization@lists.osdl.org
L:	xen-devel@lists.xensource.com
S:	Supported

XFS FILESYSTEM
P:	Silicon Graphics Inc
P:	Tim Shimmin, David Chatterton
Makefile (2 lines changed)

@@ -491,7 +491,7 @@ endif
include $(srctree)/arch/$(ARCH)/Makefile

ifdef CONFIG_FRAME_POINTER
CFLAGS		+= -fno-omit-frame-pointer $(call cc-option,-fno-optimize-sibling-calls,)
CFLAGS		+= -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
CFLAGS		+= -fomit-frame-pointer
endif
@ -98,7 +98,7 @@ extern int end;
|
|||
static ulg free_mem_ptr;
|
||||
static ulg free_mem_ptr_end;
|
||||
|
||||
#define HEAP_SIZE 0x2000
|
||||
#define HEAP_SIZE 0x3000
|
||||
|
||||
#include "../../../lib/inflate.c"
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ SECTIONS
|
|||
. = ALIGN(8);
|
||||
SECURITY_INIT
|
||||
|
||||
. = ALIGN(64);
|
||||
. = ALIGN(8192);
|
||||
__per_cpu_start = .;
|
||||
.data.percpu : { *(.data.percpu) }
|
||||
__per_cpu_end = .;
|
||||
|
|
|
@ -239,7 +239,7 @@ extern int end;
|
|||
static ulg free_mem_ptr;
|
||||
static ulg free_mem_ptr_end;
|
||||
|
||||
#define HEAP_SIZE 0x2000
|
||||
#define HEAP_SIZE 0x3000
|
||||
|
||||
#include "../../../../lib/inflate.c"
|
||||
|
||||
|
|
|
@ -59,7 +59,7 @@ SECTIONS
|
|||
usr/built-in.o(.init.ramfs)
|
||||
__initramfs_end = .;
|
||||
#endif
|
||||
. = ALIGN(64);
|
||||
. = ALIGN(4096);
|
||||
__per_cpu_start = .;
|
||||
*(.data.percpu)
|
||||
__per_cpu_end = .;
|
||||
|
|
|
@ -182,7 +182,7 @@ extern int end;
|
|||
static ulg free_mem_ptr;
|
||||
static ulg free_mem_ptr_end;
|
||||
|
||||
#define HEAP_SIZE 0x2000
|
||||
#define HEAP_SIZE 0x3000
|
||||
|
||||
#include "../../../../lib/inflate.c"
|
||||
|
||||
|
|
|
@ -91,6 +91,7 @@ SECTIONS
|
|||
}
|
||||
SECURITY_INIT
|
||||
|
||||
. = ALIGN (8192);
|
||||
__per_cpu_start = .;
|
||||
.data.percpu : { *(.data.percpu) }
|
||||
__per_cpu_end = .;
|
||||
|
|
|
@ -57,6 +57,7 @@ SECTIONS
|
|||
__alt_instructions_end = .;
|
||||
.altinstr_replacement : { *(.altinstr_replacement) }
|
||||
|
||||
. = ALIGN(4096);
|
||||
__per_cpu_start = .;
|
||||
.data.percpu : { *(.data.percpu) }
|
||||
__per_cpu_end = .;
|
||||
|
|
|
@ -220,7 +220,7 @@ config PARAVIRT
|
|||
|
||||
config VMI
|
||||
bool "VMI Paravirt-ops support"
|
||||
depends on PARAVIRT && !COMPAT_VDSO
|
||||
depends on PARAVIRT
|
||||
help
|
||||
VMI provides a paravirtualized interface to the VMware ESX server
|
||||
(it could be used by other hypervisors in theory too, but is not
|
||||
|
@ -571,6 +571,9 @@ choice
|
|||
bool "3G/1G user/kernel split (for full 1G low memory)"
|
||||
config VMSPLIT_2G
|
||||
bool "2G/2G user/kernel split"
|
||||
config VMSPLIT_2G_OPT
|
||||
depends on !HIGHMEM
|
||||
bool "2G/2G user/kernel split (for full 2G low memory)"
|
||||
config VMSPLIT_1G
|
||||
bool "1G/3G user/kernel split"
|
||||
endchoice
|
||||
|
@ -578,7 +581,8 @@ endchoice
|
|||
config PAGE_OFFSET
|
||||
hex
|
||||
default 0xB0000000 if VMSPLIT_3G_OPT
|
||||
default 0x78000000 if VMSPLIT_2G
|
||||
default 0x80000000 if VMSPLIT_2G
|
||||
default 0x78000000 if VMSPLIT_2G_OPT
|
||||
default 0x40000000 if VMSPLIT_1G
|
||||
default 0xC0000000
|
||||
|
||||
|
@ -915,12 +919,9 @@ source kernel/power/Kconfig
|
|||
|
||||
source "drivers/acpi/Kconfig"
|
||||
|
||||
menu "APM (Advanced Power Management) BIOS Support"
|
||||
depends on PM && !X86_VISWS
|
||||
|
||||
config APM
|
||||
menuconfig APM
|
||||
tristate "APM (Advanced Power Management) BIOS support"
|
||||
depends on PM
|
||||
depends on PM && !X86_VISWS
|
||||
---help---
|
||||
APM is a BIOS specification for saving power using several different
|
||||
techniques. This is mostly useful for battery powered laptops with
|
||||
|
@ -977,9 +978,10 @@ config APM
|
|||
To compile this driver as a module, choose M here: the
|
||||
module will be called apm.
|
||||
|
||||
if APM
|
||||
|
||||
config APM_IGNORE_USER_SUSPEND
|
||||
bool "Ignore USER SUSPEND"
|
||||
depends on APM
|
||||
help
|
||||
This option will ignore USER SUSPEND requests. On machines with a
|
||||
compliant APM BIOS, you want to say N. However, on the NEC Versa M
|
||||
|
@ -987,7 +989,6 @@ config APM_IGNORE_USER_SUSPEND
|
|||
|
||||
config APM_DO_ENABLE
|
||||
bool "Enable PM at boot time"
|
||||
depends on APM
|
||||
---help---
|
||||
Enable APM features at boot time. From page 36 of the APM BIOS
|
||||
specification: "When disabled, the APM BIOS does not automatically
|
||||
|
@ -1005,7 +1006,6 @@ config APM_DO_ENABLE
|
|||
|
||||
config APM_CPU_IDLE
|
||||
bool "Make CPU Idle calls when idle"
|
||||
depends on APM
|
||||
help
|
||||
Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop.
|
||||
On some machines, this can activate improved power savings, such as
|
||||
|
@ -1017,7 +1017,6 @@ config APM_CPU_IDLE
|
|||
|
||||
config APM_DISPLAY_BLANK
|
||||
bool "Enable console blanking using APM"
|
||||
depends on APM
|
||||
help
|
||||
Enable console blanking using the APM. Some laptops can use this to
|
||||
turn off the LCD backlight when the screen blanker of the Linux
|
||||
|
@ -1029,22 +1028,8 @@ config APM_DISPLAY_BLANK
|
|||
backlight at all, or it might print a lot of errors to the console,
|
||||
especially if you are using gpm.
|
||||
|
||||
config APM_RTC_IS_GMT
|
||||
bool "RTC stores time in GMT"
|
||||
depends on APM
|
||||
help
|
||||
Say Y here if your RTC (Real Time Clock a.k.a. hardware clock)
|
||||
stores the time in GMT (Greenwich Mean Time). Say N if your RTC
|
||||
stores localtime.
|
||||
|
||||
It is in fact recommended to store GMT in your RTC, because then you
|
||||
don't have to worry about daylight savings time changes. The only
|
||||
reason not to use GMT in your RTC is if you also run a broken OS
|
||||
that doesn't understand GMT.
|
||||
|
||||
config APM_ALLOW_INTS
|
||||
bool "Allow interrupts during APM BIOS calls"
|
||||
depends on APM
|
||||
help
|
||||
Normally we disable external interrupts while we are making calls to
|
||||
the APM BIOS as a measure to lessen the effects of a badly behaving
|
||||
|
@ -1055,13 +1040,12 @@ config APM_ALLOW_INTS
|
|||
|
||||
config APM_REAL_MODE_POWER_OFF
|
||||
bool "Use real mode APM BIOS call to power off"
|
||||
depends on APM
|
||||
help
|
||||
Use real mode APM BIOS calls to switch off the computer. This is
|
||||
a work-around for a number of buggy BIOSes. Switch this option on if
|
||||
your computer crashes instead of powering off properly.
|
||||
|
||||
endmenu
|
||||
endif # APM
|
||||
|
||||
source "arch/i386/kernel/cpu/cpufreq/Kconfig"
|
||||
|
||||
|
|
|
@ -43,6 +43,7 @@ config M386
|
|||
- "Geode GX/LX" For AMD Geode GX and LX processors.
|
||||
- "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
|
||||
- "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above).
|
||||
- "VIA C7" for VIA C7.
|
||||
|
||||
If you don't know what to do, choose "386".
|
||||
|
||||
|
@ -203,6 +204,12 @@ config MVIAC3_2
|
|||
of SSE and tells gcc to treat the CPU as a 686.
|
||||
Note, this kernel will not boot on older (pre model 9) C3s.
|
||||
|
||||
config MVIAC7
|
||||
bool "VIA C7"
|
||||
help
|
||||
Select this for a VIA C7. Selecting this uses the correct cache
|
||||
shift and tells gcc to treat the CPU as a 686.
|
||||
|
||||
endchoice
|
||||
|
||||
config X86_GENERIC
|
||||
|
@ -231,16 +238,21 @@ config X86_L1_CACHE_SHIFT
|
|||
default "7" if MPENTIUM4 || X86_GENERIC
|
||||
default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
|
||||
default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
|
||||
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2
|
||||
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7
|
||||
|
||||
config X86_XADD
|
||||
bool
|
||||
depends on !M386
|
||||
default y
|
||||
|
||||
config RWSEM_GENERIC_SPINLOCK
|
||||
bool
|
||||
depends on M386
|
||||
depends on !X86_XADD
|
||||
default y
|
||||
|
||||
config RWSEM_XCHGADD_ALGORITHM
|
||||
bool
|
||||
depends on !M386
|
||||
depends on X86_XADD
|
||||
default y
|
||||
|
||||
config ARCH_HAS_ILOG2_U32
|
||||
|
@ -297,7 +309,7 @@ config X86_ALIGNMENT_16
|
|||
|
||||
config X86_GOOD_APIC
|
||||
bool
|
||||
depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2
|
||||
depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7
|
||||
default y
|
||||
|
||||
config X86_INTEL_USERCOPY
|
||||
|
@ -322,5 +334,18 @@ config X86_OOSTORE
|
|||
|
||||
config X86_TSC
|
||||
bool
|
||||
depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
|
||||
depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ
|
||||
default y
|
||||
|
||||
# this should be set for all -march=.. options where the compiler
|
||||
# generates cmov.
|
||||
config X86_CMOV
|
||||
bool
|
||||
depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7)
|
||||
default y
|
||||
|
||||
config X86_MINIMUM_CPU_MODEL
|
||||
int
|
||||
default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP
|
||||
default "0"
|
||||
|
||||
|
|
|
@ -85,14 +85,4 @@ config DOUBLEFAULT
|
|||
option saves about 4k and might cause you much additional grey
|
||||
hair.
|
||||
|
||||
config DEBUG_PARAVIRT
|
||||
bool "Enable some paravirtualization debugging"
|
||||
default n
|
||||
depends on PARAVIRT && DEBUG_KERNEL
|
||||
help
|
||||
Currently deliberately clobbers regs which are allowed to be
|
||||
clobbered in inlined paravirt hooks, even in native mode.
|
||||
If turning this off solves a problem, then DISABLE_INTERRUPTS() or
|
||||
ENABLE_INTERRUPTS() is lying about what registers can be clobbered.
|
||||
|
||||
endmenu
|
||||
|
|
|
@ -34,7 +34,7 @@ CHECKFLAGS += -D__i386__
|
|||
CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return
|
||||
|
||||
# prevent gcc from keeping the stack 16 byte aligned
|
||||
CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
|
||||
CFLAGS += -mpreferred-stack-boundary=4
|
||||
|
||||
# CPU-specific tuning. Anything which can be shared with UML should go here.
|
||||
include $(srctree)/arch/i386/Makefile.cpu
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
#-mtune exists since gcc 3.4
|
||||
HAS_MTUNE := $(call cc-option-yn, -mtune=i386)
|
||||
ifeq ($(HAS_MTUNE),y)
|
||||
tune = $(call cc-option,-mtune=$(1),)
|
||||
tune = $(call cc-option,-mtune=$(1),$(2))
|
||||
else
|
||||
tune = $(call cc-option,-mcpu=$(1),)
|
||||
tune = $(call cc-option,-mcpu=$(1),$(2))
|
||||
endif
|
||||
|
||||
align := $(cc-option-align)
|
||||
|
@ -32,7 +32,8 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
|
|||
cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
|
||||
cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
|
||||
cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
|
||||
cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686))
|
||||
cflags-$(CONFIG_MVIAC7) += -march=i686
|
||||
cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
|
||||
|
||||
# AMD Elan support
|
||||
cflags-$(CONFIG_X86_ELAN) += -march=i486
|
||||
|
@ -42,5 +43,5 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx
|
|||
|
||||
# add at the end to overwrite eventual tuning options from earlier
|
||||
# cpu entries
|
||||
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic)
|
||||
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))
|
||||
|
||||
|
|
|
@ -36,9 +36,9 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE)
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
$(obj)/zImage: IMAGE_OFFSET := 0x1000
|
||||
$(obj)/zImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK)
|
||||
$(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK)
|
||||
$(obj)/bzImage: IMAGE_OFFSET := 0x100000
|
||||
$(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
|
||||
$(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
|
||||
$(obj)/bzImage: BUILDFLAGS := -b
|
||||
|
||||
quiet_cmd_image = BUILD $@
|
||||
|
|
|
@ -189,7 +189,7 @@ static void putstr(const char *);
|
|||
static unsigned long free_mem_ptr;
|
||||
static unsigned long free_mem_end_ptr;
|
||||
|
||||
#define HEAP_SIZE 0x3000
|
||||
#define HEAP_SIZE 0x4000
|
||||
|
||||
static char *vidmem = (char *)0xb8000;
|
||||
static int vidport;
|
||||
|
|
|
@ -52,6 +52,7 @@
|
|||
#include <asm/boot.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/setup.h>
|
||||
|
||||
/* Signature words to ensure LILO loaded us right */
|
||||
#define SIG1 0xAA55
|
||||
|
@ -81,7 +82,7 @@ start:
|
|||
# This is the setup header, and it must start at %cs:2 (old 0x9020:2)
|
||||
|
||||
.ascii "HdrS" # header signature
|
||||
.word 0x0205 # header version number (>= 0x0105)
|
||||
.word 0x0206 # header version number (>= 0x0105)
|
||||
# or else old loadlin-1.5 will fail)
|
||||
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
|
||||
start_sys_seg: .word SYSSEG
|
||||
|
@ -171,6 +172,10 @@ relocatable_kernel: .byte 0
|
|||
pad2: .byte 0
|
||||
pad3: .word 0
|
||||
|
||||
cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line,
|
||||
#added with boot protocol
|
||||
#version 2.06
|
||||
|
||||
trampoline: call start_of_setup
|
||||
.align 16
|
||||
# The offset at this point is 0x240
|
||||
|
@ -297,7 +302,24 @@ good_sig:
|
|||
|
||||
loader_panic_mess: .string "Wrong loader, giving up..."
|
||||
|
||||
# check minimum cpuid
|
||||
# we do this here because it is the last place we can actually
|
||||
# show a user visible error message. Later the video modus
|
||||
# might be already messed up.
|
||||
loader_ok:
|
||||
call verify_cpu
|
||||
testl %eax,%eax
|
||||
jz cpu_ok
|
||||
lea cpu_panic_mess,%si
|
||||
call prtstr
|
||||
1: jmp 1b
|
||||
|
||||
cpu_panic_mess:
|
||||
.asciz "PANIC: CPU too old for this kernel."
|
||||
|
||||
#include "../kernel/verify_cpu.S"
|
||||
|
||||
cpu_ok:
|
||||
# Get memory size (extended mem, kB)
|
||||
|
||||
xorl %eax, %eax
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#
|
||||
# Automatically generated make config: don't edit
|
||||
# Linux kernel version: 2.6.21-rc3
|
||||
# Wed Mar 7 15:29:47 2007
|
||||
# Linux kernel version: 2.6.21-git3
|
||||
# Tue May 1 07:30:51 2007
|
||||
#
|
||||
CONFIG_X86_32=y
|
||||
CONFIG_GENERIC_TIME=y
|
||||
|
@ -108,9 +108,9 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
|
|||
#
|
||||
# Processor type and features
|
||||
#
|
||||
# CONFIG_TICK_ONESHOT is not set
|
||||
# CONFIG_NO_HZ is not set
|
||||
# CONFIG_HIGH_RES_TIMERS is not set
|
||||
CONFIG_TICK_ONESHOT=y
|
||||
CONFIG_NO_HZ=y
|
||||
CONFIG_HIGH_RES_TIMERS=y
|
||||
CONFIG_SMP=y
|
||||
# CONFIG_X86_PC is not set
|
||||
# CONFIG_X86_ELAN is not set
|
||||
|
@ -146,9 +146,11 @@ CONFIG_MPENTIUMIII=y
|
|||
# CONFIG_MGEODE_LX is not set
|
||||
# CONFIG_MCYRIXIII is not set
|
||||
# CONFIG_MVIAC3_2 is not set
|
||||
# CONFIG_MVIAC7 is not set
|
||||
CONFIG_X86_GENERIC=y
|
||||
CONFIG_X86_CMPXCHG=y
|
||||
CONFIG_X86_L1_CACHE_SHIFT=7
|
||||
CONFIG_X86_XADD=y
|
||||
CONFIG_RWSEM_XCHGADD_ALGORITHM=y
|
||||
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
|
||||
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
|
||||
|
@ -162,6 +164,8 @@ CONFIG_X86_GOOD_APIC=y
|
|||
CONFIG_X86_INTEL_USERCOPY=y
|
||||
CONFIG_X86_USE_PPRO_CHECKSUM=y
|
||||
CONFIG_X86_TSC=y
|
||||
CONFIG_X86_CMOV=y
|
||||
CONFIG_X86_MINIMUM_CPU_MODEL=4
|
||||
CONFIG_HPET_TIMER=y
|
||||
CONFIG_HPET_EMULATE_RTC=y
|
||||
CONFIG_NR_CPUS=32
|
||||
|
@ -248,7 +252,6 @@ CONFIG_ACPI_FAN=y
|
|||
CONFIG_ACPI_PROCESSOR=y
|
||||
CONFIG_ACPI_THERMAL=y
|
||||
# CONFIG_ACPI_ASUS is not set
|
||||
# CONFIG_ACPI_IBM is not set
|
||||
# CONFIG_ACPI_TOSHIBA is not set
|
||||
CONFIG_ACPI_BLACKLIST_YEAR=2001
|
||||
CONFIG_ACPI_DEBUG=y
|
||||
|
@ -257,10 +260,7 @@ CONFIG_ACPI_POWER=y
|
|||
CONFIG_ACPI_SYSTEM=y
|
||||
CONFIG_X86_PM_TIMER=y
|
||||
# CONFIG_ACPI_CONTAINER is not set
|
||||
|
||||
#
|
||||
# APM (Advanced Power Management) BIOS Support
|
||||
#
|
||||
# CONFIG_ACPI_SBS is not set
|
||||
# CONFIG_APM is not set
|
||||
|
||||
#
|
||||
|
@ -277,7 +277,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
|
|||
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
|
||||
CONFIG_CPU_FREQ_GOV_USERSPACE=y
|
||||
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
|
||||
# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
|
||||
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
|
||||
|
||||
#
|
||||
# CPUFreq processor drivers
|
||||
|
@ -349,7 +349,6 @@ CONFIG_NET=y
|
|||
#
|
||||
# Networking options
|
||||
#
|
||||
# CONFIG_NETDEBUG is not set
|
||||
CONFIG_PACKET=y
|
||||
# CONFIG_PACKET_MMAP is not set
|
||||
CONFIG_UNIX=y
|
||||
|
@ -388,6 +387,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic"
|
|||
CONFIG_IPV6=y
|
||||
# CONFIG_IPV6_PRIVACY is not set
|
||||
# CONFIG_IPV6_ROUTER_PREF is not set
|
||||
# CONFIG_IPV6_OPTIMISTIC_DAD is not set
|
||||
# CONFIG_INET6_AH is not set
|
||||
# CONFIG_INET6_ESP is not set
|
||||
# CONFIG_INET6_IPCOMP is not set
|
||||
|
@ -443,6 +443,13 @@ CONFIG_IPV6_SIT=y
|
|||
# CONFIG_HAMRADIO is not set
|
||||
# CONFIG_IRDA is not set
|
||||
# CONFIG_BT is not set
|
||||
# CONFIG_AF_RXRPC is not set
|
||||
|
||||
#
|
||||
# Wireless
|
||||
#
|
||||
# CONFIG_CFG80211 is not set
|
||||
# CONFIG_WIRELESS_EXT is not set
|
||||
# CONFIG_IEEE80211 is not set
|
||||
|
||||
#
|
||||
|
@ -463,10 +470,6 @@ CONFIG_FW_LOADER=y
|
|||
# Connector - unified userspace <-> kernelspace linker
|
||||
#
|
||||
# CONFIG_CONNECTOR is not set
|
||||
|
||||
#
|
||||
# Memory Technology Devices (MTD)
|
||||
#
|
||||
# CONFIG_MTD is not set
|
||||
|
||||
#
|
||||
|
@ -513,6 +516,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
|
|||
# CONFIG_SGI_IOC4 is not set
|
||||
# CONFIG_TIFM_CORE is not set
|
||||
# CONFIG_SONY_LAPTOP is not set
|
||||
# CONFIG_THINKPAD_ACPI is not set
|
||||
|
||||
#
|
||||
# ATA/ATAPI/MFM/RLL support
|
||||
|
@ -548,7 +552,6 @@ CONFIG_BLK_DEV_IDEPCI=y
|
|||
# CONFIG_BLK_DEV_RZ1000 is not set
|
||||
CONFIG_BLK_DEV_IDEDMA_PCI=y
|
||||
# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
|
||||
CONFIG_IDEDMA_PCI_AUTO=y
|
||||
# CONFIG_IDEDMA_ONLYDISK is not set
|
||||
# CONFIG_BLK_DEV_AEC62XX is not set
|
||||
# CONFIG_BLK_DEV_ALI15X3 is not set
|
||||
|
@ -580,7 +583,6 @@ CONFIG_BLK_DEV_PIIX=y
|
|||
# CONFIG_IDE_ARM is not set
|
||||
CONFIG_BLK_DEV_IDEDMA=y
|
||||
# CONFIG_IDEDMA_IVB is not set
|
||||
CONFIG_IDEDMA_AUTO=y
|
||||
# CONFIG_BLK_DEV_HD is not set
|
||||
|
||||
#
|
||||
|
@ -669,6 +671,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
|
|||
# CONFIG_SCSI_DC390T is not set
|
||||
# CONFIG_SCSI_NSP32 is not set
|
||||
# CONFIG_SCSI_DEBUG is not set
|
||||
# CONFIG_SCSI_ESP_CORE is not set
|
||||
# CONFIG_SCSI_SRP is not set
|
||||
|
||||
#
|
||||
|
@ -697,6 +700,7 @@ CONFIG_SATA_ACPI=y
|
|||
# CONFIG_PATA_AMD is not set
|
||||
# CONFIG_PATA_ARTOP is not set
|
||||
# CONFIG_PATA_ATIIXP is not set
|
||||
# CONFIG_PATA_CMD640_PCI is not set
|
||||
# CONFIG_PATA_CMD64X is not set
|
||||
# CONFIG_PATA_CS5520 is not set
|
||||
# CONFIG_PATA_CS5530 is not set
|
||||
|
@ -762,10 +766,9 @@ CONFIG_IEEE1394=y
|
|||
# Subsystem Options
|
||||
#
|
||||
# CONFIG_IEEE1394_VERBOSEDEBUG is not set
|
||||
# CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set
|
||||
|
||||
#
|
||||
# Device Drivers
|
||||
# Controllers
|
||||
#
|
||||
|
||||
#
|
||||
|
@ -774,10 +777,11 @@ CONFIG_IEEE1394=y
|
|||
CONFIG_IEEE1394_OHCI1394=y
|
||||
|
||||
#
|
||||
# Protocol Drivers
|
||||
# Protocols
|
||||
#
|
||||
# CONFIG_IEEE1394_VIDEO1394 is not set
|
||||
# CONFIG_IEEE1394_SBP2 is not set
|
||||
# CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set
|
||||
# CONFIG_IEEE1394_ETH1394 is not set
|
||||
# CONFIG_IEEE1394_DV1394 is not set
|
||||
CONFIG_IEEE1394_RAWIO=y
|
||||
|
@ -820,7 +824,9 @@ CONFIG_MII=y
|
|||
# CONFIG_HAPPYMEAL is not set
|
||||
# CONFIG_SUNGEM is not set
|
||||
# CONFIG_CASSINI is not set
|
||||
# CONFIG_NET_VENDOR_3COM is not set
|
||||
CONFIG_NET_VENDOR_3COM=y
|
||||
CONFIG_VORTEX=y
|
||||
# CONFIG_TYPHOON is not set
|
||||
|
||||
#
|
||||
# Tulip family network device support
|
||||
|
@ -901,9 +907,10 @@ CONFIG_BNX2=y
|
|||
# CONFIG_TR is not set
|
||||
|
||||
#
|
||||
# Wireless LAN (non-hamradio)
|
||||
# Wireless LAN
|
||||
#
|
||||
# CONFIG_NET_RADIO is not set
|
||||
# CONFIG_WLAN_PRE80211 is not set
|
||||
# CONFIG_WLAN_80211 is not set
|
||||
|
||||
#
|
||||
# Wan interfaces
|
||||
|
@ -917,7 +924,6 @@ CONFIG_BNX2=y
|
|||
# CONFIG_SHAPER is not set
|
||||
CONFIG_NETCONSOLE=y
|
||||
CONFIG_NETPOLL=y
|
||||
# CONFIG_NETPOLL_RX is not set
|
||||
# CONFIG_NETPOLL_TRAP is not set
|
||||
CONFIG_NET_POLL_CONTROLLER=y
|
||||
|
||||
|
@ -1050,7 +1056,7 @@ CONFIG_MAX_RAW_DEVS=256
|
|||
CONFIG_HPET=y
|
||||
# CONFIG_HPET_RTC_IRQ is not set
|
||||
CONFIG_HPET_MMAP=y
|
||||
CONFIG_HANGCHECK_TIMER=y
|
||||
# CONFIG_HANGCHECK_TIMER is not set
|
||||
|
||||
#
|
||||
# TPM devices
|
||||
|
@ -1141,6 +1147,14 @@ CONFIG_SOUND_ICH=y
|
|||
CONFIG_HID=y
|
||||
# CONFIG_HID_DEBUG is not set
|
||||
|
||||
#
|
||||
# USB Input Devices
|
||||
#
|
||||
CONFIG_USB_HID=y
|
||||
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
|
||||
# CONFIG_HID_FF is not set
|
||||
# CONFIG_USB_HIDDEV is not set
|
||||
|
||||
#
|
||||
# USB support
|
||||
#
|
||||
|
@ -1154,6 +1168,7 @@ CONFIG_USB=y
|
|||
# Miscellaneous USB options
|
||||
#
|
||||
CONFIG_USB_DEVICEFS=y
|
||||
# CONFIG_USB_DEVICE_CLASS is not set
|
||||
# CONFIG_USB_DYNAMIC_MINORS is not set
|
||||
# CONFIG_USB_SUSPEND is not set
|
||||
# CONFIG_USB_OTG is not set
|
||||
|
@ -1204,10 +1219,6 @@ CONFIG_USB_STORAGE=y
|
|||
#
|
||||
# USB Input Devices
|
||||
#
|
||||
CONFIG_USB_HID=y
|
||||
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
|
||||
# CONFIG_HID_FF is not set
|
||||
# CONFIG_USB_HIDDEV is not set
|
||||
# CONFIG_USB_AIPTEK is not set
|
||||
# CONFIG_USB_WACOM is not set
|
||||
# CONFIG_USB_ACECAD is not set
|
||||
|
@ -1528,7 +1539,7 @@ CONFIG_DEBUG_KERNEL=y
|
|||
CONFIG_LOG_BUF_SHIFT=18
|
||||
CONFIG_DETECT_SOFTLOCKUP=y
|
||||
# CONFIG_SCHEDSTATS is not set
|
||||
# CONFIG_TIMER_STATS is not set
|
||||
CONFIG_TIMER_STATS=y
|
||||
# CONFIG_DEBUG_SLAB is not set
|
||||
# CONFIG_DEBUG_RT_MUTEXES is not set
|
||||
# CONFIG_RT_MUTEX_TESTER is not set
|
||||
|
|
|
@ -39,12 +39,10 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
|||
obj-$(CONFIG_HPET_TIMER) += hpet.o
|
||||
obj-$(CONFIG_K8_NB) += k8.o
|
||||
|
||||
obj-$(CONFIG_VMI) += vmi.o vmitime.o
|
||||
obj-$(CONFIG_VMI) += vmi.o vmiclock.o
|
||||
obj-$(CONFIG_PARAVIRT) += paravirt.o
|
||||
obj-y += pcspeaker.o
|
||||
|
||||
EXTRA_AFLAGS := -traditional
|
||||
|
||||
obj-$(CONFIG_SCx200) += scx200.o
|
||||
|
||||
# vsyscall.o contains the vsyscall DSO images as __initdata.
|
||||
|
|
|
@ -874,7 +874,7 @@ static void __init acpi_process_madt(void)
|
|||
acpi_ioapic = 1;
|
||||
|
||||
smp_found_config = 1;
|
||||
clustered_apic_check();
|
||||
setup_apic_routing();
|
||||
}
|
||||
}
|
||||
if (error == -EINVAL) {
|
||||
|
|
|
@ -10,7 +10,6 @@
|
|||
#include <asm/pci-direct.h>
|
||||
#include <asm/acpi.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/irq.h>
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
|
||||
|
@ -48,24 +47,6 @@ static int __init check_bridge(int vendor, int device)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void check_intel(void)
|
||||
{
|
||||
u16 vendor, device;
|
||||
|
||||
vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
|
||||
|
||||
if (vendor != PCI_VENDOR_ID_INTEL)
|
||||
return;
|
||||
|
||||
device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
|
||||
#ifdef CONFIG_SMP
|
||||
if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
|
||||
device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
|
||||
device == PCI_DEVICE_ID_INTEL_E7525_MCH)
|
||||
quirk_intel_irqbalance();
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init check_acpi_pci(void)
|
||||
{
|
||||
int num, slot, func;
|
||||
|
@ -77,8 +58,6 @@ void __init check_acpi_pci(void)
|
|||
if (!early_pci_allowed())
|
||||
return;
|
||||
|
||||
check_intel();
|
||||
|
||||
/* Poor man's PCI discovery */
|
||||
for (num = 0; num < 32; num++) {
|
||||
for (slot = 0; slot < 32; slot++) {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#include <asm/alternative.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
static int noreplace_smp = 0;
|
||||
static int smp_alt_once = 0;
|
||||
static int debug_alternative = 0;
|
||||
|
||||
|
@ -13,15 +14,33 @@ static int __init bootonly(char *str)
|
|||
smp_alt_once = 1;
|
||||
return 1;
|
||||
}
|
||||
__setup("smp-alt-boot", bootonly);
|
||||
|
||||
static int __init debug_alt(char *str)
|
||||
{
|
||||
debug_alternative = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("smp-alt-boot", bootonly);
|
||||
__setup("debug-alternative", debug_alt);
|
||||
|
||||
static int __init setup_noreplace_smp(char *str)
|
||||
{
|
||||
noreplace_smp = 1;
|
||||
return 1;
|
||||
}
|
||||
__setup("noreplace-smp", setup_noreplace_smp);
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
static int noreplace_paravirt = 0;
|
||||
|
||||
static int __init setup_noreplace_paravirt(char *str)
|
||||
{
|
||||
noreplace_paravirt = 1;
|
||||
return 1;
|
||||
}
|
||||
__setup("noreplace-paravirt", setup_noreplace_paravirt);
|
||||
#endif
|
||||
|
||||
#define DPRINTK(fmt, args...) if (debug_alternative) \
|
||||
printk(KERN_DEBUG fmt, args)
|
||||
|
||||
|
@ -132,11 +151,8 @@ static void nop_out(void *insns, unsigned int len)
|
|||
}
|
||||
|
||||
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
|
||||
extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
|
||||
extern u8 *__smp_locks[], *__smp_locks_end[];
|
||||
|
||||
extern u8 __smp_alt_begin[], __smp_alt_end[];
|
||||
|
||||
/* Replace instructions with better alternatives for this CPU type.
|
||||
This runs before SMP is initialized to avoid SMP problems with
|
||||
self modifying code. This implies that assymetric systems where
|
||||
|
@ -171,29 +187,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
|
|||
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end)
|
||||
{
|
||||
struct alt_instr *a;
|
||||
|
||||
DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end);
|
||||
for (a = start; a < end; a++) {
|
||||
memcpy(a->replacement + a->replacementlen,
|
||||
a->instr,
|
||||
a->instrlen);
|
||||
}
|
||||
}
|
||||
|
||||
static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end)
|
||||
{
|
||||
struct alt_instr *a;
|
||||
|
||||
for (a = start; a < end; a++) {
|
||||
memcpy(a->instr,
|
||||
a->replacement + a->replacementlen,
|
||||
a->instrlen);
|
||||
}
|
||||
}
|
||||
|
||||
static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
|
||||
{
|
||||
u8 **ptr;
|
||||
|
@ -211,6 +204,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
|
|||
{
|
||||
u8 **ptr;
|
||||
|
||||
if (noreplace_smp)
|
||||
return;
|
||||
|
||||
for (ptr = start; ptr < end; ptr++) {
|
||||
if (*ptr < text)
|
||||
continue;
|
||||
|
@ -245,6 +241,9 @@ void alternatives_smp_module_add(struct module *mod, char *name,
|
|||
struct smp_alt_module *smp;
|
||||
unsigned long flags;
|
||||
|
||||
if (noreplace_smp)
|
||||
return;
|
||||
|
||||
if (smp_alt_once) {
|
||||
if (boot_cpu_has(X86_FEATURE_UP))
|
||||
alternatives_smp_unlock(locks, locks_end,
|
||||
|
@ -279,7 +278,7 @@ void alternatives_smp_module_del(struct module *mod)
|
|||
struct smp_alt_module *item;
|
||||
unsigned long flags;
|
||||
|
||||
if (smp_alt_once)
|
||||
if (smp_alt_once || noreplace_smp)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&smp_alt, flags);
|
||||
|
@ -310,7 +309,7 @@ void alternatives_smp_switch(int smp)
|
|||
return;
|
||||
#endif
|
||||
|
||||
if (smp_alt_once)
|
||||
if (noreplace_smp || smp_alt_once)
|
||||
return;
|
||||
BUG_ON(!smp && (num_online_cpus() > 1));
|
||||
|
||||
|
@ -319,8 +318,6 @@ void alternatives_smp_switch(int smp)
|
|||
printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
|
||||
clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
|
||||
clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
|
||||
alternatives_smp_apply(__smp_alt_instructions,
|
||||
__smp_alt_instructions_end);
|
||||
list_for_each_entry(mod, &smp_alt_modules, next)
|
||||
alternatives_smp_lock(mod->locks, mod->locks_end,
|
||||
mod->text, mod->text_end);
|
||||
|
@ -328,8 +325,6 @@ void alternatives_smp_switch(int smp)
|
|||
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
|
||||
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
|
||||
set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
|
||||
apply_alternatives(__smp_alt_instructions,
|
||||
__smp_alt_instructions_end);
|
||||
list_for_each_entry(mod, &smp_alt_modules, next)
|
||||
alternatives_smp_unlock(mod->locks, mod->locks_end,
|
||||
mod->text, mod->text_end);
|
||||
|
@ -340,36 +335,31 @@ void alternatives_smp_switch(int smp)
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
|
||||
void apply_paravirt(struct paravirt_patch_site *start,
|
||||
struct paravirt_patch_site *end)
|
||||
{
|
||||
struct paravirt_patch *p;
|
||||
struct paravirt_patch_site *p;
|
||||
|
||||
if (noreplace_paravirt)
|
||||
return;
|
||||
|
||||
for (p = start; p < end; p++) {
|
||||
unsigned int used;
|
||||
|
||||
used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
|
||||
p->len);
|
||||
#ifdef CONFIG_DEBUG_PARAVIRT
|
||||
{
|
||||
int i;
|
||||
/* Deliberately clobber regs using "not %reg" to find bugs. */
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
|
||||
memcpy(p->instr + used, "\xf7\xd0", 2);
|
||||
p->instr[used+1] |= i;
|
||||
used += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
BUG_ON(used > p->len);
|
||||
|
||||
/* Pad the rest with nops */
|
||||
nop_out(p->instr + used, p->len - used);
|
||||
}
|
||||
|
||||
/* Sync to be conservative, in case we patched following instructions */
|
||||
/* Sync to be conservative, in case we patched following
|
||||
* instructions */
|
||||
sync_core();
|
||||
}
|
||||
extern struct paravirt_patch __start_parainstructions[],
|
||||
extern struct paravirt_patch_site __start_parainstructions[],
|
||||
__stop_parainstructions[];
|
||||
#endif /* CONFIG_PARAVIRT */
|
||||
|
||||
|
@ -396,23 +386,19 @@ void __init alternative_instructions(void)
|
|||
printk(KERN_INFO "SMP alternatives: switching to UP code\n");
|
||||
set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability);
|
||||
set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability);
|
||||
apply_alternatives(__smp_alt_instructions,
|
||||
__smp_alt_instructions_end);
|
||||
alternatives_smp_unlock(__smp_locks, __smp_locks_end,
|
||||
_text, _etext);
|
||||
}
|
||||
free_init_pages("SMP alternatives",
|
||||
(unsigned long)__smp_alt_begin,
|
||||
(unsigned long)__smp_alt_end);
|
||||
__pa_symbol(&__smp_locks),
|
||||
__pa_symbol(&__smp_locks_end));
|
||||
} else {
|
||||
alternatives_smp_save(__smp_alt_instructions,
|
||||
__smp_alt_instructions_end);
|
||||
alternatives_smp_module_add(NULL, "core kernel",
|
||||
__smp_locks, __smp_locks_end,
|
||||
_text, _etext);
|
||||
alternatives_smp_switch(0);
|
||||
}
|
||||
#endif
|
||||
apply_paravirt(__start_parainstructions, __stop_parainstructions);
|
||||
apply_paravirt(__parainstructions, __parainstructions_end);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
|
|
@ -129,6 +129,28 @@ static int modern_apic(void)
|
|||
return lapic_get_version() >= 0x14;
|
||||
}
|
||||
|
||||
void apic_wait_icr_idle(void)
|
||||
{
|
||||
while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
unsigned long safe_apic_wait_icr_idle(void)
|
||||
{
|
||||
unsigned long send_status;
|
||||
int timeout;
|
||||
|
||||
timeout = 0;
|
||||
do {
|
||||
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
|
||||
if (!send_status)
|
||||
break;
|
||||
udelay(100);
|
||||
} while (timeout++ < 1000);
|
||||
|
||||
return send_status;
|
||||
}
|
||||
|
||||
/**
|
||||
* enable_NMI_through_LVT0 - enable NMI through local vector table 0
|
||||
*/
|
||||
|
|
|
@ -233,11 +233,10 @@
|
|||
#include <asm/desc.h>
|
||||
#include <asm/i8253.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
#include "io_ports.h"
|
||||
|
||||
extern void machine_real_restart(unsigned char *, int);
|
||||
|
||||
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
|
||||
extern int (*console_blank_hook)(int);
|
||||
#endif
|
||||
|
@ -384,13 +383,6 @@ static int ignore_sys_suspend;
|
|||
static int ignore_normal_resume;
|
||||
static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL;
|
||||
|
||||
#ifdef CONFIG_APM_RTC_IS_GMT
|
||||
# define clock_cmos_diff 0
|
||||
# define got_clock_diff 1
|
||||
#else
|
||||
static long clock_cmos_diff;
|
||||
static int got_clock_diff;
|
||||
#endif
|
||||
static int debug __read_mostly;
|
||||
static int smp __read_mostly;
|
||||
static int apm_disabled = -1;
|
||||
|
|
|
@ -11,11 +11,11 @@
|
|||
#include <linux/suspend.h>
|
||||
#include <asm/ucontext.h>
|
||||
#include "sigframe.h"
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/fixmap.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/thread_info.h>
|
||||
#include <asm/elf.h>
|
||||
#include <asm/pda.h>
|
||||
|
||||
#define DEFINE(sym, val) \
|
||||
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
|
||||
|
@ -25,6 +25,9 @@
|
|||
#define OFFSET(sym, str, mem) \
|
||||
DEFINE(sym, offsetof(struct str, mem));
|
||||
|
||||
/* workaround for a warning with -Wmissing-prototypes */
|
||||
void foo(void);
|
||||
|
||||
void foo(void)
|
||||
{
|
||||
OFFSET(SIGCONTEXT_eax, sigcontext, eax);
|
||||
|
@ -90,18 +93,19 @@ void foo(void)
|
|||
OFFSET(pbe_next, pbe, next);
|
||||
|
||||
/* Offset from the sysenter stack to tss.esp0 */
|
||||
DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) -
|
||||
DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) -
|
||||
sizeof(struct tss_struct));
|
||||
|
||||
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
|
||||
DEFINE(VDSO_PRELINK, VDSO_PRELINK);
|
||||
DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT);
|
||||
DEFINE(PTRS_PER_PTE, PTRS_PER_PTE);
|
||||
DEFINE(PTRS_PER_PMD, PTRS_PER_PMD);
|
||||
DEFINE(PTRS_PER_PGD, PTRS_PER_PGD);
|
||||
|
||||
DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK);
|
||||
|
||||
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
|
||||
|
||||
BLANK();
|
||||
OFFSET(PDA_cpu, i386_pda, cpu_number);
|
||||
OFFSET(PDA_pcurrent, i386_pda, pcurrent);
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
BLANK();
|
||||
OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Makefile for x86-compatible CPU details and quirks
|
||||
#
|
||||
|
||||
obj-y := common.o proc.o
|
||||
obj-y := common.o proc.o bugs.o
|
||||
|
||||
obj-y += amd.o
|
||||
obj-y += cyrix.o
|
||||
|
@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE) += mcheck/
|
|||
|
||||
obj-$(CONFIG_MTRR) += mtrr/
|
||||
obj-$(CONFIG_CPU_FREQ) += cpufreq/
|
||||
|
||||
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
|
||||
|
|
|
@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int force_mwait __cpuinitdata;
|
||||
|
||||
static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
||||
{
|
||||
u32 l, h;
|
||||
|
@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
|||
|
||||
if (amd_apic_timer_broken())
|
||||
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
|
||||
|
||||
if (c->x86 == 0x10 && !force_mwait)
|
||||
clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
|
||||
}
|
||||
|
||||
static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
|
||||
|
@ -314,13 +319,3 @@ int __init amd_init_cpu(void)
|
|||
cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(amd_init_cpu);
|
||||
|
||||
static int __init amd_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_AMD] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(amd_exit_cpu);
|
||||
|
|
191
arch/i386/kernel/cpu/bugs.c
Normal file
191
arch/i386/kernel/cpu/bugs.c
Normal file
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* arch/i386/cpu/bugs.c
|
||||
*
|
||||
* Copyright (C) 1994 Linus Torvalds
|
||||
*
|
||||
* Cyrix stuff, June 1998 by:
|
||||
* - Rafael R. Reilova (moved everything from head.S),
|
||||
* <rreilova@ececs.uc.edu>
|
||||
* - Channing Corn (tests & fixes),
|
||||
* - Andrew D. Balsa (code cleanup).
|
||||
*/
|
||||
#include <linux/init.h>
|
||||
#include <linux/utsname.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/i387.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/paravirt.h>
|
||||
#include <asm/alternative.h>
|
||||
|
||||
static int __init no_halt(char *s)
|
||||
{
|
||||
boot_cpu_data.hlt_works_ok = 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("no-hlt", no_halt);
|
||||
|
||||
static int __init mca_pentium(char *s)
|
||||
{
|
||||
mca_pentium_flag = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("mca-pentium", mca_pentium);
|
||||
|
||||
static int __init no_387(char *s)
|
||||
{
|
||||
boot_cpu_data.hard_math = 0;
|
||||
write_cr0(0xE | read_cr0());
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("no387", no_387);
|
||||
|
||||
static double __initdata x = 4195835.0;
|
||||
static double __initdata y = 3145727.0;
|
||||
|
||||
/*
|
||||
* This used to check for exceptions..
|
||||
* However, it turns out that to support that,
|
||||
* the XMM trap handlers basically had to
|
||||
* be buggy. So let's have a correct XMM trap
|
||||
* handler, and forget about printing out
|
||||
* some status at boot.
|
||||
*
|
||||
* We should really only care about bugs here
|
||||
* anyway. Not features.
|
||||
*/
|
||||
static void __init check_fpu(void)
|
||||
{
|
||||
if (!boot_cpu_data.hard_math) {
|
||||
#ifndef CONFIG_MATH_EMULATION
|
||||
printk(KERN_EMERG "No coprocessor found and no math emulation present.\n");
|
||||
printk(KERN_EMERG "Giving up.\n");
|
||||
for (;;) ;
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
/* trap_init() enabled FXSR and company _before_ testing for FP problems here. */
|
||||
/* Test for the divl bug.. */
|
||||
__asm__("fninit\n\t"
|
||||
"fldl %1\n\t"
|
||||
"fdivl %2\n\t"
|
||||
"fmull %2\n\t"
|
||||
"fldl %1\n\t"
|
||||
"fsubp %%st,%%st(1)\n\t"
|
||||
"fistpl %0\n\t"
|
||||
"fwait\n\t"
|
||||
"fninit"
|
||||
: "=m" (*&boot_cpu_data.fdiv_bug)
|
||||
: "m" (*&x), "m" (*&y));
|
||||
if (boot_cpu_data.fdiv_bug)
|
||||
printk("Hmm, FPU with FDIV bug.\n");
|
||||
}
|
||||
|
||||
static void __init check_hlt(void)
|
||||
{
|
||||
if (paravirt_enabled())
|
||||
return;
|
||||
|
||||
printk(KERN_INFO "Checking 'hlt' instruction... ");
|
||||
if (!boot_cpu_data.hlt_works_ok) {
|
||||
printk("disabled\n");
|
||||
return;
|
||||
}
|
||||
halt();
|
||||
halt();
|
||||
halt();
|
||||
halt();
|
||||
printk("OK.\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Most 386 processors have a bug where a POPAD can lock the
|
||||
* machine even from user space.
|
||||
*/
|
||||
|
||||
static void __init check_popad(void)
|
||||
{
|
||||
#ifndef CONFIG_X86_POPAD_OK
|
||||
int res, inp = (int) &res;
|
||||
|
||||
printk(KERN_INFO "Checking for popad bug... ");
|
||||
__asm__ __volatile__(
|
||||
"movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx "
|
||||
: "=&a" (res)
|
||||
: "d" (inp)
|
||||
: "ecx", "edi" );
|
||||
/* If this fails, it means that any user program may lock the CPU hard. Too bad. */
|
||||
if (res != 12345678) printk( "Buggy.\n" );
|
||||
else printk( "OK.\n" );
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Check whether we are able to run this kernel safely on SMP.
|
||||
*
|
||||
* - In order to run on a i386, we need to be compiled for i386
|
||||
* (for due to lack of "invlpg" and working WP on a i386)
|
||||
* - In order to run on anything without a TSC, we need to be
|
||||
* compiled for a i486.
|
||||
* - In order to support the local APIC on a buggy Pentium machine,
|
||||
* we need to be compiled with CONFIG_X86_GOOD_APIC disabled,
|
||||
* which happens implicitly if compiled for a Pentium or lower
|
||||
* (unless an advanced selection of CPU features is used) as an
|
||||
* otherwise config implies a properly working local APIC without
|
||||
* the need to do extra reads from the APIC.
|
||||
*/
|
||||
|
||||
static void __init check_config(void)
|
||||
{
|
||||
/*
|
||||
* We'd better not be a i386 if we're configured to use some
|
||||
* i486+ only features! (WP works in supervisor mode and the
|
||||
* new "invlpg" and "bswap" instructions)
|
||||
*/
|
||||
#if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP)
|
||||
if (boot_cpu_data.x86 == 3)
|
||||
panic("Kernel requires i486+ for 'invlpg' and other features");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we configured ourselves for a TSC, we'd better have one!
|
||||
*/
|
||||
#ifdef CONFIG_X86_TSC
|
||||
if (!cpu_has_tsc && !tsc_disable)
|
||||
panic("Kernel compiled for Pentium+, requires TSC feature!");
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If we were told we had a good local APIC, check for buggy Pentia,
|
||||
* i.e. all B steppings and the C2 stepping of P54C when using their
|
||||
* integrated APIC (see 11AP erratum in "Pentium Processor
|
||||
* Specification Update").
|
||||
*/
|
||||
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC)
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
|
||||
&& cpu_has_apic
|
||||
&& boot_cpu_data.x86 == 5
|
||||
&& boot_cpu_data.x86_model == 2
|
||||
&& (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11))
|
||||
panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void __init check_bugs(void)
|
||||
{
|
||||
identify_boot_cpu();
|
||||
#ifndef CONFIG_SMP
|
||||
printk("CPU: ");
|
||||
print_cpu_info(&boot_cpu_data);
|
||||
#endif
|
||||
check_config();
|
||||
check_fpu();
|
||||
check_hlt();
|
||||
check_popad();
|
||||
init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86);
|
||||
alternative_instructions();
|
||||
}
|
|
@ -469,13 +469,3 @@ int __init centaur_init_cpu(void)
|
|||
cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(centaur_init_cpu);
|
||||
|
||||
static int __init centaur_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_CENTAUR] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(centaur_exit_cpu);
|
||||
|
|
|
@ -18,15 +18,37 @@
|
|||
#include <asm/apic.h>
|
||||
#include <mach_apic.h>
|
||||
#endif
|
||||
#include <asm/pda.h>
|
||||
|
||||
#include "cpu.h"
|
||||
|
||||
DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
|
||||
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
||||
[GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
|
||||
[GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
|
||||
[GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
|
||||
[GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
|
||||
/*
|
||||
* Segments used for calling PnP BIOS have byte granularity.
|
||||
* The code and data segments have fixed 64k limits,
|
||||
* the transfer segment sizes are set at run time.
|
||||
*/
|
||||
[GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
|
||||
[GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
|
||||
[GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
|
||||
[GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
|
||||
[GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
|
||||
/*
|
||||
* The APM segments have byte granularity and their bases
|
||||
* are set at run time. All have 64k limits.
|
||||
*/
|
||||
[GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
|
||||
/* 16-bit code */
|
||||
[GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
|
||||
[GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
|
||||
|
||||
struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
|
||||
EXPORT_SYMBOL(_cpu_pda);
|
||||
[GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
|
||||
[GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
|
||||
} };
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
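As a reading aid (this is standard x86 descriptor decoding, not something introduced by the patch), the { limit/base-low, flags/base-high } word pair used for GDT_ENTRY_KERNEL_CS unpacks as follows; the same reasoning applies to the other flat 4 GiB entries above:
{ 0x0000ffff, 0x00cf9a00 }:
	base   = 0x00000000		(base bytes scattered over both words)
	limit  = 0xfffff with G=1	-> 4 GiB flat segment
	access = 0x9a			-> present, DPL 0, code, readable
	flags  = 0xc			-> 32-bit, 4 KiB granularity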
|
||||
|
||||
static int cachesize_override __cpuinitdata = -1;
|
||||
static int disable_x86_fxsr __cpuinitdata;
|
||||
|
@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup);
|
|||
/*
|
||||
* This does the hard work of actually picking apart the CPU stuff...
|
||||
*/
|
||||
void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
||||
static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|||
|
||||
/* Init Machine Check Exception if available. */
|
||||
mcheck_init(c);
|
||||
}
|
||||
|
||||
if (c == &boot_cpu_data)
|
||||
sysenter_setup();
|
||||
void __init identify_boot_cpu(void)
|
||||
{
|
||||
identify_cpu(&boot_cpu_data);
|
||||
sysenter_setup();
|
||||
enable_sep_cpu();
|
||||
mtrr_bp_init();
|
||||
}
|
||||
|
||||
if (c == &boot_cpu_data)
|
||||
mtrr_bp_init();
|
||||
else
|
||||
mtrr_ap_init();
|
||||
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
||||
{
|
||||
BUG_ON(c == &boot_cpu_data);
|
||||
identify_cpu(c);
|
||||
enable_sep_cpu();
|
||||
mtrr_ap_init();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_HT
|
||||
|
@ -601,129 +630,36 @@ void __init early_cpu_init(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Make sure %gs is initialized properly in idle threads */
|
||||
/* Make sure %fs is initialized properly in idle threads */
|
||||
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
|
||||
{
|
||||
memset(regs, 0, sizeof(struct pt_regs));
|
||||
regs->xfs = __KERNEL_PDA;
|
||||
regs->xfs = __KERNEL_PERCPU;
|
||||
return regs;
|
||||
}
|
||||
|
||||
static __cpuinit int alloc_gdt(int cpu)
|
||||
/* Current gdt points %fs at the "master" per-cpu area: after this,
|
||||
* it's on the real one. */
|
||||
void switch_to_new_gdt(void)
|
||||
{
|
||||
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
||||
struct desc_struct *gdt;
|
||||
struct i386_pda *pda;
|
||||
struct Xgt_desc_struct gdt_descr;
|
||||
|
||||
gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
||||
pda = cpu_pda(cpu);
|
||||
|
||||
/*
|
||||
* This is a horrible hack to allocate the GDT. The problem
|
||||
* is that cpu_init() is called really early for the boot CPU
|
||||
* (and hence needs bootmem) but much later for the secondary
|
||||
* CPUs, when bootmem will have gone away
|
||||
*/
|
||||
if (NODE_DATA(0)->bdata->node_bootmem_map) {
|
||||
BUG_ON(gdt != NULL || pda != NULL);
|
||||
|
||||
gdt = alloc_bootmem_pages(PAGE_SIZE);
|
||||
pda = alloc_bootmem(sizeof(*pda));
|
||||
/* alloc_bootmem(_pages) panics on failure, so no check */
|
||||
|
||||
memset(gdt, 0, PAGE_SIZE);
|
||||
memset(pda, 0, sizeof(*pda));
|
||||
} else {
|
||||
/* GDT and PDA might already have been allocated if
|
||||
this is a CPU hotplug re-insertion. */
|
||||
if (gdt == NULL)
|
||||
gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
|
||||
|
||||
if (pda == NULL)
|
||||
pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
|
||||
|
||||
if (unlikely(!gdt || !pda)) {
|
||||
free_pages((unsigned long)gdt, 0);
|
||||
kfree(pda);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
cpu_gdt_descr->address = (unsigned long)gdt;
|
||||
cpu_pda(cpu) = pda;
|
||||
|
||||
return 1;
|
||||
gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
|
||||
}
|
||||
|
||||
/* Initial PDA used by boot CPU */
|
||||
struct i386_pda boot_pda = {
|
||||
._pda = &boot_pda,
|
||||
.cpu_number = 0,
|
||||
.pcurrent = &init_task,
|
||||
};
|
||||
|
||||
static inline void set_kernel_fs(void)
|
||||
{
|
||||
/* Set %fs for this CPU's PDA. Memory clobber is to create a
|
||||
barrier with respect to any PDA operations, so the compiler
|
||||
doesn't move any before here. */
|
||||
asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
|
||||
}
|
||||
|
||||
/* Initialize the CPU's GDT and PDA. The boot CPU does this for
|
||||
itself, but secondaries find this done for them. */
|
||||
__cpuinit int init_gdt(int cpu, struct task_struct *idle)
|
||||
{
|
||||
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
||||
struct desc_struct *gdt;
|
||||
struct i386_pda *pda;
|
||||
|
||||
/* For non-boot CPUs, the GDT and PDA should already have been
|
||||
allocated. */
|
||||
if (!alloc_gdt(cpu)) {
|
||||
printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
|
||||
return 0;
|
||||
}
|
||||
|
||||
gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
||||
pda = cpu_pda(cpu);
|
||||
|
||||
BUG_ON(gdt == NULL || pda == NULL);
|
||||
|
||||
/*
|
||||
* Initialize the per-CPU GDT with the boot GDT,
|
||||
* and set up the GDT descriptor:
|
||||
*/
|
||||
memcpy(gdt, cpu_gdt_table, GDT_SIZE);
|
||||
cpu_gdt_descr->size = GDT_SIZE - 1;
|
||||
|
||||
pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
|
||||
(u32 *)&gdt[GDT_ENTRY_PDA].b,
|
||||
(unsigned long)pda, sizeof(*pda) - 1,
|
||||
0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
|
||||
|
||||
memset(pda, 0, sizeof(*pda));
|
||||
pda->_pda = pda;
|
||||
pda->cpu_number = cpu;
|
||||
pda->pcurrent = idle;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void __cpuinit cpu_set_gdt(int cpu)
|
||||
{
|
||||
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
||||
|
||||
/* Reinit these anyway, even if they've already been done (on
|
||||
the boot CPU, this will transition from the boot gdt+pda to
|
||||
the real ones). */
|
||||
load_gdt(cpu_gdt_descr);
|
||||
set_kernel_fs();
|
||||
}
|
||||
|
||||
/* Common CPU init for both boot and secondary CPUs */
|
||||
static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
|
||||
/*
|
||||
* cpu_init() initializes state that is per-CPU. Some data is already
|
||||
* initialized (naturally) in the bootstrap process, such as the GDT
|
||||
* and IDT. We reload them nevertheless, this function acts as a
|
||||
* 'CPU state barrier', nothing should get across.
|
||||
*/
|
||||
void __cpuinit cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
struct tss_struct * t = &per_cpu(init_tss, cpu);
|
||||
struct thread_struct *thread = &curr->thread;
|
||||
|
||||
|
@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
|
|||
}
|
||||
|
||||
load_idt(&idt_descr);
|
||||
switch_to_new_gdt();
|
||||
|
||||
/*
|
||||
* Set up and load the per-CPU TSS and LDT
|
||||
|
@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
|
|||
mxcsr_feature_mask_init();
|
||||
}
|
||||
|
||||
/* Entrypoint to initialize secondary CPU */
|
||||
void __cpuinit secondary_cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
|
||||
_cpu_init(cpu, curr);
|
||||
}
|
||||
|
||||
/*
|
||||
* cpu_init() initializes state that is per-CPU. Some data is already
|
||||
* initialized (naturally) in the bootstrap process, such as the GDT
|
||||
* and IDT. We reload them nevertheless, this function acts as a
|
||||
* 'CPU state barrier', nothing should get across.
|
||||
*/
|
||||
void __cpuinit cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
|
||||
/* Set up the real GDT and PDA, so we can transition from the
|
||||
boot versions. */
|
||||
if (!init_gdt(cpu, curr)) {
|
||||
/* failed to allocate something; not much we can do... */
|
||||
for (;;)
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
cpu_set_gdt(cpu);
|
||||
_cpu_init(cpu, curr);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
void __cpuinit cpu_uninit(void)
|
||||
{
|
||||
|
|
|
@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
|
|||
*/
|
||||
if (vendor == PCI_VENDOR_ID_CYRIX &&
|
||||
(device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520))
|
||||
pit_latch_buggy = 1;
|
||||
mark_tsc_unstable("cyrix 5510/5520 detected");
|
||||
}
|
||||
#endif
|
||||
c->x86_cache_size=16; /* Yep 16K integrated cache thats it */
|
||||
|
@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(cyrix_init_cpu);
|
||||
|
||||
static int __init cyrix_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_CYRIX] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(cyrix_exit_cpu);
|
||||
|
||||
static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
|
||||
.c_vendor = "NSC",
|
||||
.c_ident = { "Geode by NSC" },
|
||||
|
@ -470,12 +460,3 @@ int __init nsc_init_cpu(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(nsc_init_cpu);
|
||||
|
||||
static int __init nsc_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_NSC] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(nsc_exit_cpu);
|
||||
|
|
|
@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
|||
}
|
||||
#endif
|
||||
|
||||
if (c->x86 == 15)
|
||||
if (c->x86 == 15) {
|
||||
set_bit(X86_FEATURE_P4, c->x86_capability);
|
||||
set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability);
|
||||
}
|
||||
if (c->x86 == 6)
|
||||
set_bit(X86_FEATURE_P3, c->x86_capability);
|
||||
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
||||
|
|
|
@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
|
|||
machine_check_vector = k7_machine_check;
|
||||
wmb();
|
||||
|
||||
if (!cpu_has(c, X86_FEATURE_MCE))
|
||||
return;
|
||||
|
||||
printk (KERN_INFO "Intel machine check architecture supported.\n");
|
||||
rdmsr (MSR_IA32_MCG_CAP, l, h);
|
||||
if (l & (1<<8)) /* Control register present ? */
|
||||
|
@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
|
|||
nr_mce_banks = l & 0xff;
|
||||
|
||||
/* Clear status for MC index 0 separately, we don't touch CTL,
|
||||
* as some Athlons cause spurious MCEs when its enabled. */
|
||||
wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
for (i=1; i<nr_mce_banks; i++) {
|
||||
* as some K7 Athlons cause spurious MCEs when it's enabled. */
|
||||
if (boot_cpu_data.x86 == 6) {
|
||||
wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
|
||||
i = 1;
|
||||
} else
|
||||
i = 0;
|
||||
for (; i<nr_mce_banks; i++) {
|
||||
wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
|
||||
wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
|
||||
}
|
||||
|
|
|
@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c)
|
|||
|
||||
switch (c->x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (c->x86==6 || c->x86==15)
|
||||
amd_mcheck_init(c);
|
||||
amd_mcheck_init(c);
|
||||
break;
|
||||
|
||||
case X86_VENDOR_INTEL:
|
||||
|
|
|
@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
|
|||
|
||||
|
||||
/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
|
||||
static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
||||
{
|
||||
u32 h;
|
||||
|
||||
if (mce_num_extended_msrs == 0)
|
||||
goto done;
|
||||
|
||||
rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
|
||||
rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
|
||||
rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
|
||||
|
@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
|
|||
rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
|
||||
rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
|
||||
rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
|
||||
|
||||
/* can we rely on kmalloc to do a dynamic
|
||||
* allocation for the reserved registers?
|
||||
*/
|
||||
done:
|
||||
return mce_num_extended_msrs;
|
||||
}
|
||||
|
||||
static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
|
||||
|
@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
u32 alow, ahigh, high, low;
|
||||
u32 mcgstl, mcgsth;
|
||||
int i;
|
||||
struct intel_mce_extended_msrs dbg;
|
||||
|
||||
rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
|
||||
if (mcgstl & (1<<0)) /* Recoverable ? */
|
||||
|
@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
|
|||
printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
|
||||
smp_processor_id(), mcgsth, mcgstl);
|
||||
|
||||
if (intel_get_extended_msrs(&dbg)) {
|
||||
if (mce_num_extended_msrs > 0) {
|
||||
struct intel_mce_extended_msrs dbg;
|
||||
intel_get_extended_msrs(&dbg);
|
||||
printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
|
||||
smp_processor_id(), dbg.eip, dbg.eflags);
|
||||
printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
|
||||
|
|
|
@ -20,13 +20,25 @@ struct mtrr_state {
|
|||
mtrr_type def_type;
|
||||
};
|
||||
|
||||
struct fixed_range_block {
|
||||
int base_msr; /* start address of an MTRR block */
|
||||
int ranges; /* number of MTRRs in this block */
|
||||
};
|
||||
|
||||
static struct fixed_range_block fixed_range_blocks[] = {
|
||||
{ MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */
|
||||
{ MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */
|
||||
{ MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */
|
||||
{}
|
||||
};
|
||||
|
||||
static unsigned long smp_changes_mask;
|
||||
static struct mtrr_state mtrr_state = {};
|
||||
|
||||
#undef MODULE_PARAM_PREFIX
|
||||
#define MODULE_PARAM_PREFIX "mtrr."
|
||||
|
||||
static __initdata int mtrr_show;
|
||||
static int mtrr_show;
|
||||
module_param_named(show, mtrr_show, bool, 0);
|
||||
|
||||
/* Get the MSR pair relating to a var range */
|
||||
|
@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
|
|||
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
|
||||
}
|
||||
|
||||
static void __init
|
||||
static void
|
||||
get_fixed_ranges(mtrr_type * frs)
|
||||
{
|
||||
unsigned int *p = (unsigned int *) frs;
|
||||
|
@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs)
|
|||
rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
|
||||
}
|
||||
|
||||
static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types)
|
||||
void mtrr_save_fixed_ranges(void *info)
|
||||
{
|
||||
get_fixed_ranges(mtrr_state.fixed_ranges);
|
||||
}
|
||||
|
||||
static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < 8; ++i, ++types, base += step)
|
||||
printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types));
|
||||
printk(KERN_INFO "MTRR %05X-%05X %s\n",
|
||||
base, base + step - 1, mtrr_attrib_to_str(*types));
|
||||
}
|
||||
|
||||
/* Grab all of the MTRR state for this CPU into *state */
|
||||
|
@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
|
|||
smp_processor_id(), msr, a, b);
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs
|
||||
* see AMD publication no. 24593, chapter 3.2.1 for more information
|
||||
*/
|
||||
static inline void k8_enable_fixed_iorrs(void)
|
||||
{
|
||||
unsigned lo, hi;
|
||||
|
||||
rdmsr(MSR_K8_SYSCFG, lo, hi);
|
||||
mtrr_wrmsr(MSR_K8_SYSCFG, lo
|
||||
| K8_MTRRFIXRANGE_DRAM_ENABLE
|
||||
| K8_MTRRFIXRANGE_DRAM_MODIFY, hi);
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks and updates a fixed-range MTRR if it differs from the value it
|
||||
* should have. If K8 extensions are wanted, update the K8 SYSCFG MSR also.
|
||||
* see AMD publication no. 24593, chapter 7.8.1, page 233 for more information
|
||||
* \param msr MSR address of the MTRR which should be checked and updated
|
||||
* \param changed pointer which indicates whether the MTRR needed to be changed
|
||||
* \param msrwords pointer to the MSR values which the MSR should have
|
||||
*/
|
||||
static void set_fixed_range(int msr, int * changed, unsigned int * msrwords)
|
||||
{
|
||||
unsigned lo, hi;
|
||||
|
||||
rdmsr(msr, lo, hi);
|
||||
|
||||
if (lo != msrwords[0] || hi != msrwords[1]) {
|
||||
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
|
||||
boot_cpu_data.x86 == 15 &&
|
||||
((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
|
||||
k8_enable_fixed_iorrs();
|
||||
mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
|
||||
*changed = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
|
||||
/* [SUMMARY] Get a free MTRR.
|
||||
<base> The starting (base) address of the region.
|
||||
|
@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
|
|||
*type = base_lo & 0xff;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks and updates the fixed-range MTRRs if they differ from the saved set
|
||||
* \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges()
|
||||
*/
|
||||
static int set_fixed_ranges(mtrr_type * frs)
|
||||
{
|
||||
unsigned int *p = (unsigned int *) frs;
|
||||
unsigned long long *saved = (unsigned long long *) frs;
|
||||
int changed = FALSE;
|
||||
int i;
|
||||
unsigned int lo, hi;
|
||||
int block=-1, range;
|
||||
|
||||
rdmsr(MTRRfix64K_00000_MSR, lo, hi);
|
||||
if (p[0] != lo || p[1] != hi) {
|
||||
mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
|
||||
changed = TRUE;
|
||||
}
|
||||
while (fixed_range_blocks[++block].ranges)
|
||||
for (range=0; range < fixed_range_blocks[block].ranges; range++)
|
||||
set_fixed_range(fixed_range_blocks[block].base_msr + range,
|
||||
&changed, (unsigned int *) saved++);
|
||||
|
||||
for (i = 0; i < 2; i++) {
|
||||
rdmsr(MTRRfix16K_80000_MSR + i, lo, hi);
|
||||
if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
|
||||
mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2],
|
||||
p[3 + i * 2]);
|
||||
changed = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi);
|
||||
if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
|
||||
mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2],
|
||||
p[7 + i * 2]);
|
||||
changed = TRUE;
|
||||
}
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
|
@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
|
|||
}
|
||||
}
|
||||
|
||||
if (base + size < 0x100) {
|
||||
if (base < 0x100) {
|
||||
printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n",
|
||||
base, size);
|
||||
return -EINVAL;
|
||||
|
|
|
@ -729,6 +729,17 @@ void mtrr_ap_init(void)
|
|||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/**
|
||||
* Save current fixed-range MTRR state of the BSP
|
||||
*/
|
||||
void mtrr_save_state(void)
|
||||
{
|
||||
if (smp_processor_id() == 0)
|
||||
mtrr_save_fixed_ranges(NULL);
|
||||
else
|
||||
smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1);
|
||||
}
|
||||
|
||||
static int __init mtrr_init_finialize(void)
|
||||
{
|
||||
if (!mtrr_if)
|
||||
|
|
|
@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void)
|
|||
cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(nexgen_init_cpu);
|
||||
|
||||
static int __init nexgen_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_NEXGEN] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(nexgen_exit_cpu);
|
||||
|
|
658
arch/i386/kernel/cpu/perfctr-watchdog.c
Normal file
|
@ -0,0 +1,658 @@
|
|||
/* local apic based NMI watchdog for various CPUs.
|
||||
This file also handles reservation of performance counters for coordination
|
||||
with other users (like oprofile).
|
||||
|
||||
Note that these events normally don't tick when the CPU idles. This means
|
||||
the frequency varies with CPU load.
|
||||
|
||||
Original code for K7/P6 written by Keith Owens */
|
||||
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/intel_arch_perfmon.h>
|
||||
|
||||
struct nmi_watchdog_ctlblk {
|
||||
unsigned int cccr_msr;
|
||||
unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
|
||||
unsigned int evntsel_msr; /* the MSR to select the events to handle */
|
||||
};
|
||||
|
||||
/* Interface defining a CPU specific perfctr watchdog */
|
||||
struct wd_ops {
|
||||
int (*reserve)(void);
|
||||
void (*unreserve)(void);
|
||||
int (*setup)(unsigned nmi_hz);
|
||||
void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
|
||||
void (*stop)(void *);
|
||||
unsigned perfctr;
|
||||
unsigned evntsel;
|
||||
u64 checkbit;
|
||||
};
|
||||
|
||||
static struct wd_ops *wd_ops;
|
||||
|
||||
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its
|
||||
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
|
||||
*/
|
||||
#define NMI_MAX_COUNTER_BITS 66
|
||||
|
||||
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
|
||||
* evtsel_nmi_owner tracks the ownership of the event selection
|
||||
* - different performance counters/ event selection may be reserved for
|
||||
* different subsystems; this reservation system just tries to coordinate
|
||||
* things a little
|
||||
*/
|
||||
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
|
||||
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
|
||||
|
||||
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
return wd_ops ? msr - wd_ops->perfctr : 0;
|
||||
}
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
/* returns the bit offset of the event selection register */
|
||||
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
return wd_ops ? msr - wd_ops->evntsel : 0;
|
||||
}
|
||||
|
||||
/* checks for a bit availability (hack for oprofile) */
|
||||
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
|
||||
{
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
return (!test_bit(counter, perfctr_nmi_owner));
|
||||
}
|
||||
|
||||
/* checks an msr for availability */
|
||||
int avail_to_resrv_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
return (!test_bit(counter, perfctr_nmi_owner));
|
||||
}
|
||||
|
||||
int reserve_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
if (!test_and_set_bit(counter, perfctr_nmi_owner))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void release_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
clear_bit(counter, perfctr_nmi_owner);
|
||||
}
|
||||
|
||||
int reserve_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
if (!test_and_set_bit(counter, evntsel_nmi_owner))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void release_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
clear_bit(counter, evntsel_nmi_owner);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
|
||||
EXPORT_SYMBOL(reserve_perfctr_nmi);
|
||||
EXPORT_SYMBOL(release_perfctr_nmi);
|
||||
EXPORT_SYMBOL(reserve_evntsel_nmi);
|
||||
EXPORT_SYMBOL(release_evntsel_nmi);
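A hedged sketch of how another subsystem might use this reservation interface before touching a counter; the K7 MSR names are only an example, a real caller (oprofile, for instance) would pick the MSRs it actually needs:

/* Sketch only: coordinate counter ownership with the NMI watchdog. */
static int example_grab_counter(void)
{
	if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0))
		return -EBUSY;			/* counter already owned */
	if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0)) {
		release_perfctr_nmi(MSR_K7_PERFCTR0);
		return -EBUSY;			/* event select already owned */
	}
	/* ... program the event select and counter here ... */
	return 0;
}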
|
||||
|
||||
void disable_lapic_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
|
||||
|
||||
if (atomic_read(&nmi_active) <= 0)
|
||||
return;
|
||||
|
||||
on_each_cpu(wd_ops->stop, NULL, 0, 1);
|
||||
wd_ops->unreserve();
|
||||
|
||||
BUG_ON(atomic_read(&nmi_active) != 0);
|
||||
}
|
||||
|
||||
void enable_lapic_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
|
||||
|
||||
/* are we already enabled */
|
||||
if (atomic_read(&nmi_active) != 0)
|
||||
return;
|
||||
|
||||
/* are we lapic aware */
|
||||
if (!wd_ops)
|
||||
return;
|
||||
if (!wd_ops->reserve()) {
|
||||
printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
|
||||
return;
|
||||
}
|
||||
|
||||
on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
/*
|
||||
* Activate the NMI watchdog via the local APIC.
|
||||
*/
|
||||
|
||||
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
|
||||
{
|
||||
u64 counter_val;
|
||||
unsigned int retval = hz;
|
||||
|
||||
/*
|
||||
* On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
|
||||
* are writable, with higher bits sign extending from bit 31.
|
||||
* So, we can only program the counter with 31 bit values and
|
||||
* 32nd bit should be 1, for 33.. to be 1.
|
||||
* Find the appropriate nmi_hz
|
||||
*/
|
||||
counter_val = (u64)cpu_khz * 1000;
|
||||
do_div(counter_val, retval);
|
||||
if (counter_val > 0x7fffffffULL) {
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
do_div(count, 0x7fffffffUL);
|
||||
retval = count + 1;
|
||||
}
|
||||
return retval;
|
||||
}
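A rough worked example (the clock speed is assumed, not taken from this patch): on a 2.4 GHz CPU with nmi_hz = 1 the counter would need 2,400,000,000 ticks, which does not fit in the 31 programmable bits, so the helper raises the rate to 2 Hz. A stand-alone sketch of the same arithmetic:

/* User-space sketch of the adjustment; the cpu_khz value is hypothetical. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t cpu_khz = 2400000;		/* assume a 2.4 GHz CPU */
	unsigned int hz = 1;			/* requested NMI rate */
	uint64_t counter_val = cpu_khz * 1000 / hz;

	if (counter_val > 0x7fffffffULL)	/* exceeds 31 writable bits? */
		hz = cpu_khz * 1000 / 0x7fffffffULL + 1;
	printf("adjusted nmi_hz = %u\n", hz);	/* prints 2 for these inputs */
	return 0;
}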
|
||||
|
||||
static void
|
||||
write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
do_div(count, nmi_hz);
|
||||
if(descr)
|
||||
Dprintk("setting %s to -0x%08Lx\n", descr, count);
|
||||
wrmsrl(perfctr_msr, 0 - count);
|
||||
}
|
||||
|
||||
static void write_watchdog_counter32(unsigned int perfctr_msr,
|
||||
const char *descr, unsigned nmi_hz)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
do_div(count, nmi_hz);
|
||||
if(descr)
|
||||
Dprintk("setting %s to -0x%08Lx\n", descr, count);
|
||||
wrmsr(perfctr_msr, (u32)(-count), 0);
|
||||
}
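The counters are programmed with a negative value so that they count up and overflow (raising the NMI) after roughly 1/nmi_hz seconds of unhalted cycles. As an illustration with assumed numbers: with cpu_khz = 1,000,000 (a 1 GHz CPU) and nmi_hz = 1000, count = 1,000,000, so the counter starts at -1,000,000 and overflows after about one millisecond of busy CPU time.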
|
||||
|
||||
/* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface
|
||||
nicely stable so there is not much variety */
|
||||
|
||||
#define K7_EVNTSEL_ENABLE (1 << 22)
|
||||
#define K7_EVNTSEL_INT (1 << 20)
|
||||
#define K7_EVNTSEL_OS (1 << 17)
|
||||
#define K7_EVNTSEL_USR (1 << 16)
|
||||
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
|
||||
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
|
||||
|
||||
static int setup_k7_watchdog(unsigned nmi_hz)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
perfctr_msr = MSR_K7_PERFCTR0;
|
||||
evntsel_msr = MSR_K7_EVNTSEL0;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = K7_EVNTSEL_INT
|
||||
| K7_EVNTSEL_OS
|
||||
| K7_EVNTSEL_USR
|
||||
| K7_NMI_EVENT;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= K7_EVNTSEL_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void single_msr_stop_watchdog(void *arg)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
}
|
||||
|
||||
static int single_msr_reserve(void)
|
||||
{
|
||||
if (!reserve_perfctr_nmi(wd_ops->perfctr))
|
||||
return 0;
|
||||
|
||||
if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
|
||||
release_perfctr_nmi(wd_ops->perfctr);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void single_msr_unreserve(void)
|
||||
{
|
||||
release_evntsel_nmi(wd_ops->perfctr);
|
||||
release_perfctr_nmi(wd_ops->evntsel);
|
||||
}
|
||||
|
||||
static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
||||
{
|
||||
/* start the cycle over again */
|
||||
write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
|
||||
}
|
||||
|
||||
static struct wd_ops k7_wd_ops = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_k7_watchdog,
|
||||
.rearm = single_msr_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_K7_PERFCTR0,
|
||||
.evntsel = MSR_K7_EVNTSEL0,
|
||||
.checkbit = 1ULL<<63,
|
||||
};
|
||||
|
||||
/* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */
|
||||
|
||||
#define P6_EVNTSEL0_ENABLE (1 << 22)
|
||||
#define P6_EVNTSEL_INT (1 << 20)
|
||||
#define P6_EVNTSEL_OS (1 << 17)
|
||||
#define P6_EVNTSEL_USR (1 << 16)
|
||||
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
|
||||
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
|
||||
|
||||
static int setup_p6_watchdog(unsigned nmi_hz)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
perfctr_msr = MSR_P6_PERFCTR0;
|
||||
evntsel_msr = MSR_P6_EVNTSEL0;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = P6_EVNTSEL_INT
|
||||
| P6_EVNTSEL_OS
|
||||
| P6_EVNTSEL_USR
|
||||
| P6_NMI_EVENT;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
|
||||
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= P6_EVNTSEL0_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
||||
{
|
||||
/* P6-based Pentium M needs to re-unmask
|
||||
* the apic vector, but it doesn't hurt
|
||||
* other P6 variants.
|
||||
* ArchPerfmon/Core Duo also needs this */
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
/* P6/ARCH_PERFMON has 32 bit counter write */
|
||||
write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz);
|
||||
}
|
||||
|
||||
static struct wd_ops p6_wd_ops = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_p6_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_P6_PERFCTR0,
|
||||
.evntsel = MSR_P6_EVNTSEL0,
|
||||
.checkbit = 1ULL<<39,
|
||||
};
|
||||
|
||||
/* Intel P4 performance counters. By far the most complicated of all. */
|
||||
|
||||
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
|
||||
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
|
||||
#define P4_ESCR_OS (1<<3)
|
||||
#define P4_ESCR_USR (1<<2)
|
||||
#define P4_CCCR_OVF_PMI0 (1<<26)
|
||||
#define P4_CCCR_OVF_PMI1 (1<<27)
|
||||
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
|
||||
#define P4_CCCR_COMPLEMENT (1<<19)
|
||||
#define P4_CCCR_COMPARE (1<<18)
|
||||
#define P4_CCCR_REQUIRED (3<<16)
|
||||
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
|
||||
#define P4_CCCR_ENABLE (1<<12)
|
||||
#define P4_CCCR_OVF (1<<31)
|
||||
|
||||
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
|
||||
CRU_ESCR0 (with any non-null event selector) through a complemented
|
||||
max threshold. [IA32-Vol3, Section 14.9.9] */
|
||||
|
||||
static int setup_p4_watchdog(unsigned nmi_hz)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr, cccr_msr;
|
||||
unsigned int evntsel, cccr_val;
|
||||
unsigned int misc_enable, dummy;
|
||||
unsigned int ht_num;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
|
||||
if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* detect which hyperthread we are on */
|
||||
if (smp_num_siblings == 2) {
|
||||
unsigned int ebx, apicid;
|
||||
|
||||
ebx = cpuid_ebx(1);
|
||||
apicid = (ebx >> 24) & 0xff;
|
||||
ht_num = apicid & 1;
|
||||
} else
|
||||
#endif
|
||||
ht_num = 0;
|
||||
|
||||
/* performance counters are shared resources;
|
||||
* assign each hyperthread its own set
|
||||
* (re-use the ESCR0 register, seems safe
|
||||
* and keeps the cccr_val the same)
|
||||
*/
|
||||
if (!ht_num) {
|
||||
/* logical cpu 0 */
|
||||
perfctr_msr = MSR_P4_IQ_PERFCTR0;
|
||||
evntsel_msr = MSR_P4_CRU_ESCR0;
|
||||
cccr_msr = MSR_P4_IQ_CCCR0;
|
||||
cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
|
||||
} else {
|
||||
/* logical cpu 1 */
|
||||
perfctr_msr = MSR_P4_IQ_PERFCTR1;
|
||||
evntsel_msr = MSR_P4_CRU_ESCR0;
|
||||
cccr_msr = MSR_P4_IQ_CCCR1;
|
||||
cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
|
||||
}
|
||||
|
||||
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
|
||||
| P4_ESCR_OS
|
||||
| P4_ESCR_USR;
|
||||
|
||||
cccr_val |= P4_CCCR_THRESHOLD(15)
|
||||
| P4_CCCR_COMPLEMENT
|
||||
| P4_CCCR_COMPARE
|
||||
| P4_CCCR_REQUIRED;
|
||||
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
wrmsr(cccr_msr, cccr_val, 0);
|
||||
write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
cccr_val |= P4_CCCR_ENABLE;
|
||||
wrmsr(cccr_msr, cccr_val, 0);
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = cccr_msr;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void stop_p4_watchdog(void *arg)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
wrmsr(wd->cccr_msr, 0, 0);
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
}
|
||||
|
||||
static int p4_reserve(void)
|
||||
{
|
||||
if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
|
||||
return 0;
|
||||
#ifdef CONFIG_SMP
|
||||
if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
|
||||
goto fail1;
|
||||
#endif
|
||||
if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
|
||||
goto fail2;
|
||||
/* RED-PEN why is ESCR1 not reserved here? */
|
||||
return 1;
|
||||
fail2:
|
||||
#ifdef CONFIG_SMP
|
||||
if (smp_num_siblings > 1)
|
||||
release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
|
||||
fail1:
|
||||
#endif
|
||||
release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void p4_unreserve(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
if (smp_num_siblings > 1)
|
||||
release_evntsel_nmi(MSR_P4_IQ_PERFCTR1);
|
||||
#endif
|
||||
release_evntsel_nmi(MSR_P4_IQ_PERFCTR0);
|
||||
release_perfctr_nmi(MSR_P4_CRU_ESCR0);
|
||||
}
|
||||
|
||||
static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
|
||||
{
|
||||
unsigned dummy;
|
||||
/*
|
||||
* P4 quirks:
|
||||
* - An overflown perfctr will assert its interrupt
|
||||
* until the OVF flag in its CCCR is cleared.
|
||||
* - LVTPC is masked on interrupt and must be
|
||||
* unmasked by the LVTPC handler.
|
||||
*/
|
||||
rdmsrl(wd->cccr_msr, dummy);
|
||||
dummy &= ~P4_CCCR_OVF;
|
||||
wrmsrl(wd->cccr_msr, dummy);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
/* start the cycle over again */
|
||||
write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
|
||||
}
|
||||
|
||||
static struct wd_ops p4_wd_ops = {
|
||||
.reserve = p4_reserve,
|
||||
.unreserve = p4_unreserve,
|
||||
.setup = setup_p4_watchdog,
|
||||
.rearm = p4_rearm,
|
||||
.stop = stop_p4_watchdog,
|
||||
/* RED-PEN this is wrong for the other sibling */
|
||||
.perfctr = MSR_P4_BPU_PERFCTR0,
|
||||
.evntsel = MSR_P4_BSU_ESCR0,
|
||||
.checkbit = 1ULL<<39,
|
||||
};
|
||||
|
||||
/* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully
|
||||
all future Intel CPUs. */
|
||||
|
||||
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
|
||||
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
|
||||
|
||||
static int setup_intel_arch_watchdog(unsigned nmi_hz)
|
||||
{
|
||||
unsigned int ebx;
|
||||
union cpuid10_eax eax;
|
||||
unsigned int unused;
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
/*
|
||||
* Check whether the Architectural PerfMon supports
|
||||
* Unhalted Core Cycles Event or not.
|
||||
* NOTE: Corresponding bit = 0 in ebx indicates event present.
|
||||
*/
|
||||
cpuid(10, &(eax.full), &ebx, &unused, &unused);
|
||||
if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
|
||||
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
|
||||
return 0;
|
||||
|
||||
perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1;
|
||||
evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = ARCH_PERFMON_EVENTSEL_INT
|
||||
| ARCH_PERFMON_EVENTSEL_OS
|
||||
| ARCH_PERFMON_EVENTSEL_USR
|
||||
| ARCH_PERFMON_NMI_EVENT_SEL
|
||||
| ARCH_PERFMON_NMI_EVENT_UMASK;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
|
||||
write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct wd_ops intel_arch_wd_ops = {
|
||||
.reserve = single_msr_reserve,
|
||||
.unreserve = single_msr_unreserve,
|
||||
.setup = setup_intel_arch_watchdog,
|
||||
.rearm = p6_rearm,
|
||||
.stop = single_msr_stop_watchdog,
|
||||
.perfctr = MSR_ARCH_PERFMON_PERFCTR0,
|
||||
.evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
|
||||
};
|
||||
|
||||
static void probe_nmi_watchdog(void)
|
||||
{
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
|
||||
boot_cpu_data.x86 != 16)
|
||||
return;
|
||||
wd_ops = &k7_wd_ops;
|
||||
break;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
|
||||
wd_ops = &intel_arch_wd_ops;
|
||||
break;
|
||||
}
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
if (boot_cpu_data.x86_model > 0xd)
|
||||
return;
|
||||
|
||||
wd_ops = &p6_wd_ops;
|
||||
break;
|
||||
case 15:
|
||||
if (boot_cpu_data.x86_model > 0x4)
|
||||
return;
|
||||
|
||||
wd_ops = &p4_wd_ops;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Interface to nmi.c */
|
||||
|
||||
int lapic_watchdog_init(unsigned nmi_hz)
|
||||
{
|
||||
if (!wd_ops) {
|
||||
probe_nmi_watchdog();
|
||||
if (!wd_ops)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!(wd_ops->setup(nmi_hz))) {
|
||||
printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
|
||||
raw_smp_processor_id());
|
||||
return -1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void lapic_watchdog_stop(void)
|
||||
{
|
||||
if (wd_ops)
|
||||
wd_ops->stop(NULL);
|
||||
}
|
||||
|
||||
unsigned lapic_adjust_nmi_hz(unsigned hz)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
|
||||
wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
|
||||
hz = adjust_for_32bit_ctr(hz);
|
||||
return hz;
|
||||
}
|
||||
|
||||
int lapic_wd_event(unsigned nmi_hz)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
u64 ctr;
|
||||
rdmsrl(wd->perfctr_msr, ctr);
|
||||
if (ctr & wd_ops->checkbit) { /* perfctr still running? */
|
||||
return 0;
|
||||
}
|
||||
wd_ops->rearm(wd, nmi_hz);
|
||||
return 1;
|
||||
}
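A hedged sketch of the intended caller side (the surrounding NMI logic lives in nmi.c and is heavily simplified here):

/* Sketch: inside the NMI handler, decide whether this NMI was ours. */
if (lapic_wd_event(nmi_hz)) {
	/* counter had overflowed and was rearmed: it was a watchdog NMI */
} else {
	/* counter still running: the NMI came from somewhere else */
}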
|
||||
|
||||
int lapic_watchdog_ok(void)
|
||||
{
|
||||
return wd_ops != NULL;
|
||||
}
|
|
@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
|
|||
"stc",
|
||||
"100mhzsteps",
|
||||
"hwpstate",
|
||||
NULL,
|
||||
NULL, /* constant_tsc - moved to flags */
|
||||
"", /* constant_tsc - moved to flags */
|
||||
/* nothing */
|
||||
};
|
||||
struct cpuinfo_x86 *c = v;
|
||||
|
|
|
@ -50,12 +50,3 @@ int __init rise_init_cpu(void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(rise_init_cpu);
|
||||
|
||||
static int __init rise_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_RISE] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(rise_exit_cpu);
|
||||
|
|
|
@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void)
|
|||
cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(transmeta_init_cpu);
|
||||
|
||||
static int __init transmeta_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_TRANSMETA] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(transmeta_exit_cpu);
|
||||
|
|
|
@ -24,13 +24,3 @@ int __init umc_init_cpu(void)
|
|||
cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
//early_arch_initcall(umc_init_cpu);
|
||||
|
||||
static int __init umc_exit_cpu(void)
|
||||
{
|
||||
cpu_devs[X86_VENDOR_UMC] = NULL;
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(umc_exit_cpu);
|
||||
|
|
|
@ -33,7 +33,7 @@ static void doublefault_fn(void)
|
|||
printk("double fault, tss at %08lx\n", tss);
|
||||
|
||||
if (ptr_ok(tss)) {
|
||||
struct tss_struct *t = (struct tss_struct *)tss;
|
||||
struct i386_hw_tss *t = (struct i386_hw_tss *)tss;
|
||||
|
||||
printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
|
||||
|
||||
|
@ -49,18 +49,21 @@ static void doublefault_fn(void)
|
|||
}
|
||||
|
||||
struct tss_struct doublefault_tss __cacheline_aligned = {
|
||||
.esp0 = STACK_START,
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
||||
.x86_tss = {
|
||||
.esp0 = STACK_START,
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
||||
|
||||
.eip = (unsigned long) doublefault_fn,
|
||||
.eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */
|
||||
.esp = STACK_START,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
.ss = __KERNEL_DS,
|
||||
.ds = __USER_DS,
|
||||
.eip = (unsigned long) doublefault_fn,
|
||||
/* 0x2 bit is always set */
|
||||
.eflags = X86_EFLAGS_SF | 0x2,
|
||||
.esp = STACK_START,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
.ss = __KERNEL_DS,
|
||||
.ds = __USER_DS,
|
||||
|
||||
.__cr3 = __pa(swapper_pg_dir)
|
||||
.__cr3 = __pa(swapper_pg_dir)
|
||||
}
|
||||
};
|
||||
|
|
|
@ -161,26 +161,27 @@ static struct resource standard_io_resources[] = { {
|
|||
|
||||
static int __init romsignature(const unsigned char *rom)
|
||||
{
|
||||
const unsigned short * const ptr = (const unsigned short *)rom;
|
||||
unsigned short sig;
|
||||
|
||||
return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
|
||||
sig == ROMSIGNATURE;
|
||||
return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
|
||||
}
|
||||
|
||||
static int __init romchecksum(unsigned char *rom, unsigned long length)
|
||||
static int __init romchecksum(const unsigned char *rom, unsigned long length)
|
||||
{
|
||||
unsigned char sum;
|
||||
unsigned char sum, c;
|
||||
|
||||
for (sum = 0; length; length--)
|
||||
sum += *rom++;
|
||||
return sum == 0;
|
||||
for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
|
||||
sum += c;
|
||||
return !length && !sum;
|
||||
}
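The underlying convention is that a valid option ROM's bytes sum to zero modulo 256; a minimal stand-alone sketch of the same rule (the test data is made up, and the probe_kernel_address safety check is omitted):

/* Sketch of the 8-bit ROM checksum rule with a fabricated 4-byte image. */
#include <stdio.h>

static int rom_ok(const unsigned char *rom, unsigned long len)
{
	unsigned char sum = 0;
	while (len--)
		sum += *rom++;
	return sum == 0;		/* valid ROMs sum to 0 mod 256 */
}

int main(void)
{
	unsigned char rom[4] = { 0x55, 0xaa, 0x01, 0x00 };

	rom[3] = (unsigned char)-(0x55 + 0xaa + 0x01);	/* fix up checksum byte */
	printf("%s\n", rom_ok(rom, sizeof(rom)) ? "checksum OK" : "bad checksum");
	return 0;
}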
|
||||
|
||||
static void __init probe_roms(void)
|
||||
{
|
||||
const unsigned char *rom;
|
||||
unsigned long start, length, upper;
|
||||
unsigned char *rom;
|
||||
int i;
|
||||
unsigned char c;
|
||||
int i;
|
||||
|
||||
/* video rom */
|
||||
upper = adapter_rom_resources[0].start;
|
||||
|
@ -191,8 +192,11 @@ static void __init probe_roms(void)
|
|||
|
||||
video_rom_resource.start = start;
|
||||
|
||||
if (probe_kernel_address(rom + 2, c) != 0)
|
||||
continue;
|
||||
|
||||
/* 0 < length <= 0x7f * 512, historically */
|
||||
length = rom[2] * 512;
|
||||
length = c * 512;
|
||||
|
||||
/* if checksum okay, trust length byte */
|
||||
if (length && romchecksum(rom, length))
|
||||
|
@ -226,8 +230,11 @@ static void __init probe_roms(void)
|
|||
if (!romsignature(rom))
|
||||
continue;
|
||||
|
||||
if (probe_kernel_address(rom + 2, c) != 0)
|
||||
continue;
|
||||
|
||||
/* 0 < length <= 0x7f * 512, historically */
|
||||
length = rom[2] * 512;
|
||||
length = c * 512;
|
||||
|
||||
/* but accept any length that fits if checksum okay */
|
||||
if (!length || start + length > upper || !romchecksum(rom, length))
|
||||
|
@ -386,10 +393,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|||
____________________33__
|
||||
______________________4_
|
||||
*/
|
||||
printk("sanitize start\n");
|
||||
/* if there's only one memory region, don't bother */
|
||||
if (*pnr_map < 2) {
|
||||
printk("sanitize bail 0\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -398,7 +403,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|||
/* bail out if we find any unreasonable addresses in bios map */
|
||||
for (i=0; i<old_nr; i++)
|
||||
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
|
||||
printk("sanitize bail 1\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -494,7 +498,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|||
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
||||
*pnr_map = new_nr;
|
||||
|
||||
printk("sanitize end\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -525,7 +528,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
|
|||
unsigned long long size = biosmap->size;
|
||||
unsigned long long end = start + size;
|
||||
unsigned long type = biosmap->type;
|
||||
printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
|
||||
|
||||
/* Overflow in 64 bits? Ignore the memory map. */
|
||||
if (start > end)
|
||||
|
@ -536,17 +538,11 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
|
|||
* Not right. Fix it up.
|
||||
*/
|
||||
if (type == E820_RAM) {
|
||||
printk("copy_e820_map() type is E820_RAM\n");
|
||||
if (start < 0x100000ULL && end > 0xA0000ULL) {
|
||||
printk("copy_e820_map() lies in range...\n");
|
||||
if (start < 0xA0000ULL) {
|
||||
printk("copy_e820_map() start < 0xA0000ULL\n");
|
||||
if (start < 0xA0000ULL)
|
||||
add_memory_region(start, 0xA0000ULL-start, type);
|
||||
}
|
||||
if (end <= 0x100000ULL) {
|
||||
printk("copy_e820_map() end <= 0x100000ULL\n");
|
||||
if (end <= 0x100000ULL)
|
||||
continue;
|
||||
}
|
||||
start = 0x100000ULL;
|
||||
size = end - start;
|
||||
}
|
||||
|
@ -818,6 +814,26 @@ void __init limit_regions(unsigned long long size)
|
|||
print_memory_map("limit_regions endfunc");
|
||||
}
|
||||
|
||||
/*
|
||||
* This function checks if any part of the range <start,end> is mapped
|
||||
* with type.
|
||||
*/
|
||||
int
|
||||
e820_any_mapped(u64 start, u64 end, unsigned type)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
const struct e820entry *ei = &e820.map[i];
|
||||
if (type && ei->type != type)
|
||||
continue;
|
||||
if (ei->addr >= end || ei->addr + ei->size <= start)
|
||||
continue;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(e820_any_mapped);
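A hedged usage example (the range and message are illustrative only): a caller could ask whether any RAM entry overlaps the legacy VGA window before treating that window as free MMIO space:

/* Sketch: probe the e820 map for RAM in 0xA0000-0xBFFFF. */
if (e820_any_mapped(0xA0000, 0xC0000, E820_RAM))
	printk(KERN_INFO "legacy VGA window overlaps RAM\n");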
|
||||
|
||||
/*
|
||||
* This function checks if the entire range <start,end> is mapped with type.
|
||||
*
|
||||
|
|
|
@ -69,13 +69,11 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
|
|||
{
|
||||
unsigned long cr4;
|
||||
unsigned long temp;
|
||||
struct Xgt_desc_struct *cpu_gdt_descr;
|
||||
struct Xgt_desc_struct gdt_descr;
|
||||
|
||||
spin_lock(&efi_rt_lock);
|
||||
local_irq_save(efi_rt_eflags);
|
||||
|
||||
cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
|
||||
|
||||
/*
|
||||
* If I don't have PSE, I should just duplicate two entries in page
|
||||
* directory. If I have PSE, I just need to duplicate one entry in
|
||||
|
@ -105,17 +103,19 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock)
|
|||
*/
|
||||
local_flush_tlb();
|
||||
|
||||
cpu_gdt_descr->address = __pa(cpu_gdt_descr->address);
|
||||
load_gdt(cpu_gdt_descr);
|
||||
gdt_descr.address = __pa(get_cpu_gdt_table(0));
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
}
|
||||
|
||||
static void efi_call_phys_epilog(void) __releases(efi_rt_lock)
|
||||
{
|
||||
unsigned long cr4;
|
||||
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
|
||||
struct Xgt_desc_struct gdt_descr;
|
||||
|
||||
cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address);
|
||||
load_gdt(cpu_gdt_descr);
|
||||
gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
|
||||
gdt_descr.size = GDT_SIZE - 1;
|
||||
load_gdt(&gdt_descr);
|
||||
|
||||
cr4 = read_cr4();
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
* I changed all the .align's to 4 (16 byte alignment), as that's faster
|
||||
* on a 486.
|
||||
*
|
||||
* Stack layout in 'ret_from_system_call':
|
||||
* Stack layout in 'syscall_exit':
|
||||
* ptrace needs to have all regs on the stack.
|
||||
* if the order here is changed, it needs to be
|
||||
* updated in fork.c:copy_process, signal.c:do_signal,
|
||||
|
@ -132,7 +132,7 @@ VM_MASK = 0x00020000
|
|||
movl $(__USER_DS), %edx; \
|
||||
movl %edx, %ds; \
|
||||
movl %edx, %es; \
|
||||
movl $(__KERNEL_PDA), %edx; \
|
||||
movl $(__KERNEL_PERCPU), %edx; \
|
||||
movl %edx, %fs
|
||||
|
||||
#define RESTORE_INT_REGS \
|
||||
|
@ -305,16 +305,12 @@ sysenter_past_esp:
|
|||
pushl $(__USER_CS)
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
/*CFI_REL_OFFSET cs, 0*/
|
||||
#ifndef CONFIG_COMPAT_VDSO
|
||||
/*
|
||||
* Push current_thread_info()->sysenter_return to the stack.
|
||||
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
|
||||
* pushed above; +8 corresponds to copy_thread's esp0 setting.
|
||||
*/
|
||||
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
|
||||
#else
|
||||
pushl $SYSENTER_RETURN
|
||||
#endif
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET eip, 0
|
||||
|
||||
|
@ -342,7 +338,7 @@ sysenter_past_esp:
|
|||
jae syscall_badsys
|
||||
call *sys_call_table(,%eax,4)
|
||||
movl %eax,PT_EAX(%esp)
|
||||
DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_OFF
|
||||
movl TI_flags(%ebp), %ecx
|
||||
testw $_TIF_ALLWORK_MASK, %cx
|
||||
|
@ -560,9 +556,7 @@ END(syscall_badsys)
|
|||
|
||||
#define FIXUP_ESPFIX_STACK \
|
||||
/* since we are on a wrong stack, we cant make it a C code :( */ \
|
||||
movl %fs:PDA_cpu, %ebx; \
|
||||
PER_CPU(cpu_gdt_descr, %ebx); \
|
||||
movl GDS_address(%ebx), %ebx; \
|
||||
PER_CPU(gdt_page, %ebx); \
|
||||
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
|
||||
addl %esp, %eax; \
|
||||
pushl $__KERNEL_DS; \
|
||||
|
@ -635,7 +629,7 @@ ENTRY(name) \
|
|||
SAVE_ALL; \
|
||||
TRACE_IRQS_OFF \
|
||||
movl %esp,%eax; \
|
||||
call smp_/**/name; \
|
||||
call smp_##name; \
|
||||
jmp ret_from_intr; \
|
||||
CFI_ENDPROC; \
|
||||
ENDPROC(name)
|
||||
|
@ -643,11 +637,6 @@ ENDPROC(name)
|
|||
/* The include is where all of the SMP etc. interrupts come from */
|
||||
#include "entry_arch.h"
|
||||
|
||||
/* This alternate entry is needed because we hijack the apic LVTT */
|
||||
#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
|
||||
BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
|
||||
#endif
|
||||
|
||||
KPROBE_ENTRY(page_fault)
|
||||
RING0_EC_FRAME
|
||||
pushl $do_page_fault
|
||||
|
@ -686,7 +675,7 @@ error_code:
|
|||
pushl %fs
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
/*CFI_REL_OFFSET fs, 0*/
|
||||
movl $(__KERNEL_PDA), %ecx
|
||||
movl $(__KERNEL_PERCPU), %ecx
|
||||
movl %ecx, %fs
|
||||
UNWIND_ESPFIX_STACK
|
||||
popl %ecx
|
||||
|
|
|
@ -34,17 +34,32 @@
|
|||
|
||||
/*
|
||||
* This is how much memory *in addition to the memory covered up to
|
||||
* and including _end* we need mapped initially. We need one bit for
|
||||
* each possible page, but only in low memory, which means
|
||||
* 2^32/4096/8 = 128K worst case (4G/4G split.)
|
||||
* and including _end* we need mapped initially.
|
||||
* We need:
|
||||
* - one bit for each possible page, but only in low memory, which means
|
||||
* 2^32/4096/8 = 128K worst case (4G/4G split.)
|
||||
* - enough space to map all low memory, which means
|
||||
* (2^32/4096) / 1024 pages (worst case, non PAE)
|
||||
* (2^32/4096) / 512 + 4 pages (worst case for PAE)
|
||||
* - a few pages for allocator use before the kernel pagetable has
|
||||
* been set up
|
||||
*
|
||||
* Modulo rounding, each megabyte assigned here requires a kilobyte of
|
||||
* memory, which is currently unreclaimed.
|
||||
*
|
||||
* This should be a multiple of a page.
|
||||
*/
|
||||
#define INIT_MAP_BEYOND_END (128*1024)
|
||||
LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
|
||||
|
||||
#if PTRS_PER_PMD > 1
|
||||
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
|
||||
#else
|
||||
PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
|
||||
#endif
|
||||
BOOTBITMAP_SIZE = LOW_PAGES / 8
|
||||
ALLOCATOR_SLOP = 4
|
||||
|
||||
INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
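Plugging in the non-PAE worst case as a sanity check (illustrative arithmetic, not part of the patch):

LOW_PAGES           = 2^32 / 4096                   = 1,048,576 pages
BOOTBITMAP_SIZE     = 1,048,576 / 8                 = 128 KiB
PAGE_TABLE_SIZE     = 1,048,576 / 1024              = 1,024 pages
INIT_MAP_BEYOND_END = 128 KiB + (1,024 + 4) * 4 KiB ≈ 4.1 MiB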
|
||||
|
||||
/*
|
||||
* 32-bit kernel entrypoint; only used by the boot CPU. On entry,
|
||||
|
@ -147,8 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
|
|||
/*
|
||||
* Non-boot CPU entry point; entered from trampoline.S
|
||||
* We can't lgdt here, because lgdt itself uses a data segment, but
|
||||
* we know the trampoline has already loaded the boot_gdt_table GDT
|
||||
* for us.
|
||||
* we know the trampoline has already loaded the boot_gdt for us.
|
||||
*
|
||||
* If cpu hotplug is not supported then this code can go in init section
|
||||
* which will be freed later
|
||||
|
@ -318,12 +332,12 @@ is386: movl $2,%ecx # set MP
|
|||
movl %eax,%cr0
|
||||
|
||||
call check_x87
|
||||
call setup_pda
|
||||
lgdt early_gdt_descr
|
||||
lidt idt_descr
|
||||
ljmp $(__KERNEL_CS),$1f
|
||||
1: movl $(__KERNEL_DS),%eax # reload all the segment registers
|
||||
movl %eax,%ss # after changing gdt.
|
||||
movl %eax,%fs # gets reset once there's real percpu
|
||||
|
||||
movl $(__USER_DS),%eax # DS/ES contains default USER segment
|
||||
movl %eax,%ds
|
||||
|
@ -333,16 +347,17 @@ is386: movl $2,%ecx # set MP
|
|||
movl %eax,%gs
|
||||
lldt %ax
|
||||
|
||||
movl $(__KERNEL_PDA),%eax
|
||||
mov %eax,%fs
|
||||
|
||||
cld # gcc2 wants the direction flag cleared at all times
|
||||
pushl $0 # fake return address for unwinder
|
||||
#ifdef CONFIG_SMP
|
||||
movb ready, %cl
|
||||
movb $1, ready
|
||||
cmpb $0,%cl # the first CPU calls start_kernel
|
||||
jne initialize_secondary # all other CPUs call initialize_secondary
|
||||
je 1f
|
||||
movl $(__KERNEL_PERCPU), %eax
|
||||
movl %eax,%fs # set this cpu's percpu
|
||||
jmp initialize_secondary # all other CPUs call initialize_secondary
|
||||
1:
|
||||
#endif /* CONFIG_SMP */
|
||||
jmp start_kernel
|
||||
|
||||
|
@@ -365,23 +380,6 @@ check_x87:
|
|||
.byte 0xDB,0xE4 /* fsetpm for 287, ignored by 387 */
|
||||
ret
|
||||
|
||||
/*
|
||||
* Point the GDT at this CPU's PDA. On boot this will be
|
||||
* cpu_gdt_table and boot_pda; for secondary CPUs, these will be
|
||||
* that CPU's GDT and PDA.
|
||||
*/
|
||||
ENTRY(setup_pda)
|
||||
/* get the PDA pointer */
|
||||
movl start_pda, %eax
|
||||
|
||||
/* slot the PDA address into the GDT */
|
||||
mov early_gdt_descr+2, %ecx
|
||||
mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
|
||||
shr $16, %eax
|
||||
mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
|
||||
mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
|
||||
ret
|
||||
|
||||
/*
|
||||
* setup_idt
|
||||
*
|
||||
|
@@ -554,9 +552,6 @@ ENTRY(empty_zero_page)
|
|||
* This starts the data section.
|
||||
*/
|
||||
.data
|
||||
ENTRY(start_pda)
|
||||
.long boot_pda
|
||||
|
||||
ENTRY(stack_start)
|
||||
.long init_thread_union+THREAD_SIZE
|
||||
.long __BOOT_DS
|
||||
|
@@ -588,7 +583,7 @@ fault_msg:
|
|||
.word 0 # 32 bit align gdt_desc.address
|
||||
boot_gdt_descr:
|
||||
.word __BOOT_DS+7
|
||||
.long boot_gdt_table - __PAGE_OFFSET
|
||||
.long boot_gdt - __PAGE_OFFSET
|
||||
|
||||
.word 0 # 32-bit align idt_desc.address
|
||||
idt_descr:
|
||||
|
@@ -599,67 +594,14 @@ idt_descr:
|
|||
.word 0 # 32 bit align gdt_desc.address
|
||||
ENTRY(early_gdt_descr)
|
||||
.word GDT_ENTRIES*8-1
|
||||
.long cpu_gdt_table
|
||||
.long per_cpu__gdt_page /* Overwritten for secondary CPUs */
|
||||
|
||||
/*
|
||||
* The boot_gdt_table must mirror the equivalent in setup.S and is
|
||||
* The boot_gdt must mirror the equivalent in setup.S and is
|
||||
* used only for booting.
|
||||
*/
|
||||
.align L1_CACHE_BYTES
|
||||
ENTRY(boot_gdt_table)
|
||||
ENTRY(boot_gdt)
|
||||
.fill GDT_ENTRY_BOOT_CS,8,0
|
||||
.quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
|
||||
.quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
|
||||
|
||||
/*
|
||||
* The Global Descriptor Table contains 28 quadwords, per-CPU.
|
||||
*/
|
||||
.align L1_CACHE_BYTES
|
||||
ENTRY(cpu_gdt_table)
|
||||
.quad 0x0000000000000000 /* NULL descriptor */
|
||||
.quad 0x0000000000000000 /* 0x0b reserved */
|
||||
.quad 0x0000000000000000 /* 0x13 reserved */
|
||||
.quad 0x0000000000000000 /* 0x1b reserved */
|
||||
.quad 0x0000000000000000 /* 0x20 unused */
|
||||
.quad 0x0000000000000000 /* 0x28 unused */
|
||||
.quad 0x0000000000000000 /* 0x33 TLS entry 1 */
|
||||
.quad 0x0000000000000000 /* 0x3b TLS entry 2 */
|
||||
.quad 0x0000000000000000 /* 0x43 TLS entry 3 */
|
||||
.quad 0x0000000000000000 /* 0x4b reserved */
|
||||
.quad 0x0000000000000000 /* 0x53 reserved */
|
||||
.quad 0x0000000000000000 /* 0x5b reserved */
|
||||
|
||||
.quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
|
||||
.quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
|
||||
.quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
|
||||
.quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
|
||||
|
||||
.quad 0x0000000000000000 /* 0x80 TSS descriptor */
|
||||
.quad 0x0000000000000000 /* 0x88 LDT descriptor */
|
||||
|
||||
/*
|
||||
* Segments used for calling PnP BIOS have byte granularity.
|
||||
* The code segments and data segments have fixed 64k limits,
|
||||
* the transfer segment sizes are set at run time.
|
||||
*/
|
||||
.quad 0x00409a000000ffff /* 0x90 32-bit code */
|
||||
.quad 0x00009a000000ffff /* 0x98 16-bit code */
|
||||
.quad 0x000092000000ffff /* 0xa0 16-bit data */
|
||||
.quad 0x0000920000000000 /* 0xa8 16-bit data */
|
||||
.quad 0x0000920000000000 /* 0xb0 16-bit data */
|
||||
|
||||
/*
|
||||
* The APM segments have byte granularity and their bases
|
||||
* are set at run time. All have 64k limits.
|
||||
*/
|
||||
.quad 0x00409a000000ffff /* 0xb8 APM CS code */
|
||||
.quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */
|
||||
.quad 0x004092000000ffff /* 0xc8 APM DS data */
|
||||
|
||||
.quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */
|
||||
.quad 0x00cf92000000ffff /* 0xd8 - PDA */
|
||||
.quad 0x0000000000000000 /* 0xe0 - unused */
|
||||
.quad 0x0000000000000000 /* 0xe8 - unused */
|
||||
.quad 0x0000000000000000 /* 0xf0 - unused */
|
||||
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
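For readers decoding the descriptor constants above, a small standalone sketch that unpacks one of them (the kernel 4GB code segment, 0x00cf9a000000ffff) into base, limit and flags; the field layout is the standard x86 GDT format, the program itself is illustrative only:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t d = 0x00cf9a000000ffffULL;                    /* kernel 4GB code */

	uint32_t limit = (uint32_t)((d & 0xffff) | ((d >> 32) & 0xf0000));
	uint32_t base  = (uint32_t)(((d >> 16) & 0xffffff) | (((d >> 56) & 0xff) << 24));
	unsigned type  = (unsigned)((d >> 40) & 0xff);  /* 0x9a: present, DPL 0, code, readable */
	unsigned flags = (unsigned)((d >> 52) & 0xf);   /* 0xc: 4 KiB granularity, 32-bit       */

	printf("base=0x%08x limit=0x%05x type=0x%02x flags=0x%x\n",
	       base, limit, type, flags);
	return 0;
}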
|
||||
|
||||
|
|
|
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed);
|
|||
#endif
|
||||
|
||||
EXPORT_SYMBOL(csum_partial);
|
||||
|
||||
EXPORT_SYMBOL(_proxy_pda);
|
||||
|
|
|
@@ -35,6 +35,7 @@
|
|||
#include <linux/msi.h>
|
||||
#include <linux/htirq.h>
|
||||
#include <linux/freezer.h>
|
||||
#include <linux/kthread.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/smp.h>
|
||||
|
@@ -661,8 +662,6 @@ static int balanced_irq(void *unused)
|
|||
unsigned long prev_balance_time = jiffies;
|
||||
long time_remaining = balanced_irq_interval;
|
||||
|
||||
daemonize("kirqd");
|
||||
|
||||
/* push everything to CPU 0 to give us a starting point. */
|
||||
for (i = 0 ; i < NR_IRQS ; i++) {
|
||||
irq_desc[i].pending_mask = cpumask_of_cpu(0);
|
||||
|
@@ -722,10 +721,9 @@ static int __init balanced_irq_init(void)
|
|||
}
|
||||
|
||||
printk(KERN_INFO "Starting balanced_irq\n");
|
||||
if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
|
||||
if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
|
||||
return 0;
|
||||
else
|
||||
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
|
||||
printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
|
||||
failed:
|
||||
for_each_possible_cpu(i) {
|
||||
kfree(irq_cpu_data[i].irq_delta);
|
||||
|
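The hunk above replaces the raw kernel_thread() call with kthread_run(); for reference, a minimal sketch of that idiom with a hypothetical thread function and name (not part of this patch):

#include <linux/err.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched.h>

/* Hypothetical thread: sleeps until asked to stop. */
static int demo_thread(void *unused)
{
	while (!kthread_should_stop())
		schedule_timeout_interruptible(HZ);
	return 0;
}

static int __init demo_init(void)
{
	struct task_struct *tsk = kthread_run(demo_thread, NULL, "demo_kthread");

	if (IS_ERR(tsk))
		return PTR_ERR(tsk);
	return 0;
}
module_init(demo_init);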
@@ -1403,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
|
|||
enable_8259A_irq(0);
|
||||
}
|
||||
|
||||
static inline void UNEXPECTED_IO_APIC(void)
|
||||
{
|
||||
}
|
||||
|
||||
void __init print_IO_APIC(void)
|
||||
{
|
||||
int apic, i;
|
||||
|
@@ -1446,34 +1440,12 @@ void __init print_IO_APIC(void)
|
|||
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
||||
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
||||
printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
|
||||
if (reg_00.bits.ID >= get_physical_broadcast())
|
||||
UNEXPECTED_IO_APIC();
|
||||
if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
|
||||
UNEXPECTED_IO_APIC();
|
||||
|
||||
printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
|
||||
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
||||
if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
|
||||
(reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
|
||||
(reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
|
||||
(reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
|
||||
(reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
|
||||
(reg_01.bits.entries != 0x2E) &&
|
||||
(reg_01.bits.entries != 0x3F)
|
||||
)
|
||||
UNEXPECTED_IO_APIC();
|
||||
|
||||
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
||||
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
||||
if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
|
||||
(reg_01.bits.version != 0x10) && /* oldest IO-APICs */
|
||||
(reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
|
||||
(reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
|
||||
(reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
|
||||
)
|
||||
UNEXPECTED_IO_APIC();
|
||||
if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
|
||||
UNEXPECTED_IO_APIC();
|
||||
|
||||
/*
|
||||
* Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
|
||||
|
@@ -1483,8 +1455,6 @@ void __init print_IO_APIC(void)
|
|||
if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
|
||||
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
||||
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
||||
if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
|
||||
UNEXPECTED_IO_APIC();
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -1496,8 +1466,6 @@ void __init print_IO_APIC(void)
|
|||
reg_03.raw != reg_01.raw) {
|
||||
printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
|
||||
printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
|
||||
if (reg_03.bits.__reserved_1)
|
||||
UNEXPECTED_IO_APIC();
|
||||
}
|
||||
|
||||
printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
||||
|
|
|
@@ -16,6 +16,7 @@
|
|||
#include <linux/stddef.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/thread_info.h>
|
||||
#include <linux/syscalls.h>
|
||||
|
||||
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
|
||||
static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value)
|
||||
|
@@ -113,7 +114,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
|||
* Reset the owner so that a process switch will not set
|
||||
* tss->io_bitmap_base to IO_BITMAP_OFFSET.
|
||||
*/
|
||||
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
|
||||
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
|
||||
tss->io_bitmap_owner = NULL;
|
||||
|
||||
put_cpu();
|
||||
|
|
|
@@ -24,6 +24,9 @@
|
|||
DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp;
|
||||
EXPORT_PER_CPU_SYMBOL(irq_stat);
|
||||
|
||||
DEFINE_PER_CPU(struct pt_regs *, irq_regs);
|
||||
EXPORT_PER_CPU_SYMBOL(irq_regs);
|
||||
|
||||
/*
|
||||
* 'what should we do if we get a hw irq event on an illegal vector'.
|
||||
* each architecture has to answer this themselves.
|
||||
|
|
|
@@ -477,7 +477,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc)
|
|||
}
|
||||
++mpc_record;
|
||||
}
|
||||
clustered_apic_check();
|
||||
setup_apic_routing();
|
||||
if (!num_processors)
|
||||
printk(KERN_ERR "SMP mptable: no processors registered!\n");
|
||||
return num_processors;
|
||||
|
|
|
@@ -20,7 +20,6 @@
|
|||
#include <linux/sysdev.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/percpu.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
@@ -28,30 +27,14 @@
|
|||
#include <asm/smp.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/kdebug.h>
|
||||
#include <asm/intel_arch_perfmon.h>
|
||||
|
||||
#include "mach_traps.h"
|
||||
|
||||
int unknown_nmi_panic;
|
||||
int nmi_watchdog_enabled;
|
||||
|
||||
/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
|
||||
* evtsel_nmi_owner tracks the ownership of the event selection
|
||||
* - different performance counters/ event selection may be reserved for
|
||||
* different subsystems; this reservation system just tries to coordinate
|
||||
* things a little
|
||||
*/
|
||||
|
||||
/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
|
||||
* offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
|
||||
*/
|
||||
#define NMI_MAX_COUNTER_BITS 66
|
||||
#define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS)
|
||||
|
||||
static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]);
|
||||
static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]);
|
||||
|
||||
static cpumask_t backtrace_mask = CPU_MASK_NONE;
|
||||
|
||||
/* nmi_active:
|
||||
* >0: the lapic NMI watchdog is active, but can be disabled
|
||||
* <0: the lapic NMI watchdog has not been set up, and cannot
|
||||
|
@@ -63,206 +46,11 @@ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
|
|||
unsigned int nmi_watchdog = NMI_DEFAULT;
|
||||
static unsigned int nmi_hz = HZ;
|
||||
|
||||
struct nmi_watchdog_ctlblk {
|
||||
int enabled;
|
||||
u64 check_bit;
|
||||
unsigned int cccr_msr;
|
||||
unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
|
||||
unsigned int evntsel_msr; /* the MSR to select the events to handle */
|
||||
};
|
||||
static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
|
||||
static DEFINE_PER_CPU(short, wd_enabled);
|
||||
|
||||
/* local prototypes */
|
||||
static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
|
||||
|
||||
extern void show_registers(struct pt_regs *regs);
|
||||
extern int unknown_nmi_panic;
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
/* returns the bit offset of the performance counter register */
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
return (msr - MSR_K7_PERFCTR0);
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
||||
return (msr - MSR_ARCH_PERFMON_PERFCTR0);
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
return (msr - MSR_P6_PERFCTR0);
|
||||
case 15:
|
||||
return (msr - MSR_P4_BPU_PERFCTR0);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* converts an msr to an appropriate reservation bit */
|
||||
static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
|
||||
{
|
||||
/* returns the bit offset of the event selection register */
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
return (msr - MSR_K7_EVNTSEL0);
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
||||
return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
|
||||
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
return (msr - MSR_P6_EVNTSEL0);
|
||||
case 15:
|
||||
return (msr - MSR_P4_BSU_ESCR0);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* checks for a bit availability (hack for oprofile) */
|
||||
int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
|
||||
{
|
||||
int cpu;
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
for_each_possible_cpu (cpu) {
|
||||
if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* checks an msr for availability */
|
||||
int avail_to_resrv_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
int cpu;
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
for_each_possible_cpu (cpu) {
|
||||
if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int __reserve_perfctr_nmi(int cpu, unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
if (cpu < 0)
|
||||
cpu = smp_processor_id();
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __release_perfctr_nmi(int cpu, unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
if (cpu < 0)
|
||||
cpu = smp_processor_id();
|
||||
|
||||
counter = nmi_perfctr_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]);
|
||||
}
|
||||
|
||||
int reserve_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
int cpu, i;
|
||||
for_each_possible_cpu (cpu) {
|
||||
if (!__reserve_perfctr_nmi(cpu, msr)) {
|
||||
for_each_possible_cpu (i) {
|
||||
if (i >= cpu)
|
||||
break;
|
||||
__release_perfctr_nmi(i, msr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void release_perfctr_nmi(unsigned int msr)
|
||||
{
|
||||
int cpu;
|
||||
for_each_possible_cpu (cpu) {
|
||||
__release_perfctr_nmi(cpu, msr);
|
||||
}
|
||||
}
|
||||
|
||||
int __reserve_evntsel_nmi(int cpu, unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
if (cpu < 0)
|
||||
cpu = smp_processor_id();
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __release_evntsel_nmi(int cpu, unsigned int msr)
|
||||
{
|
||||
unsigned int counter;
|
||||
if (cpu < 0)
|
||||
cpu = smp_processor_id();
|
||||
|
||||
counter = nmi_evntsel_msr_to_bit(msr);
|
||||
BUG_ON(counter > NMI_MAX_COUNTER_BITS);
|
||||
|
||||
clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]);
|
||||
}
|
||||
|
||||
int reserve_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
int cpu, i;
|
||||
for_each_possible_cpu (cpu) {
|
||||
if (!__reserve_evntsel_nmi(cpu, msr)) {
|
||||
for_each_possible_cpu (i) {
|
||||
if (i >= cpu)
|
||||
break;
|
||||
__release_evntsel_nmi(i, msr);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void release_evntsel_nmi(unsigned int msr)
|
||||
{
|
||||
int cpu;
|
||||
for_each_possible_cpu (cpu) {
|
||||
__release_evntsel_nmi(cpu, msr);
|
||||
}
|
||||
}
|
||||
|
||||
static __cpuinit inline int nmi_known_cpu(void)
|
||||
{
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)
|
||||
|| (boot_cpu_data.x86 == 16));
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
|
||||
return 1;
|
||||
else
|
||||
return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int endflag __initdata = 0;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
|
@@ -284,28 +72,6 @@ static __init void nmi_cpu_busy(void *data)
|
|||
}
|
||||
#endif
|
||||
|
||||
static unsigned int adjust_for_32bit_ctr(unsigned int hz)
|
||||
{
|
||||
u64 counter_val;
|
||||
unsigned int retval = hz;
|
||||
|
||||
/*
|
||||
* On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
|
||||
* are writable, with higher bits sign extending from bit 31.
|
||||
* So, we can only program the counter with 31 bit values and
|
||||
* the 32nd bit should be 1 so that bits 33 and above read as 1.
|
||||
* Find the appropriate nmi_hz
|
||||
*/
|
||||
counter_val = (u64)cpu_khz * 1000;
|
||||
do_div(counter_val, retval);
|
||||
if (counter_val > 0x7fffffffULL) {
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
do_div(count, 0x7fffffffUL);
|
||||
retval = count + 1;
|
||||
}
|
||||
return retval;
|
||||
}
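A quick sanity check of what adjust_for_32bit_ctr() does, restated as a standalone userspace program with hypothetical cpu_khz values; on a 3 GHz part a 31-bit counter cannot cover a full second, so nmi_hz is bumped from 1 to 2:

#include <stdio.h>
#include <stdint.h>

/* Restatement of the adjustment above for illustration only;
 * the clock rates passed in main() are made up. */
static unsigned int adjust_for_32bit_ctr_demo(unsigned int hz, uint64_t cpu_khz)
{
	uint64_t counter_val = cpu_khz * 1000 / hz;

	if (counter_val > 0x7fffffffULL)
		return (unsigned int)(cpu_khz * 1000 / 0x7fffffffULL) + 1;
	return hz;
}

int main(void)
{
	printf("2 GHz: nmi_hz = %u\n", adjust_for_32bit_ctr_demo(1, 2000000));  /* stays 1   */
	printf("3 GHz: nmi_hz = %u\n", adjust_for_32bit_ctr_demo(1, 3000000));  /* becomes 2 */
	return 0;
}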
|
||||
|
||||
static int __init check_nmi_watchdog(void)
|
||||
{
|
||||
unsigned int *prev_nmi_count;
|
||||
|
@@ -338,14 +104,14 @@ static int __init check_nmi_watchdog(void)
|
|||
if (!cpu_isset(cpu, cpu_callin_map))
|
||||
continue;
|
||||
#endif
|
||||
if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
|
||||
if (!per_cpu(wd_enabled, cpu))
|
||||
continue;
|
||||
if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
|
||||
printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
|
||||
cpu,
|
||||
prev_nmi_count[cpu],
|
||||
nmi_count(cpu));
|
||||
per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
|
||||
per_cpu(wd_enabled, cpu) = 0;
|
||||
atomic_dec(&nmi_active);
|
||||
}
|
||||
}
|
||||
|
@@ -359,16 +125,8 @@ static int __init check_nmi_watchdog(void)
|
|||
|
||||
/* now that we know it works we can reduce NMI frequency to
|
||||
something more reasonable; makes a difference in some configs */
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC) {
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
nmi_hz = 1;
|
||||
|
||||
if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
|
||||
wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
|
||||
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
|
||||
}
|
||||
}
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC)
|
||||
nmi_hz = lapic_adjust_nmi_hz(1);
|
||||
|
||||
kfree(prev_nmi_count);
|
||||
return 0;
|
||||
|
@@ -391,85 +149,8 @@ static int __init setup_nmi_watchdog(char *str)
|
|||
|
||||
__setup("nmi_watchdog=", setup_nmi_watchdog);
|
||||
|
||||
static void disable_lapic_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
|
||||
|
||||
if (atomic_read(&nmi_active) <= 0)
|
||||
return;
|
||||
|
||||
on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
|
||||
|
||||
BUG_ON(atomic_read(&nmi_active) != 0);
|
||||
}
|
||||
|
||||
static void enable_lapic_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
|
||||
|
||||
/* are we already enabled */
|
||||
if (atomic_read(&nmi_active) != 0)
|
||||
return;
|
||||
|
||||
/* are we lapic aware */
|
||||
if (nmi_known_cpu() <= 0)
|
||||
return;
|
||||
|
||||
on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
void disable_timer_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_IO_APIC);
|
||||
|
||||
if (atomic_read(&nmi_active) <= 0)
|
||||
return;
|
||||
|
||||
disable_irq(0);
|
||||
on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
|
||||
|
||||
BUG_ON(atomic_read(&nmi_active) != 0);
|
||||
}
|
||||
|
||||
void enable_timer_nmi_watchdog(void)
|
||||
{
|
||||
BUG_ON(nmi_watchdog != NMI_IO_APIC);
|
||||
|
||||
if (atomic_read(&nmi_active) == 0) {
|
||||
touch_nmi_watchdog();
|
||||
on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
|
||||
enable_irq(0);
|
||||
}
|
||||
}
|
||||
|
||||
static void __acpi_nmi_disable(void *__unused)
|
||||
{
|
||||
apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
/*
|
||||
* Disable timer based NMIs on all CPUs:
|
||||
*/
|
||||
void acpi_nmi_disable(void)
|
||||
{
|
||||
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
|
||||
on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
|
||||
}
|
||||
|
||||
static void __acpi_nmi_enable(void *__unused)
|
||||
{
|
||||
apic_write_around(APIC_LVT0, APIC_DM_NMI);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable timer based NMIs on all CPUs:
|
||||
*/
|
||||
void acpi_nmi_enable(void)
|
||||
{
|
||||
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
|
||||
on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
|
||||
}
|
||||
/* Suspend/resume support */
|
||||
|
||||
#ifdef CONFIG_PM
|
||||
|
||||
|
@@ -516,7 +197,7 @@ static int __init init_lapic_nmi_sysfs(void)
|
|||
if (nmi_watchdog != NMI_LOCAL_APIC)
|
||||
return 0;
|
||||
|
||||
if ( atomic_read(&nmi_active) < 0 )
|
||||
if (atomic_read(&nmi_active) < 0)
|
||||
return 0;
|
||||
|
||||
error = sysdev_class_register(&nmi_sysclass);
|
||||
|
@@ -529,433 +210,69 @@ late_initcall(init_lapic_nmi_sysfs);
|
|||
|
||||
#endif /* CONFIG_PM */
|
||||
|
||||
static void __acpi_nmi_enable(void *__unused)
|
||||
{
|
||||
apic_write_around(APIC_LVT0, APIC_DM_NMI);
|
||||
}
|
||||
|
||||
/*
|
||||
* Activate the NMI watchdog via the local APIC.
|
||||
* Original code written by Keith Owens.
|
||||
* Enable timer based NMIs on all CPUs:
|
||||
*/
|
||||
|
||||
static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
|
||||
void acpi_nmi_enable(void)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
do_div(count, nmi_hz);
|
||||
if(descr)
|
||||
Dprintk("setting %s to -0x%08Lx\n", descr, count);
|
||||
wrmsrl(perfctr_msr, 0 - count);
|
||||
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
|
||||
on_each_cpu(__acpi_nmi_enable, NULL, 0, 1);
|
||||
}
|
||||
|
||||
static void write_watchdog_counter32(unsigned int perfctr_msr,
|
||||
const char *descr)
|
||||
static void __acpi_nmi_disable(void *__unused)
|
||||
{
|
||||
u64 count = (u64)cpu_khz * 1000;
|
||||
|
||||
do_div(count, nmi_hz);
|
||||
if(descr)
|
||||
Dprintk("setting %s to -0x%08Lx\n", descr, count);
|
||||
wrmsr(perfctr_msr, (u32)(-count), 0);
|
||||
apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
/* Note that these events don't tick when the CPU idles. This means
|
||||
the frequency varies with CPU load. */
|
||||
|
||||
#define K7_EVNTSEL_ENABLE (1 << 22)
|
||||
#define K7_EVNTSEL_INT (1 << 20)
|
||||
#define K7_EVNTSEL_OS (1 << 17)
|
||||
#define K7_EVNTSEL_USR (1 << 16)
|
||||
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
|
||||
#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
|
||||
|
||||
static int setup_k7_watchdog(void)
|
||||
/*
|
||||
* Disable timer based NMIs on all CPUs:
|
||||
*/
|
||||
void acpi_nmi_disable(void)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
perfctr_msr = MSR_K7_PERFCTR0;
|
||||
evntsel_msr = MSR_K7_EVNTSEL0;
|
||||
if (!__reserve_perfctr_nmi(-1, perfctr_msr))
|
||||
goto fail;
|
||||
|
||||
if (!__reserve_evntsel_nmi(-1, evntsel_msr))
|
||||
goto fail1;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = K7_EVNTSEL_INT
|
||||
| K7_EVNTSEL_OS
|
||||
| K7_EVNTSEL_USR
|
||||
| K7_NMI_EVENT;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= K7_EVNTSEL_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->check_bit = 1ULL<<63;
|
||||
return 1;
|
||||
fail1:
|
||||
__release_perfctr_nmi(-1, perfctr_msr);
|
||||
fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stop_k7_watchdog(void)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
|
||||
__release_evntsel_nmi(-1, wd->evntsel_msr);
|
||||
__release_perfctr_nmi(-1, wd->perfctr_msr);
|
||||
}
|
||||
|
||||
#define P6_EVNTSEL0_ENABLE (1 << 22)
|
||||
#define P6_EVNTSEL_INT (1 << 20)
|
||||
#define P6_EVNTSEL_OS (1 << 17)
|
||||
#define P6_EVNTSEL_USR (1 << 16)
|
||||
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
|
||||
#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
|
||||
|
||||
static int setup_p6_watchdog(void)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
perfctr_msr = MSR_P6_PERFCTR0;
|
||||
evntsel_msr = MSR_P6_EVNTSEL0;
|
||||
if (!__reserve_perfctr_nmi(-1, perfctr_msr))
|
||||
goto fail;
|
||||
|
||||
if (!__reserve_evntsel_nmi(-1, evntsel_msr))
|
||||
goto fail1;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = P6_EVNTSEL_INT
|
||||
| P6_EVNTSEL_OS
|
||||
| P6_EVNTSEL_USR
|
||||
| P6_NMI_EVENT;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
|
||||
write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0");
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= P6_EVNTSEL0_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->check_bit = 1ULL<<39;
|
||||
return 1;
|
||||
fail1:
|
||||
__release_perfctr_nmi(-1, perfctr_msr);
|
||||
fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stop_p6_watchdog(void)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
|
||||
__release_evntsel_nmi(-1, wd->evntsel_msr);
|
||||
__release_perfctr_nmi(-1, wd->perfctr_msr);
|
||||
}
|
||||
|
||||
/* Note that these events don't tick when the CPU idles. This means
|
||||
the frequency varies with CPU load. */
|
||||
|
||||
#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
|
||||
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
|
||||
#define P4_ESCR_OS (1<<3)
|
||||
#define P4_ESCR_USR (1<<2)
|
||||
#define P4_CCCR_OVF_PMI0 (1<<26)
|
||||
#define P4_CCCR_OVF_PMI1 (1<<27)
|
||||
#define P4_CCCR_THRESHOLD(N) ((N)<<20)
|
||||
#define P4_CCCR_COMPLEMENT (1<<19)
|
||||
#define P4_CCCR_COMPARE (1<<18)
|
||||
#define P4_CCCR_REQUIRED (3<<16)
|
||||
#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
|
||||
#define P4_CCCR_ENABLE (1<<12)
|
||||
#define P4_CCCR_OVF (1<<31)
|
||||
/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
|
||||
CRU_ESCR0 (with any non-null event selector) through a complemented
|
||||
max threshold. [IA32-Vol3, Section 14.9.9] */
|
||||
|
||||
static int setup_p4_watchdog(void)
|
||||
{
|
||||
unsigned int perfctr_msr, evntsel_msr, cccr_msr;
|
||||
unsigned int evntsel, cccr_val;
|
||||
unsigned int misc_enable, dummy;
|
||||
unsigned int ht_num;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
|
||||
if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
|
||||
return 0;
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* detect which hyperthread we are on */
|
||||
if (smp_num_siblings == 2) {
|
||||
unsigned int ebx, apicid;
|
||||
|
||||
ebx = cpuid_ebx(1);
|
||||
apicid = (ebx >> 24) & 0xff;
|
||||
ht_num = apicid & 1;
|
||||
} else
|
||||
#endif
|
||||
ht_num = 0;
|
||||
|
||||
/* performance counters are shared resources
|
||||
* assign each hyperthread its own set
|
||||
* (re-use the ESCR0 register, seems safe
|
||||
* and keeps the cccr_val the same)
|
||||
*/
|
||||
if (!ht_num) {
|
||||
/* logical cpu 0 */
|
||||
perfctr_msr = MSR_P4_IQ_PERFCTR0;
|
||||
evntsel_msr = MSR_P4_CRU_ESCR0;
|
||||
cccr_msr = MSR_P4_IQ_CCCR0;
|
||||
cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
|
||||
} else {
|
||||
/* logical cpu 1 */
|
||||
perfctr_msr = MSR_P4_IQ_PERFCTR1;
|
||||
evntsel_msr = MSR_P4_CRU_ESCR0;
|
||||
cccr_msr = MSR_P4_IQ_CCCR1;
|
||||
cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
|
||||
}
|
||||
|
||||
if (!__reserve_perfctr_nmi(-1, perfctr_msr))
|
||||
goto fail;
|
||||
|
||||
if (!__reserve_evntsel_nmi(-1, evntsel_msr))
|
||||
goto fail1;
|
||||
|
||||
evntsel = P4_ESCR_EVENT_SELECT(0x3F)
|
||||
| P4_ESCR_OS
|
||||
| P4_ESCR_USR;
|
||||
|
||||
cccr_val |= P4_CCCR_THRESHOLD(15)
|
||||
| P4_CCCR_COMPLEMENT
|
||||
| P4_CCCR_COMPARE
|
||||
| P4_CCCR_REQUIRED;
|
||||
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
wrmsr(cccr_msr, cccr_val, 0);
|
||||
write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
cccr_val |= P4_CCCR_ENABLE;
|
||||
wrmsr(cccr_msr, cccr_val, 0);
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = cccr_msr;
|
||||
wd->check_bit = 1ULL<<39;
|
||||
return 1;
|
||||
fail1:
|
||||
__release_perfctr_nmi(-1, perfctr_msr);
|
||||
fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stop_p4_watchdog(void)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
wrmsr(wd->cccr_msr, 0, 0);
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
|
||||
__release_evntsel_nmi(-1, wd->evntsel_msr);
|
||||
__release_perfctr_nmi(-1, wd->perfctr_msr);
|
||||
}
|
||||
|
||||
#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
|
||||
#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
|
||||
|
||||
static int setup_intel_arch_watchdog(void)
|
||||
{
|
||||
unsigned int ebx;
|
||||
union cpuid10_eax eax;
|
||||
unsigned int unused;
|
||||
unsigned int perfctr_msr, evntsel_msr;
|
||||
unsigned int evntsel;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
/*
|
||||
* Check whether the Architectural PerfMon supports
|
||||
* Unhalted Core Cycles Event or not.
|
||||
* NOTE: Corresponding bit = 0 in ebx indicates event present.
|
||||
*/
|
||||
cpuid(10, &(eax.full), &ebx, &unused, &unused);
|
||||
if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
|
||||
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
|
||||
goto fail;
|
||||
|
||||
perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
|
||||
evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
|
||||
|
||||
if (!__reserve_perfctr_nmi(-1, perfctr_msr))
|
||||
goto fail;
|
||||
|
||||
if (!__reserve_evntsel_nmi(-1, evntsel_msr))
|
||||
goto fail1;
|
||||
|
||||
wrmsrl(perfctr_msr, 0UL);
|
||||
|
||||
evntsel = ARCH_PERFMON_EVENTSEL_INT
|
||||
| ARCH_PERFMON_EVENTSEL_OS
|
||||
| ARCH_PERFMON_EVENTSEL_USR
|
||||
| ARCH_PERFMON_NMI_EVENT_SEL
|
||||
| ARCH_PERFMON_NMI_EVENT_UMASK;
|
||||
|
||||
/* setup the timer */
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
nmi_hz = adjust_for_32bit_ctr(nmi_hz);
|
||||
write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0");
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
|
||||
wrmsr(evntsel_msr, evntsel, 0);
|
||||
|
||||
wd->perfctr_msr = perfctr_msr;
|
||||
wd->evntsel_msr = evntsel_msr;
|
||||
wd->cccr_msr = 0; //unused
|
||||
wd->check_bit = 1ULL << (eax.split.bit_width - 1);
|
||||
return 1;
|
||||
fail1:
|
||||
__release_perfctr_nmi(-1, perfctr_msr);
|
||||
fail:
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void stop_intel_arch_watchdog(void)
|
||||
{
|
||||
unsigned int ebx;
|
||||
union cpuid10_eax eax;
|
||||
unsigned int unused;
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
/*
|
||||
* Check whether the Architectural PerfMon supports
|
||||
* Unhalted Core Cycles Event or not.
|
||||
* NOTE: Corresponding bit = 0 in ebx indicates event present.
|
||||
*/
|
||||
cpuid(10, &(eax.full), &ebx, &unused, &unused);
|
||||
if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
|
||||
(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
|
||||
return;
|
||||
|
||||
wrmsr(wd->evntsel_msr, 0, 0);
|
||||
__release_evntsel_nmi(-1, wd->evntsel_msr);
|
||||
__release_perfctr_nmi(-1, wd->perfctr_msr);
|
||||
if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
|
||||
on_each_cpu(__acpi_nmi_disable, NULL, 0, 1);
|
||||
}
|
||||
|
||||
void setup_apic_nmi_watchdog (void *unused)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
/* only support LOCAL and IO APICs for now */
|
||||
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
|
||||
(nmi_watchdog != NMI_IO_APIC))
|
||||
return;
|
||||
|
||||
if (wd->enabled == 1)
|
||||
return;
|
||||
if (__get_cpu_var(wd_enabled))
|
||||
return;
|
||||
|
||||
/* cheap hack to support suspend/resume */
|
||||
/* if cpu0 is not active neither should the other cpus */
|
||||
if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
|
||||
return;
|
||||
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC) {
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 &&
|
||||
boot_cpu_data.x86 != 16)
|
||||
return;
|
||||
if (!setup_k7_watchdog())
|
||||
return;
|
||||
break;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
|
||||
if (!setup_intel_arch_watchdog())
|
||||
return;
|
||||
break;
|
||||
}
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
if (boot_cpu_data.x86_model > 0xd)
|
||||
return;
|
||||
|
||||
if (!setup_p6_watchdog())
|
||||
return;
|
||||
break;
|
||||
case 15:
|
||||
if (boot_cpu_data.x86_model > 0x4)
|
||||
return;
|
||||
|
||||
if (!setup_p4_watchdog())
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
switch (nmi_watchdog) {
|
||||
case NMI_LOCAL_APIC:
|
||||
__get_cpu_var(wd_enabled) = 1; /* enable it early to avoid a race with the handler */
|
||||
if (lapic_watchdog_init(nmi_hz) < 0) {
|
||||
__get_cpu_var(wd_enabled) = 0;
|
||||
return;
|
||||
}
|
||||
/* FALL THROUGH */
|
||||
case NMI_IO_APIC:
|
||||
__get_cpu_var(wd_enabled) = 1;
|
||||
atomic_inc(&nmi_active);
|
||||
}
|
||||
wd->enabled = 1;
|
||||
atomic_inc(&nmi_active);
|
||||
}
|
||||
|
||||
void stop_apic_nmi_watchdog(void *unused)
|
||||
{
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
|
||||
/* only support LOCAL and IO APICs for now */
|
||||
if ((nmi_watchdog != NMI_LOCAL_APIC) &&
|
||||
(nmi_watchdog != NMI_IO_APIC))
|
||||
return;
|
||||
|
||||
if (wd->enabled == 0)
|
||||
if (__get_cpu_var(wd_enabled) == 0)
|
||||
return;
|
||||
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC) {
|
||||
switch (boot_cpu_data.x86_vendor) {
|
||||
case X86_VENDOR_AMD:
|
||||
stop_k7_watchdog();
|
||||
break;
|
||||
case X86_VENDOR_INTEL:
|
||||
if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
|
||||
stop_intel_arch_watchdog();
|
||||
break;
|
||||
}
|
||||
switch (boot_cpu_data.x86) {
|
||||
case 6:
|
||||
if (boot_cpu_data.x86_model > 0xd)
|
||||
break;
|
||||
stop_p6_watchdog();
|
||||
break;
|
||||
case 15:
|
||||
if (boot_cpu_data.x86_model > 0x4)
|
||||
break;
|
||||
stop_p4_watchdog();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
}
|
||||
wd->enabled = 0;
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC)
|
||||
lapic_watchdog_stop();
|
||||
__get_cpu_var(wd_enabled) = 0;
|
||||
atomic_dec(&nmi_active);
|
||||
}
|
||||
|
||||
|
@@ -1011,8 +328,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
|
|||
unsigned int sum;
|
||||
int touched = 0;
|
||||
int cpu = smp_processor_id();
|
||||
struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
|
||||
u64 dummy;
|
||||
int rc=0;
|
||||
|
||||
/* check for other users first */
|
||||
|
@@ -1055,53 +370,20 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
|
|||
alert_counter[cpu] = 0;
|
||||
}
|
||||
/* see if the nmi watchdog went off */
|
||||
if (wd->enabled) {
|
||||
if (nmi_watchdog == NMI_LOCAL_APIC) {
|
||||
rdmsrl(wd->perfctr_msr, dummy);
|
||||
if (dummy & wd->check_bit){
|
||||
/* this wasn't a watchdog timer interrupt */
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* only Intel P4 uses the cccr msr */
|
||||
if (wd->cccr_msr != 0) {
|
||||
/*
|
||||
* P4 quirks:
|
||||
* - An overflown perfctr will assert its interrupt
|
||||
* until the OVF flag in its CCCR is cleared.
|
||||
* - LVTPC is masked on interrupt and must be
|
||||
* unmasked by the LVTPC handler.
|
||||
*/
|
||||
rdmsrl(wd->cccr_msr, dummy);
|
||||
dummy &= ~P4_CCCR_OVF;
|
||||
wrmsrl(wd->cccr_msr, dummy);
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
/* start the cycle over again */
|
||||
write_watchdog_counter(wd->perfctr_msr, NULL);
|
||||
}
|
||||
else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
|
||||
wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
|
||||
/* P6 based Pentium M needs to re-unmask
|
||||
* the apic vector but it doesn't hurt
|
||||
* other P6 variants.
|
||||
* ArchPerfmon/Core Duo also needs this */
|
||||
apic_write(APIC_LVTPC, APIC_DM_NMI);
|
||||
/* P6/ARCH_PERFMON has 32 bit counter write */
|
||||
write_watchdog_counter32(wd->perfctr_msr, NULL);
|
||||
} else {
|
||||
/* start the cycle over again */
|
||||
write_watchdog_counter(wd->perfctr_msr, NULL);
|
||||
}
|
||||
rc = 1;
|
||||
} else if (nmi_watchdog == NMI_IO_APIC) {
|
||||
/* don't know how to accurately check for this.
|
||||
* just assume it was a watchdog timer interrupt
|
||||
* This matches the old behaviour.
|
||||
*/
|
||||
rc = 1;
|
||||
}
|
||||
if (!__get_cpu_var(wd_enabled))
|
||||
return rc;
|
||||
switch (nmi_watchdog) {
|
||||
case NMI_LOCAL_APIC:
|
||||
rc |= lapic_wd_event(nmi_hz);
|
||||
break;
|
||||
case NMI_IO_APIC:
|
||||
/* don't know how to accurately check for this.
|
||||
* just assume it was a watchdog timer interrupt
|
||||
* This matches the old behaviour.
|
||||
*/
|
||||
rc = 1;
|
||||
break;
|
||||
}
|
||||
done:
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@@ -1146,7 +428,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
|
|||
}
|
||||
|
||||
if (nmi_watchdog == NMI_DEFAULT) {
|
||||
if (nmi_known_cpu() > 0)
|
||||
if (lapic_watchdog_ok())
|
||||
nmi_watchdog = NMI_LOCAL_APIC;
|
||||
else
|
||||
nmi_watchdog = NMI_IO_APIC;
|
||||
|
@@ -1182,11 +464,3 @@ void __trigger_all_cpu_backtrace(void)
|
|||
|
||||
EXPORT_SYMBOL(nmi_active);
|
||||
EXPORT_SYMBOL(nmi_watchdog);
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
|
||||
EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
|
||||
EXPORT_SYMBOL(reserve_perfctr_nmi);
|
||||
EXPORT_SYMBOL(release_perfctr_nmi);
|
||||
EXPORT_SYMBOL(reserve_evntsel_nmi);
|
||||
EXPORT_SYMBOL(release_evntsel_nmi);
|
||||
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
|
||||
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
|
||||
|
|
|
@@ -20,6 +20,7 @@
|
|||
#include <linux/efi.h>
|
||||
#include <linux/bcd.h>
|
||||
#include <linux/start_kernel.h>
|
||||
#include <linux/highmem.h>
|
||||
|
||||
#include <asm/bug.h>
|
||||
#include <asm/paravirt.h>
|
||||
|
@@ -35,7 +36,7 @@
|
|||
#include <asm/timer.h>
|
||||
|
||||
/* nop stub */
|
||||
static void native_nop(void)
|
||||
void _paravirt_nop(void)
|
||||
{
|
||||
}
|
||||
|
||||
|
@@ -54,333 +55,150 @@ char *memory_setup(void)
|
|||
#define DEF_NATIVE(name, code) \
|
||||
extern const char start_##name[], end_##name[]; \
|
||||
asm("start_" #name ": " code "; end_" #name ":")
|
||||
DEF_NATIVE(cli, "cli");
|
||||
DEF_NATIVE(sti, "sti");
|
||||
DEF_NATIVE(popf, "push %eax; popf");
|
||||
DEF_NATIVE(pushf, "pushf; pop %eax");
|
||||
DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
|
||||
DEF_NATIVE(iret, "iret");
|
||||
DEF_NATIVE(sti_sysexit, "sti; sysexit");
|
||||
|
||||
static const struct native_insns
|
||||
{
|
||||
const char *start, *end;
|
||||
} native_insns[] = {
|
||||
[PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
|
||||
[PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
|
||||
[PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
|
||||
[PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
|
||||
[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
|
||||
[PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
|
||||
[PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
|
||||
};
|
||||
DEF_NATIVE(irq_disable, "cli");
|
||||
DEF_NATIVE(irq_enable, "sti");
|
||||
DEF_NATIVE(restore_fl, "push %eax; popf");
|
||||
DEF_NATIVE(save_fl, "pushf; pop %eax");
|
||||
DEF_NATIVE(iret, "iret");
|
||||
DEF_NATIVE(irq_enable_sysexit, "sti; sysexit");
|
||||
DEF_NATIVE(read_cr2, "mov %cr2, %eax");
|
||||
DEF_NATIVE(write_cr3, "mov %eax, %cr3");
|
||||
DEF_NATIVE(read_cr3, "mov %cr3, %eax");
|
||||
DEF_NATIVE(clts, "clts");
|
||||
DEF_NATIVE(read_tsc, "rdtsc");
|
||||
|
||||
DEF_NATIVE(ud2a, "ud2a");
|
||||
|
||||
static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
|
||||
{
|
||||
unsigned int insn_len;
|
||||
const unsigned char *start, *end;
|
||||
unsigned ret;
|
||||
|
||||
/* Don't touch it if we don't have a replacement */
|
||||
if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start)
|
||||
return len;
|
||||
switch(type) {
|
||||
#define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site
|
||||
SITE(irq_disable);
|
||||
SITE(irq_enable);
|
||||
SITE(restore_fl);
|
||||
SITE(save_fl);
|
||||
SITE(iret);
|
||||
SITE(irq_enable_sysexit);
|
||||
SITE(read_cr2);
|
||||
SITE(read_cr3);
|
||||
SITE(write_cr3);
|
||||
SITE(clts);
|
||||
SITE(read_tsc);
|
||||
#undef SITE
|
||||
|
||||
insn_len = native_insns[type].end - native_insns[type].start;
|
||||
patch_site:
|
||||
ret = paravirt_patch_insns(insns, len, start, end);
|
||||
break;
|
||||
|
||||
/* Similarly if we can't fit replacement. */
|
||||
if (len < insn_len)
|
||||
return len;
|
||||
case PARAVIRT_PATCH(make_pgd):
|
||||
case PARAVIRT_PATCH(make_pte):
|
||||
case PARAVIRT_PATCH(pgd_val):
|
||||
case PARAVIRT_PATCH(pte_val):
|
||||
#ifdef CONFIG_X86_PAE
|
||||
case PARAVIRT_PATCH(make_pmd):
|
||||
case PARAVIRT_PATCH(pmd_val):
|
||||
#endif
|
||||
/* These functions end up returning exactly what
|
||||
they're passed, in the same registers. */
|
||||
ret = paravirt_patch_nop();
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = paravirt_patch_default(type, clobbers, insns, len);
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_nop(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_ignore(unsigned len)
|
||||
{
|
||||
return len;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
|
||||
void *site, u16 site_clobbers,
|
||||
unsigned len)
|
||||
{
|
||||
unsigned char *call = site;
|
||||
unsigned long delta = (unsigned long)target - (unsigned long)(call+5);
|
||||
|
||||
if (tgt_clobbers & ~site_clobbers)
|
||||
return len; /* target would clobber too much for this site */
|
||||
if (len < 5)
|
||||
return len; /* call too long for patch site */
|
||||
|
||||
*call++ = 0xe8; /* call */
|
||||
*(unsigned long *)call = delta;
|
||||
|
||||
return 5;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_jmp(void *target, void *site, unsigned len)
|
||||
{
|
||||
unsigned char *jmp = site;
|
||||
unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5);
|
||||
|
||||
if (len < 5)
|
||||
return len; /* call too long for patch site */
|
||||
|
||||
*jmp++ = 0xe9; /* jmp */
|
||||
*(unsigned long *)jmp = delta;
|
||||
|
||||
return 5;
|
||||
}
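The patch_call/patch_jmp helpers above emit a 5-byte rel32 call or jmp; a small illustrative sketch of that encoding with made-up addresses (the real code of course patches live kernel text in place):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int main(void)
{
	unsigned char site[5];
	uint32_t target    = 0xc0101000u;                /* hypothetical callee address     */
	uint32_t site_addr = 0xc0400000u;                /* hypothetical patch-site address */
	uint32_t delta     = target - (site_addr + 5);   /* rel32 is relative to next insn  */

	site[0] = 0xe8;                                  /* call opcode (0xe9 for jmp)      */
	memcpy(&site[1], &delta, 4);                     /* little-endian displacement      */

	printf("patched bytes: %02x %02x %02x %02x %02x\n",
	       site[0], site[1], site[2], site[3], site[4]);
	return 0;
}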
|
||||
|
||||
unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len)
|
||||
{
|
||||
void *opfunc = *((void **)¶virt_ops + type);
|
||||
unsigned ret;
|
||||
|
||||
if (opfunc == NULL)
|
||||
/* If there's no function, patch it with a ud2a (BUG) */
|
||||
ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a);
|
||||
else if (opfunc == paravirt_nop)
|
||||
/* If the operation is a nop, then nop the callsite */
|
||||
ret = paravirt_patch_nop();
|
||||
else if (type == PARAVIRT_PATCH(iret) ||
|
||||
type == PARAVIRT_PATCH(irq_enable_sysexit))
|
||||
/* If operation requires a jmp, then jmp */
|
||||
ret = paravirt_patch_jmp(opfunc, site, len);
|
||||
else
|
||||
/* Otherwise call the function; assume target could
|
||||
clobber any caller-save reg */
|
||||
ret = paravirt_patch_call(opfunc, CLBR_ANY,
|
||||
site, clobbers, len);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned paravirt_patch_insns(void *site, unsigned len,
|
||||
const char *start, const char *end)
|
||||
{
|
||||
unsigned insn_len = end - start;
|
||||
|
||||
if (insn_len > len || start == NULL)
|
||||
insn_len = len;
|
||||
else
|
||||
memcpy(site, start, insn_len);
|
||||
|
||||
memcpy(insns, native_insns[type].start, insn_len);
|
||||
return insn_len;
|
||||
}
|
||||
|
||||
static unsigned long native_get_debugreg(int regno)
|
||||
{
|
||||
unsigned long val = 0; /* Damn you, gcc! */
|
||||
|
||||
switch (regno) {
|
||||
case 0:
|
||||
asm("movl %%db0, %0" :"=r" (val)); break;
|
||||
case 1:
|
||||
asm("movl %%db1, %0" :"=r" (val)); break;
|
||||
case 2:
|
||||
asm("movl %%db2, %0" :"=r" (val)); break;
|
||||
case 3:
|
||||
asm("movl %%db3, %0" :"=r" (val)); break;
|
||||
case 6:
|
||||
asm("movl %%db6, %0" :"=r" (val)); break;
|
||||
case 7:
|
||||
asm("movl %%db7, %0" :"=r" (val)); break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_set_debugreg(int regno, unsigned long value)
|
||||
{
|
||||
switch (regno) {
|
||||
case 0:
|
||||
asm("movl %0,%%db0" : /* no output */ :"r" (value));
|
||||
break;
|
||||
case 1:
|
||||
asm("movl %0,%%db1" : /* no output */ :"r" (value));
|
||||
break;
|
||||
case 2:
|
||||
asm("movl %0,%%db2" : /* no output */ :"r" (value));
|
||||
break;
|
||||
case 3:
|
||||
asm("movl %0,%%db3" : /* no output */ :"r" (value));
|
||||
break;
|
||||
case 6:
|
||||
asm("movl %0,%%db6" : /* no output */ :"r" (value));
|
||||
break;
|
||||
case 7:
|
||||
asm("movl %0,%%db7" : /* no output */ :"r" (value));
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
void init_IRQ(void)
|
||||
{
|
||||
paravirt_ops.init_IRQ();
|
||||
}
|
||||
|
||||
static void native_clts(void)
|
||||
{
|
||||
asm volatile ("clts");
|
||||
}
|
||||
|
||||
static unsigned long native_read_cr0(void)
|
||||
{
|
||||
unsigned long val;
|
||||
asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_write_cr0(unsigned long val)
|
||||
{
|
||||
asm volatile("movl %0,%%cr0": :"r" (val));
|
||||
}
|
||||
|
||||
static unsigned long native_read_cr2(void)
|
||||
{
|
||||
unsigned long val;
|
||||
asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_write_cr2(unsigned long val)
|
||||
{
|
||||
asm volatile("movl %0,%%cr2": :"r" (val));
|
||||
}
|
||||
|
||||
static unsigned long native_read_cr3(void)
|
||||
{
|
||||
unsigned long val;
|
||||
asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_write_cr3(unsigned long val)
|
||||
{
|
||||
asm volatile("movl %0,%%cr3": :"r" (val));
|
||||
}
|
||||
|
||||
static unsigned long native_read_cr4(void)
|
||||
{
|
||||
unsigned long val;
|
||||
asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static unsigned long native_read_cr4_safe(void)
|
||||
{
|
||||
unsigned long val;
|
||||
/* This could fault if %cr4 does not exist */
|
||||
asm("1: movl %%cr4, %0 \n"
|
||||
"2: \n"
|
||||
".section __ex_table,\"a\" \n"
|
||||
".long 1b,2b \n"
|
||||
".previous \n"
|
||||
: "=r" (val): "0" (0));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_write_cr4(unsigned long val)
|
||||
{
|
||||
asm volatile("movl %0,%%cr4": :"r" (val));
|
||||
}
|
||||
|
||||
static unsigned long native_save_fl(void)
|
||||
{
|
||||
unsigned long f;
|
||||
asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
|
||||
return f;
|
||||
}
|
||||
|
||||
static void native_restore_fl(unsigned long f)
|
||||
{
|
||||
asm volatile("pushl %0 ; popfl": /* no output */
|
||||
:"g" (f)
|
||||
:"memory", "cc");
|
||||
}
|
||||
|
||||
static void native_irq_disable(void)
|
||||
{
|
||||
asm volatile("cli": : :"memory");
|
||||
}
|
||||
|
||||
static void native_irq_enable(void)
|
||||
{
|
||||
asm volatile("sti": : :"memory");
|
||||
}
|
||||
|
||||
static void native_safe_halt(void)
|
||||
{
|
||||
asm volatile("sti; hlt": : :"memory");
|
||||
}
|
||||
|
||||
static void native_halt(void)
|
||||
{
|
||||
asm volatile("hlt": : :"memory");
|
||||
}
|
||||
|
||||
static void native_wbinvd(void)
|
||||
{
|
||||
asm volatile("wbinvd": : :"memory");
|
||||
}
|
||||
|
||||
static unsigned long long native_read_msr(unsigned int msr, int *err)
|
||||
{
|
||||
unsigned long long val;
|
||||
|
||||
asm volatile("2: rdmsr ; xorl %0,%0\n"
|
||||
"1:\n\t"
|
||||
".section .fixup,\"ax\"\n\t"
|
||||
"3: movl %3,%0 ; jmp 1b\n\t"
|
||||
".previous\n\t"
|
||||
".section __ex_table,\"a\"\n"
|
||||
" .align 4\n\t"
|
||||
" .long 2b,3b\n\t"
|
||||
".previous"
|
||||
: "=r" (*err), "=A" (val)
|
||||
: "c" (msr), "i" (-EFAULT));
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static int native_write_msr(unsigned int msr, unsigned long long val)
|
||||
{
|
||||
int err;
|
||||
asm volatile("2: wrmsr ; xorl %0,%0\n"
|
||||
"1:\n\t"
|
||||
".section .fixup,\"ax\"\n\t"
|
||||
"3: movl %4,%0 ; jmp 1b\n\t"
|
||||
".previous\n\t"
|
||||
".section __ex_table,\"a\"\n"
|
||||
" .align 4\n\t"
|
||||
" .long 2b,3b\n\t"
|
||||
".previous"
|
||||
: "=a" (err)
|
||||
: "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
|
||||
"i" (-EFAULT));
|
||||
return err;
|
||||
}
|
||||
|
||||
static unsigned long long native_read_tsc(void)
|
||||
{
|
||||
unsigned long long val;
|
||||
asm volatile("rdtsc" : "=A" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static unsigned long long native_read_pmc(void)
|
||||
{
|
||||
unsigned long long val;
|
||||
asm volatile("rdpmc" : "=A" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void native_load_tr_desc(void)
|
||||
{
|
||||
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
|
||||
}
|
||||
|
||||
static void native_load_gdt(const struct Xgt_desc_struct *dtr)
|
||||
{
|
||||
asm volatile("lgdt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static void native_load_idt(const struct Xgt_desc_struct *dtr)
|
||||
{
|
||||
asm volatile("lidt %0"::"m" (*dtr));
|
||||
}
|
||||
|
||||
static void native_store_gdt(struct Xgt_desc_struct *dtr)
|
||||
{
|
||||
asm ("sgdt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
static void native_store_idt(struct Xgt_desc_struct *dtr)
|
||||
{
|
||||
asm ("sidt %0":"=m" (*dtr));
|
||||
}
|
||||
|
||||
static unsigned long native_store_tr(void)
|
||||
{
|
||||
unsigned long tr;
|
||||
asm ("str %0":"=r" (tr));
|
||||
return tr;
|
||||
}
|
||||
|
||||
static void native_load_tls(struct thread_struct *t, unsigned int cpu)
|
||||
{
|
||||
#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
|
||||
C(0); C(1); C(2);
|
||||
#undef C
|
||||
}
|
||||
|
||||
static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high)
|
||||
{
|
||||
u32 *lp = (u32 *)((char *)dt + entry*8);
|
||||
lp[0] = entry_low;
|
||||
lp[1] = entry_high;
|
||||
}
|
||||
|
||||
static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high)
|
||||
{
|
||||
native_write_dt_entry(dt, entrynum, low, high);
|
||||
}
|
||||
|
||||
static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high)
|
||||
{
|
||||
native_write_dt_entry(dt, entrynum, low, high);
|
||||
}
|
||||
|
||||
static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high)
|
||||
{
|
||||
native_write_dt_entry(dt, entrynum, low, high);
|
||||
}
|
||||
|
||||
static void native_load_esp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
{
|
||||
tss->esp0 = thread->esp0;
|
||||
|
||||
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
|
||||
if (unlikely(tss->ss1 != thread->sysenter_cs)) {
|
||||
tss->ss1 = thread->sysenter_cs;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void native_io_delay(void)
|
||||
{
|
||||
asm volatile("outb %al,$0x80");
|
||||
}
|
||||
|
||||
static void native_flush_tlb(void)
|
||||
{
|
||||
__native_flush_tlb();
|
||||
|
@@ -395,83 +213,11 @@ static void native_flush_tlb_global(void)
|
|||
__native_flush_tlb_global();
|
||||
}
|
||||
|
||||
static void native_flush_tlb_single(u32 addr)
|
||||
static void native_flush_tlb_single(unsigned long addr)
|
||||
{
|
||||
__native_flush_tlb_single(addr);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_X86_PAE
|
||||
static void native_set_pte(pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
*ptep = pteval;
|
||||
}
|
||||
|
||||
static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
*ptep = pteval;
|
||||
}
|
||||
|
||||
static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
|
||||
{
|
||||
*pmdp = pmdval;
|
||||
}
|
||||
|
||||
#else /* CONFIG_X86_PAE */
|
||||
|
||||
static void native_set_pte(pte_t *ptep, pte_t pte)
|
||||
{
|
||||
ptep->pte_high = pte.pte_high;
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
|
||||
{
|
||||
ptep->pte_high = pte.pte_high;
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
|
||||
{
|
||||
ptep->pte_low = 0;
|
||||
smp_wmb();
|
||||
ptep->pte_high = pte.pte_high;
|
||||
smp_wmb();
|
||||
ptep->pte_low = pte.pte_low;
|
||||
}
|
||||
|
||||
static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
|
||||
{
|
||||
set_64bit((unsigned long long *)ptep,pte_val(pteval));
|
||||
}
|
||||
|
||||
static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
|
||||
{
|
||||
set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
|
||||
}
|
||||
|
||||
static void native_set_pud(pud_t *pudp, pud_t pudval)
|
||||
{
|
||||
*pudp = pudval;
|
||||
}
|
||||
|
||||
static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
ptep->pte_low = 0;
|
||||
smp_wmb();
|
||||
ptep->pte_high = 0;
|
||||
}
|
||||
|
||||
static void native_pmd_clear(pmd_t *pmd)
|
||||
{
|
||||
u32 *tmp = (u32 *)pmd;
|
||||
*tmp = 0;
|
||||
smp_wmb();
|
||||
*(tmp + 1) = 0;
|
||||
}
|
||||
#endif /* CONFIG_X86_PAE */
|
||||
|
||||
/* These are in entry.S */
|
||||
extern void native_iret(void);
|
||||
extern void native_irq_enable_sysexit(void);
|
||||
|
@@ -487,10 +233,11 @@ struct paravirt_ops paravirt_ops = {
|
|||
.name = "bare hardware",
|
||||
.paravirt_enabled = 0,
|
||||
.kernel_rpl = 0,
|
||||
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
|
||||
|
||||
.patch = native_patch,
|
||||
.banner = default_banner,
|
||||
.arch_setup = native_nop,
|
||||
.arch_setup = paravirt_nop,
|
||||
.memory_setup = machine_specific_memory_setup,
|
||||
.get_wallclock = native_get_wallclock,
|
||||
.set_wallclock = native_set_wallclock,
|
||||
|
@ -517,8 +264,8 @@ struct paravirt_ops paravirt_ops = {
|
|||
.safe_halt = native_safe_halt,
|
||||
.halt = native_halt,
|
||||
.wbinvd = native_wbinvd,
|
||||
.read_msr = native_read_msr,
|
||||
.write_msr = native_write_msr,
|
||||
.read_msr = native_read_msr_safe,
|
||||
.write_msr = native_write_msr_safe,
|
||||
.read_tsc = native_read_tsc,
|
||||
.read_pmc = native_read_pmc,
|
||||
.get_scheduled_cycles = native_read_tsc,
|
||||
|
@ -531,9 +278,9 @@ struct paravirt_ops paravirt_ops = {
|
|||
.store_idt = native_store_idt,
|
||||
.store_tr = native_store_tr,
|
||||
.load_tls = native_load_tls,
|
||||
.write_ldt_entry = native_write_ldt_entry,
|
||||
.write_gdt_entry = native_write_gdt_entry,
|
||||
.write_idt_entry = native_write_idt_entry,
|
||||
.write_ldt_entry = write_dt_entry,
|
||||
.write_gdt_entry = write_dt_entry,
|
||||
.write_idt_entry = write_dt_entry,
|
||||
.load_esp0 = native_load_esp0,
|
||||
|
||||
.set_iopl_mask = native_set_iopl_mask,
|
||||
|
@ -545,44 +292,57 @@ struct paravirt_ops paravirt_ops = {
|
|||
.apic_read = native_apic_read,
|
||||
.setup_boot_clock = setup_boot_APIC_clock,
|
||||
.setup_secondary_clock = setup_secondary_APIC_clock,
|
||||
.startup_ipi_hook = paravirt_nop,
|
||||
#endif
|
||||
.set_lazy_mode = (void *)native_nop,
|
||||
.set_lazy_mode = paravirt_nop,
|
||||
|
||||
.pagetable_setup_start = native_pagetable_setup_start,
|
||||
.pagetable_setup_done = native_pagetable_setup_done,
|
||||
|
||||
.flush_tlb_user = native_flush_tlb,
|
||||
.flush_tlb_kernel = native_flush_tlb_global,
|
||||
.flush_tlb_single = native_flush_tlb_single,
|
||||
.flush_tlb_others = native_flush_tlb_others,
|
||||
|
||||
.map_pt_hook = (void *)native_nop,
|
||||
|
||||
.alloc_pt = (void *)native_nop,
|
||||
.alloc_pd = (void *)native_nop,
|
||||
.alloc_pd_clone = (void *)native_nop,
|
||||
.release_pt = (void *)native_nop,
|
||||
.release_pd = (void *)native_nop,
|
||||
.alloc_pt = paravirt_nop,
|
||||
.alloc_pd = paravirt_nop,
|
||||
.alloc_pd_clone = paravirt_nop,
|
||||
.release_pt = paravirt_nop,
|
||||
.release_pd = paravirt_nop,
|
||||
|
||||
.set_pte = native_set_pte,
|
||||
.set_pte_at = native_set_pte_at,
|
||||
.set_pmd = native_set_pmd,
|
||||
.pte_update = (void *)native_nop,
|
||||
.pte_update_defer = (void *)native_nop,
|
||||
.pte_update = paravirt_nop,
|
||||
.pte_update_defer = paravirt_nop,
|
||||
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
.kmap_atomic_pte = kmap_atomic,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
.set_pte_atomic = native_set_pte_atomic,
|
||||
.set_pte_present = native_set_pte_present,
|
||||
.set_pud = native_set_pud,
|
||||
.pte_clear = native_pte_clear,
|
||||
.pmd_clear = native_pmd_clear,
|
||||
|
||||
.pmd_val = native_pmd_val,
|
||||
.make_pmd = native_make_pmd,
|
||||
#endif
|
||||
|
||||
.pte_val = native_pte_val,
|
||||
.pgd_val = native_pgd_val,
|
||||
|
||||
.make_pte = native_make_pte,
|
||||
.make_pgd = native_make_pgd,
|
||||
|
||||
.irq_enable_sysexit = native_irq_enable_sysexit,
|
||||
.iret = native_iret,
|
||||
|
||||
.startup_ipi_hook = (void *)native_nop,
|
||||
.dup_mmap = paravirt_nop,
|
||||
.exit_mmap = paravirt_nop,
|
||||
.activate_mm = paravirt_nop,
|
||||
};
|
||||

/*
 * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
 * semantics are subject to change. Hence we only do this
 * internal-only export of this, until it gets sorted out and
 * all lowlevel CPU ops used by modules are separately exported.
 */
EXPORT_SYMBOL_GPL(paravirt_ops);
EXPORT_SYMBOL(paravirt_ops);
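Aside (not part of the patch): a minimal, self-contained sketch of the ops-table indirection that paravirt_ops provides, assuming a hypothetical backend that overrides one hook at init time. All names below are illustrative, not the kernel's.

/* Minimal sketch of an ops-table override, analogous in spirit to
 * paravirt_ops. Every name here is hypothetical. */
#include <stdio.h>

struct demo_ops {
	void (*io_delay)(void);
	void (*flush_tlb)(void);
};

static void native_demo_io_delay(void)  { puts("native io_delay"); }
static void native_demo_flush_tlb(void) { puts("native flush_tlb"); }

/* Default table: bare hardware. */
static struct demo_ops demo_ops = {
	.io_delay  = native_demo_io_delay,
	.flush_tlb = native_demo_flush_tlb,
};

/* A hypervisor backend overrides only the hooks it cares about. */
static void hv_demo_flush_tlb(void) { puts("hypercall: flush_tlb"); }

static void demo_hypervisor_init(void)
{
	demo_ops.flush_tlb = hv_demo_flush_tlb;
}

int main(void)
{
	demo_ops.flush_tlb();		/* native path */
	demo_hypervisor_init();
	demo_ops.flush_tlb();		/* overridden path */
	return 0;
}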
|
|
|
@ -39,6 +39,7 @@
|
|||
#include <linux/random.h>
|
||||
#include <linux/personality.h>
|
||||
#include <linux/tick.h>
|
||||
#include <linux/percpu.h>
|
||||
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
@ -57,7 +58,6 @@
|
|||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/pda.h>
|
||||
|
||||
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
||||
|
||||
|
@ -66,6 +66,12 @@ static int hlt_counter;
|
|||
unsigned long boot_option_idle_override = 0;
|
||||
EXPORT_SYMBOL(boot_option_idle_override);
|
||||
|
||||
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
||||
EXPORT_PER_CPU_SYMBOL(current_task);
|
||||
|
||||
DEFINE_PER_CPU(int, cpu_number);
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_number);
|
||||
|
||||
/*
|
||||
* Return saved PC of a blocked thread.
|
||||
*/
|
||||
|
@ -272,25 +278,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
|
|||
}
|
||||
}
|
||||
|
||||
static int __init idle_setup (char *str)
|
||||
static int __init idle_setup(char *str)
|
||||
{
|
||||
if (!strncmp(str, "poll", 4)) {
|
||||
if (!strcmp(str, "poll")) {
|
||||
printk("using polling idle threads.\n");
|
||||
pm_idle = poll_idle;
|
||||
#ifdef CONFIG_X86_SMP
|
||||
if (smp_num_siblings > 1)
|
||||
printk("WARNING: polling idle and HT enabled, performance may degrade.\n");
|
||||
#endif
|
||||
} else if (!strncmp(str, "halt", 4)) {
|
||||
printk("using halt in idle threads.\n");
|
||||
pm_idle = default_idle;
|
||||
}
|
||||
} else if (!strcmp(str, "mwait"))
|
||||
force_mwait = 1;
|
||||
else
|
||||
return -1;
|
||||
|
||||
boot_option_idle_override = 1;
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
__setup("idle=", idle_setup);
|
||||
early_param("idle", idle_setup);
|
||||
|
||||
void show_regs(struct pt_regs * regs)
|
||||
{
|
||||
|
@ -343,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
|
|||
|
||||
regs.xds = __USER_DS;
|
||||
regs.xes = __USER_DS;
|
||||
regs.xfs = __KERNEL_PDA;
|
||||
regs.xfs = __KERNEL_PERCPU;
|
||||
regs.orig_eax = -1;
|
||||
regs.eip = (unsigned long) kernel_thread_helper;
|
||||
regs.xcs = __KERNEL_CS | get_kernel_rpl();
|
||||
|
@ -376,7 +381,7 @@ void exit_thread(void)
|
|||
t->io_bitmap_max = 0;
|
||||
tss->io_bitmap_owner = NULL;
|
||||
tss->io_bitmap_max = 0;
|
||||
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
|
||||
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
|
||||
put_cpu();
|
||||
}
|
||||
}
|
||||
|
@ -555,7 +560,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
|
|||
* Disable the bitmap via an invalid offset. We still cache
|
||||
* the previous bitmap owner and the IO bitmap contents:
|
||||
*/
|
||||
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
|
||||
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -565,7 +570,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
|
|||
* matches the next task, we dont have to do anything but
|
||||
* to set a valid offset in the TSS:
|
||||
*/
|
||||
tss->io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
return;
|
||||
}
|
||||
/*
|
||||
|
@ -577,7 +582,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p,
|
|||
* redundant copies when the currently switched task does not
|
||||
* perform any I/O during its timeslice.
|
||||
*/
|
||||
tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
|
||||
tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -712,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas
|
|||
if (prev->gs | next->gs)
|
||||
loadsegment(gs, next->gs);
|
||||
|
||||
write_pda(pcurrent, next_p);
|
||||
x86_write_percpu(current_task, next_p);
|
||||
|
||||
return prev_p;
|
||||
}
|
||||
|
|
|
@ -3,48 +3,10 @@
|
|||
*/
|
||||
#include <linux/pci.h>
|
||||
#include <linux/irq.h>
|
||||
#include <asm/pci-direct.h>
|
||||
#include <asm/genapic.h>
|
||||
#include <asm/cpu.h>
|
||||
|
||||
#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
|
||||
static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
|
||||
{
|
||||
u8 config, rev;
|
||||
u32 word;
|
||||
|
||||
/* BIOS may enable hardware IRQ balancing for
|
||||
* E7520/E7320/E7525(revision ID 0x9 and below)
|
||||
* based platforms.
|
||||
* For those platforms, make sure that the genapic is set to 'flat'
|
||||
*/
|
||||
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
|
||||
if (rev > 0x9)
|
||||
return;
|
||||
|
||||
/* enable access to config space*/
|
||||
pci_read_config_byte(dev, 0xf4, &config);
|
||||
pci_write_config_byte(dev, 0xf4, config|0x2);
|
||||
|
||||
/* read xTPR register */
|
||||
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
|
||||
|
||||
if (!(word & (1 << 13))) {
|
||||
#ifdef CONFIG_X86_64
|
||||
if (genapic != &apic_flat)
|
||||
panic("APIC mode must be flat on this system\n");
|
||||
#elif defined(CONFIG_X86_GENERICARCH)
|
||||
if (genapic != &apic_default)
|
||||
panic("APIC mode must be default(flat) on this system. Use apic=default\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* put back the original value for config space*/
|
||||
if (!(config & 0x2))
|
||||
pci_write_config_byte(dev, 0xf4, config);
|
||||
}
|
||||
|
||||
void __init quirk_intel_irqbalance(void)
|
||||
static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
|
||||
{
|
||||
u8 config, rev;
|
||||
u32 word;
|
||||
|
@ -54,18 +16,18 @@ void __init quirk_intel_irqbalance(void)
|
|||
* based platforms.
|
||||
* Disable SW irqbalance/affinity on those platforms.
|
||||
*/
|
||||
rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
|
||||
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
|
||||
if (rev > 0x9)
|
||||
return;
|
||||
|
||||
printk(KERN_INFO "Intel E7520/7320/7525 detected.");
|
||||
|
||||
/* enable access to config space */
|
||||
config = read_pci_config_byte(0, 0, 0, 0xf4);
|
||||
write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
|
||||
/* enable access to config space*/
|
||||
pci_read_config_byte(dev, 0xf4, &config);
|
||||
pci_write_config_byte(dev, 0xf4, config|0x2);
|
||||
|
||||
/* read xTPR register */
|
||||
word = read_pci_config_16(0, 0, 0x40, 0x4c);
|
||||
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
|
||||
|
||||
if (!(word & (1 << 13))) {
|
||||
printk(KERN_INFO "Disabling irq balancing and affinity\n");
|
||||
|
@ -75,25 +37,14 @@ void __init quirk_intel_irqbalance(void)
|
|||
noirqdebug_setup("");
|
||||
#ifdef CONFIG_PROC_FS
|
||||
no_irq_affinity = 1;
|
||||
#endif
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
printk(KERN_INFO "Disabling cpu hotplug control\n");
|
||||
enable_cpu_hotplug = 0;
|
||||
#endif
|
||||
#ifdef CONFIG_X86_64
|
||||
/* force the genapic selection to flat mode so that
|
||||
* interrupts can be redirected to more than one CPU.
|
||||
*/
|
||||
genapic_force = &apic_flat;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* put back the original value for config space */
|
||||
/* put back the original value for config space*/
|
||||
if (!(config & 0x2))
|
||||
write_pci_config_byte(0, 0, 0, 0xf4, config);
|
||||
pci_write_config_byte(dev, 0xf4, config);
|
||||
}
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
|
||||
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
|
||||
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
|
||||
#endif
|
||||
|
|
|
@ -17,7 +17,8 @@
|
|||
#include <asm/apic.h>
|
||||
#include <asm/desc.h>
|
||||
#include "mach_reboot.h"
|
||||
#include <linux/reboot_fixups.h>
|
||||
#include <asm/reboot_fixups.h>
|
||||
#include <asm/reboot.h>
|
||||
|
||||
/*
|
||||
* Power off function, if any
|
||||
|
@ -197,8 +198,6 @@ static unsigned char jump_to_bios [] =
|
|||
*/
|
||||
void machine_real_restart(unsigned char *code, int length)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_disable();
|
||||
|
||||
/* Write zero to CMOS register number 0x0f, which the BIOS POST
|
||||
|
@ -211,9 +210,9 @@ void machine_real_restart(unsigned char *code, int length)
|
|||
safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.)
|
||||
*/
|
||||
|
||||
spin_lock_irqsave(&rtc_lock, flags);
|
||||
spin_lock(&rtc_lock);
|
||||
CMOS_WRITE(0x00, 0x8f);
|
||||
spin_unlock_irqrestore(&rtc_lock, flags);
|
||||
spin_unlock(&rtc_lock);
|
||||
|
||||
/* Remap the kernel at virtual address zero, as well as offset zero
|
||||
from the kernel segment. This assumes the kernel segment starts at
|
||||
|
@ -280,7 +279,7 @@ void machine_real_restart(unsigned char *code, int length)
|
|||
EXPORT_SYMBOL(machine_real_restart);
|
||||
#endif
|
||||
|
||||
void machine_shutdown(void)
|
||||
static void native_machine_shutdown(void)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
int reboot_cpu_id;
|
||||
|
@ -316,7 +315,11 @@ void machine_shutdown(void)
|
|||
#endif
|
||||
}
|
||||
|
||||
void machine_emergency_restart(void)
|
||||
void __attribute__((weak)) mach_reboot_fixups(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void native_machine_emergency_restart(void)
|
||||
{
|
||||
if (!reboot_thru_bios) {
|
||||
if (efi_enabled) {
|
||||
|
@ -340,17 +343,17 @@ void machine_emergency_restart(void)
|
|||
machine_real_restart(jump_to_bios, sizeof(jump_to_bios));
|
||||
}
|
||||
|
||||
void machine_restart(char * __unused)
|
||||
static void native_machine_restart(char * __unused)
|
||||
{
|
||||
machine_shutdown();
|
||||
machine_emergency_restart();
|
||||
}
|
||||
|
||||
void machine_halt(void)
|
||||
static void native_machine_halt(void)
|
||||
{
|
||||
}
|
||||
|
||||
void machine_power_off(void)
|
||||
static void native_machine_power_off(void)
|
||||
{
|
||||
if (pm_power_off) {
|
||||
machine_shutdown();
|
||||
|
@ -359,3 +362,35 @@ void machine_power_off(void)
|
|||
}
|
||||

struct machine_ops machine_ops = {
	.power_off = native_machine_power_off,
	.shutdown = native_machine_shutdown,
	.emergency_restart = native_machine_emergency_restart,
	.restart = native_machine_restart,
	.halt = native_machine_halt,
};

void machine_power_off(void)
{
	machine_ops.power_off();
}

void machine_shutdown(void)
{
	machine_ops.shutdown();
}

void machine_emergency_restart(void)
{
	machine_ops.emergency_restart();
}

void machine_restart(char *cmd)
{
	machine_ops.restart(cmd);
}

void machine_halt(void)
{
	machine_ops.halt();
}
|
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
#include <asm/delay.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/reboot_fixups.h>
|
||||
#include <asm/reboot_fixups.h>
|
||||
|
||||
static void cs5530a_warm_reset(struct pci_dev *dev)
|
||||
{
|
||||
|
|
|
@ -165,20 +165,20 @@ void fastcall send_IPI_self(int vector)
|
|||
}
|
||||
|
||||
/*
|
||||
* This is only used on smaller machines.
|
||||
* This is used to send an IPI with no shorthand notation (the destination is
|
||||
* specified in bits 56 to 63 of the ICR).
|
||||
*/
|
||||
void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
|
||||
static inline void __send_IPI_dest_field(unsigned long mask, int vector)
|
||||
{
|
||||
unsigned long mask = cpus_addr(cpumask)[0];
|
||||
unsigned long cfg;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
|
||||
/*
|
||||
* Wait for idle.
|
||||
*/
|
||||
apic_wait_icr_idle();
|
||||
if (unlikely(vector == NMI_VECTOR))
|
||||
safe_apic_wait_icr_idle();
|
||||
else
|
||||
apic_wait_icr_idle();
|
||||
|
||||
/*
|
||||
* prepare target chip field
|
||||
|
@ -195,13 +195,25 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
|
|||
* Send the IPI. The write to APIC_ICR fires this off.
|
||||
*/
|
||||
apic_write_around(APIC_ICR, cfg);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is only used on smaller machines.
|
||||
*/
|
||||
void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
|
||||
{
|
||||
unsigned long mask = cpus_addr(cpumask)[0];
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
|
||||
__send_IPI_dest_field(mask, vector);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
void send_IPI_mask_sequence(cpumask_t mask, int vector)
|
||||
{
|
||||
unsigned long cfg, flags;
|
||||
unsigned long flags;
|
||||
unsigned int query_cpu;
|
||||
|
||||
/*
|
||||
|
@ -211,30 +223,10 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector)
|
|||
*/
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) {
|
||||
if (cpu_isset(query_cpu, mask)) {
|
||||
|
||||
/*
|
||||
* Wait for idle.
|
||||
*/
|
||||
apic_wait_icr_idle();
|
||||
|
||||
/*
|
||||
* prepare target chip field
|
||||
*/
|
||||
cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu));
|
||||
apic_write_around(APIC_ICR2, cfg);
|
||||
|
||||
/*
|
||||
* program the ICR
|
||||
*/
|
||||
cfg = __prepare_ICR(0, vector);
|
||||
|
||||
/*
|
||||
* Send the IPI. The write to APIC_ICR fires this off.
|
||||
*/
|
||||
apic_write_around(APIC_ICR, cfg);
|
||||
__send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
|
||||
vector);
|
||||
}
|
||||
}
|
||||
local_irq_restore(flags);
|
||||
|
@ -256,7 +248,6 @@ static cpumask_t flush_cpumask;
|
|||
static struct mm_struct * flush_mm;
|
||||
static unsigned long flush_va;
|
||||
static DEFINE_SPINLOCK(tlbstate_lock);
|
||||
#define FLUSH_ALL 0xffffffff
|
||||
|
||||
/*
|
||||
* We cannot call mmdrop() because we are in interrupt context,
|
||||
|
@ -338,7 +329,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
|
|||
|
||||
if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
|
||||
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
|
||||
if (flush_va == FLUSH_ALL)
|
||||
if (flush_va == TLB_FLUSH_ALL)
|
||||
local_flush_tlb();
|
||||
else
|
||||
__flush_tlb_one(flush_va);
|
||||
|
@ -353,9 +344,11 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs)
|
|||
put_cpu_no_resched();
|
||||
}
|
||||
|
||||
static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
|
||||
unsigned long va)
|
||||
void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
|
||||
unsigned long va)
|
||||
{
|
||||
cpumask_t cpumask = *cpumaskp;
|
||||
|
||||
/*
|
||||
* A couple of (to be removed) sanity checks:
|
||||
*
|
||||
|
@ -366,10 +359,12 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
|
|||
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
|
||||
BUG_ON(!mm);
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
/* If a CPU which we ran on has gone down, OK. */
|
||||
cpus_and(cpumask, cpumask, cpu_online_map);
|
||||
if (cpus_empty(cpumask))
|
||||
if (unlikely(cpus_empty(cpumask)))
|
||||
return;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* i'm not happy about this global shared spinlock in the
|
||||
|
@ -380,17 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
|
|||
|
||||
flush_mm = mm;
|
||||
flush_va = va;
|
||||
#if NR_CPUS <= BITS_PER_LONG
|
||||
atomic_set_mask(cpumask, &flush_cpumask);
|
||||
#else
|
||||
{
|
||||
int k;
|
||||
unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
|
||||
unsigned long *cpu_mask = (unsigned long *)&cpumask;
|
||||
for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
|
||||
atomic_set_mask(cpu_mask[k], &flush_mask[k]);
|
||||
}
|
||||
#endif
|
||||
cpus_or(flush_cpumask, cpumask, flush_cpumask);
|
||||
/*
|
||||
* We have to send the IPI only to
|
||||
* CPUs affected.
|
||||
|
@ -417,7 +402,7 @@ void flush_tlb_current_task(void)
|
|||
|
||||
local_flush_tlb();
|
||||
if (!cpus_empty(cpu_mask))
|
||||
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
|
||||
flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
@ -436,7 +421,7 @@ void flush_tlb_mm (struct mm_struct * mm)
|
|||
leave_mm(smp_processor_id());
|
||||
}
|
||||
if (!cpus_empty(cpu_mask))
|
||||
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
|
||||
flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
@ -483,7 +468,7 @@ void flush_tlb_all(void)
|
|||
* it goes straight through and wastes no time serializing
|
||||
* anything. Worst case is that we lose a reschedule ...
|
||||
*/
|
||||
void smp_send_reschedule(int cpu)
|
||||
void native_smp_send_reschedule(int cpu)
|
||||
{
|
||||
WARN_ON(cpu_is_offline(cpu));
|
||||
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
|
||||
|
@ -515,35 +500,14 @@ void unlock_ipi_call_lock(void)
|
|||
|
||||
static struct call_data_struct *call_data;
|
||||
|
||||
/**
|
||||
* smp_call_function(): Run a function on all other CPUs.
|
||||
* @func: The function to run. This must be fast and non-blocking.
|
||||
* @info: An arbitrary pointer to pass to the function.
|
||||
* @nonatomic: currently unused.
|
||||
* @wait: If true, wait (atomically) until function has completed on other CPUs.
|
||||
*
|
||||
* Returns 0 on success, else a negative status code. Does not return until
|
||||
* remote CPUs are nearly ready to execute <<func>> or are or have executed.
|
||||
*
|
||||
* You must not call this function with disabled interrupts or from a
|
||||
* hardware interrupt handler or from a bottom half handler.
|
||||
*/
|
||||
int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
|
||||
int wait)
|
||||
static void __smp_call_function(void (*func) (void *info), void *info,
|
||||
int nonatomic, int wait)
|
||||
{
|
||||
struct call_data_struct data;
|
||||
int cpus;
|
||||
int cpus = num_online_cpus() - 1;
|
||||
|
||||
/* Holding any lock stops cpus from going down. */
|
||||
spin_lock(&call_lock);
|
||||
cpus = num_online_cpus() - 1;
|
||||
if (!cpus) {
|
||||
spin_unlock(&call_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Can deadlock when called with interrupts disabled */
|
||||
WARN_ON(irqs_disabled());
|
||||
if (!cpus)
|
||||
return;
|
||||
|
||||
data.func = func;
|
||||
data.info = info;
|
||||
|
@ -558,6 +522,72 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
|
|||
/* Send a message to all other CPUs and wait for them to respond */
|
||||
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
||||
|
||||
/* Wait for response */
|
||||
while (atomic_read(&data.started) != cpus)
|
||||
cpu_relax();
|
||||
|
||||
if (wait)
|
||||
while (atomic_read(&data.finished) != cpus)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* smp_call_function_mask(): Run a function on a set of other CPUs.
|
||||
* @mask: The set of cpus to run on. Must not include the current cpu.
|
||||
* @func: The function to run. This must be fast and non-blocking.
|
||||
* @info: An arbitrary pointer to pass to the function.
|
||||
* @wait: If true, wait (atomically) until function has completed on other CPUs.
|
||||
*
|
||||
* Returns 0 on success, else a negative status code.
|
||||
*
|
||||
* If @wait is true, then returns once @func has returned; otherwise
|
||||
* it returns just before the target cpu calls @func.
|
||||
*
|
||||
* You must not call this function with disabled interrupts or from a
|
||||
* hardware interrupt handler or from a bottom half handler.
|
||||
*/
|
||||
int native_smp_call_function_mask(cpumask_t mask,
|
||||
void (*func)(void *), void *info,
|
||||
int wait)
|
||||
{
|
||||
struct call_data_struct data;
|
||||
cpumask_t allbutself;
|
||||
int cpus;
|
||||
|
||||
/* Can deadlock when called with interrupts disabled */
|
||||
WARN_ON(irqs_disabled());
|
||||
|
||||
/* Holding any lock stops cpus from going down. */
|
||||
spin_lock(&call_lock);
|
||||
|
||||
allbutself = cpu_online_map;
|
||||
cpu_clear(smp_processor_id(), allbutself);
|
||||
|
||||
cpus_and(mask, mask, allbutself);
|
||||
cpus = cpus_weight(mask);
|
||||
|
||||
if (!cpus) {
|
||||
spin_unlock(&call_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
data.func = func;
|
||||
data.info = info;
|
||||
atomic_set(&data.started, 0);
|
||||
data.wait = wait;
|
||||
if (wait)
|
||||
atomic_set(&data.finished, 0);
|
||||
|
||||
call_data = &data;
|
||||
mb();
|
||||
|
||||
/* Send a message to other CPUs */
|
||||
if (cpus_equal(mask, allbutself))
|
||||
send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
||||
else
|
||||
send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
|
||||
|
||||
/* Wait for response */
|
||||
while (atomic_read(&data.started) != cpus)
|
||||
cpu_relax();
|
||||
|
@ -569,15 +599,68 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* smp_call_function(): Run a function on all other CPUs.
|
||||
* @func: The function to run. This must be fast and non-blocking.
|
||||
* @info: An arbitrary pointer to pass to the function.
|
||||
* @nonatomic: Unused.
|
||||
* @wait: If true, wait (atomically) until function has completed on other CPUs.
|
||||
*
|
||||
* Returns 0 on success, else a negative status code.
|
||||
*
|
||||
* If @wait is true, then returns once @func has returned; otherwise
|
||||
* it returns just before the target cpu calls @func.
|
||||
*
|
||||
* You must not call this function with disabled interrupts or from a
|
||||
* hardware interrupt handler or from a bottom half handler.
|
||||
*/
|
||||
int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
|
||||
int wait)
|
||||
{
|
||||
return smp_call_function_mask(cpu_online_map, func, info, wait);
|
||||
}
|
||||
EXPORT_SYMBOL(smp_call_function);
|
||||
|
||||
/**
 * smp_call_function_single - Run a function on another CPU
 * @cpu: The target CPU. Cannot be the calling CPU.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @nonatomic: Unused.
 * @wait: If true, wait until function has completed on other CPUs.
 *
 * Returns 0 on success, else a negative status code.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 */
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
			     int nonatomic, int wait)
{
	/* prevent preemption and reschedule on another processor */
	int ret;
	int me = get_cpu();
	if (cpu == me) {
		WARN_ON(1);
		put_cpu();
		return -EBUSY;
	}

	ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);

	put_cpu();
	return ret;
}
EXPORT_SYMBOL(smp_call_function_single);
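Aside (not part of the patch): a hedged sketch of how in-tree code might call this interface, assuming process context and that the chosen CPU is online; the callback and function names below are hypothetical.

#include <linux/smp.h>
#include <linux/kernel.h>

/* Hypothetical IPI callback: runs on the target CPU in interrupt
 * context, so it must be fast and must not sleep. */
static void demo_report_cpu(void *info)
{
	printk(KERN_INFO "demo: running on CPU %d\n", smp_processor_id());
}

static void demo_cross_call(void)
{
	/* Run on CPU 1 (assumed online) and wait for completion;
	 * the nonatomic argument is unused. */
	smp_call_function_single(1, demo_report_cpu, NULL, 0, 1);

	/* Run on all other online CPUs, also waiting for completion. */
	smp_call_function(demo_report_cpu, NULL, 0, 1);
}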
|
||||
static void stop_this_cpu (void * dummy)
|
||||
{
|
||||
local_irq_disable();
|
||||
/*
|
||||
* Remove this CPU:
|
||||
*/
|
||||
cpu_clear(smp_processor_id(), cpu_online_map);
|
||||
local_irq_disable();
|
||||
disable_local_APIC();
|
||||
if (cpu_data[smp_processor_id()].hlt_works_ok)
|
||||
for(;;) halt();
|
||||
|
@ -588,13 +671,18 @@ static void stop_this_cpu (void * dummy)
|
|||
* this function calls the 'stop' function on all other CPUs in the system.
|
||||
*/
|
||||
|
||||
void smp_send_stop(void)
|
||||
void native_smp_send_stop(void)
|
||||
{
|
||||
smp_call_function(stop_this_cpu, NULL, 1, 0);
|
||||
/* Don't deadlock on the call lock in panic */
|
||||
int nolock = !spin_trylock(&call_lock);
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_disable();
|
||||
local_irq_save(flags);
|
||||
__smp_call_function(stop_this_cpu, NULL, 0, 0);
|
||||
if (!nolock)
|
||||
spin_unlock(&call_lock);
|
||||
disable_local_APIC();
|
||||
local_irq_enable();
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -633,77 +721,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* this function sends a 'generic call function' IPI to one other CPU
|
||||
* in the system.
|
||||
*
|
||||
* cpu is a standard Linux logical CPU number.
|
||||
*/
|
||||
static void
|
||||
__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
||||
int nonatomic, int wait)
|
||||
{
|
||||
struct call_data_struct data;
|
||||
int cpus = 1;
|
||||
|
||||
data.func = func;
|
||||
data.info = info;
|
||||
atomic_set(&data.started, 0);
|
||||
data.wait = wait;
|
||||
if (wait)
|
||||
atomic_set(&data.finished, 0);
|
||||
|
||||
call_data = &data;
|
||||
wmb();
|
||||
/* Send a message to all other CPUs and wait for them to respond */
|
||||
send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
|
||||
|
||||
/* Wait for response */
|
||||
while (atomic_read(&data.started) != cpus)
|
||||
cpu_relax();
|
||||
|
||||
if (!wait)
|
||||
return;
|
||||
|
||||
while (atomic_read(&data.finished) != cpus)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
/*
|
||||
* smp_call_function_single - Run a function on another CPU
|
||||
* @func: The function to run. This must be fast and non-blocking.
|
||||
* @info: An arbitrary pointer to pass to the function.
|
||||
* @nonatomic: Currently unused.
|
||||
* @wait: If true, wait until function has completed on other CPUs.
|
||||
*
|
||||
* Returns 0 on success, else a negative status code.
|
||||
*
|
||||
* Does not return until the remote CPU is nearly ready to execute <func>
|
||||
* or is or has executed.
|
||||
*/
|
||||
|
||||
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
||||
int nonatomic, int wait)
|
||||
{
|
||||
/* prevent preemption and reschedule on another processor */
|
||||
int me = get_cpu();
|
||||
if (cpu == me) {
|
||||
WARN_ON(1);
|
||||
put_cpu();
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
/* Can deadlock when called with interrupts disabled */
|
||||
WARN_ON(irqs_disabled());
|
||||
|
||||
spin_lock_bh(&call_lock);
|
||||
__smp_call_function_single(cpu, func, info, nonatomic, wait);
|
||||
spin_unlock_bh(&call_lock);
|
||||
put_cpu();
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL(smp_call_function_single);
|
||||
|
||||
static int convert_apicid_to_cpu(int apic_id)
|
||||
{
|
||||
int i;
|
||||
|
@ -730,3 +747,14 @@ int safe_smp_processor_id(void)
|
|||
|
||||
return cpuid >= 0 ? cpuid : 0;
|
||||
}
|
||||

struct smp_ops smp_ops = {
	.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
	.smp_prepare_cpus = native_smp_prepare_cpus,
	.cpu_up = native_cpu_up,
	.smp_cpus_done = native_smp_cpus_done,

	.smp_send_stop = native_smp_send_stop,
	.smp_send_reschedule = native_smp_send_reschedule,
	.smp_call_function_mask = native_smp_call_function_mask,
};
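Aside (not part of the patch): the table above is consumed through thin wrappers elsewhere in the i386 code; below is a hedged sketch of that pattern, and the real wrapper definitions in the tree may differ in detail.

/* Sketch of the wrapper pattern smp_ops enables: generic callers keep
 * using the familiar names while the table decides native vs. paravirt. */
static inline void smp_send_stop(void)
{
	smp_ops.smp_send_stop();
}

static inline void smp_send_reschedule(int cpu)
{
	smp_ops.smp_send_reschedule(cpu);
}

static inline int __cpu_up(unsigned int cpu)
{
	return smp_ops.cpu_up(cpu);
}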
|
|
|
@ -53,13 +53,12 @@
|
|||
#include <asm/desc.h>
|
||||
#include <asm/arch_hooks.h>
|
||||
#include <asm/nmi.h>
|
||||
#include <asm/pda.h>
|
||||
#include <asm/genapic.h>
|
||||
|
||||
#include <mach_apic.h>
|
||||
#include <mach_wakecpu.h>
|
||||
#include <smpboot_hooks.h>
|
||||
#include <asm/vmi.h>
|
||||
#include <asm/mtrr.h>
|
||||
|
||||
/* Set if we find a B stepping CPU */
|
||||
static int __devinitdata smp_b_stepping;
|
||||
|
@ -100,6 +99,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid);
|
|||
|
||||
u8 apicid_2_node[MAX_APICID];
|
||||
|
||||
DEFINE_PER_CPU(unsigned long, this_cpu_off);
|
||||
EXPORT_PER_CPU_SYMBOL(this_cpu_off);
|
||||
|
||||
/*
|
||||
* Trampoline 80x86 program as an array.
|
||||
*/
|
||||
|
@ -156,7 +158,7 @@ static void __cpuinit smp_store_cpu_info(int id)
|
|||
|
||||
*c = boot_cpu_data;
|
||||
if (id!=0)
|
||||
identify_cpu(c);
|
||||
identify_secondary_cpu(c);
|
||||
/*
|
||||
* Mask B, Pentium, but not Pentium MMX
|
||||
*/
|
||||
|
@ -379,14 +381,14 @@ set_cpu_sibling_map(int cpu)
|
|||
static void __cpuinit start_secondary(void *unused)
|
||||
{
|
||||
/*
|
||||
* Don't put *anything* before secondary_cpu_init(), SMP
|
||||
* booting is too fragile that we want to limit the
|
||||
* things done here to the most necessary things.
|
||||
* Don't put *anything* before cpu_init(), SMP booting is too
|
||||
* fragile that we want to limit the things done here to the
|
||||
* most necessary things.
|
||||
*/
|
||||
#ifdef CONFIG_VMI
|
||||
vmi_bringup();
|
||||
#endif
|
||||
secondary_cpu_init();
|
||||
cpu_init();
|
||||
preempt_disable();
|
||||
smp_callin();
|
||||
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
|
||||
|
@ -440,12 +442,6 @@ static void __cpuinit start_secondary(void *unused)
|
|||
*/
|
||||
void __devinit initialize_secondary(void)
|
||||
{
|
||||
/*
|
||||
* switch to the per CPU GDT we already set up
|
||||
* in do_boot_cpu()
|
||||
*/
|
||||
cpu_set_gdt(current_thread_info()->cpu);
|
||||
|
||||
/*
|
||||
* We don't actually need to load the full TSS,
|
||||
* basically just the stack pointer and the eip.
|
||||
|
@ -463,7 +459,6 @@ extern struct {
|
|||
void * esp;
|
||||
unsigned short ss;
|
||||
} stack_start;
|
||||
extern struct i386_pda *start_pda;
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
|
||||
|
@ -521,12 +516,12 @@ static void unmap_cpu_to_logical_apicid(int cpu)
|
|||
unmap_cpu_to_node(cpu);
|
||||
}
|
||||
|
||||
#if APIC_DEBUG
|
||||
static inline void __inquire_remote_apic(int apicid)
|
||||
{
|
||||
int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
|
||||
char *names[] = { "ID", "VERSION", "SPIV" };
|
||||
int timeout, status;
|
||||
int timeout;
|
||||
unsigned long status;
|
||||
|
||||
printk("Inquiring remote APIC #%d...\n", apicid);
|
||||
|
||||
|
@ -536,7 +531,9 @@ static inline void __inquire_remote_apic(int apicid)
|
|||
/*
|
||||
* Wait for idle.
|
||||
*/
|
||||
apic_wait_icr_idle();
|
||||
status = safe_apic_wait_icr_idle();
|
||||
if (status)
|
||||
printk("a previous APIC delivery may have failed\n");
|
||||
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
|
||||
apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);
|
||||
|
@ -550,14 +547,13 @@ static inline void __inquire_remote_apic(int apicid)
|
|||
switch (status) {
|
||||
case APIC_ICR_RR_VALID:
|
||||
status = apic_read(APIC_RRR);
|
||||
printk("%08x\n", status);
|
||||
printk("%lx\n", status);
|
||||
break;
|
||||
default:
|
||||
printk("failed\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WAKE_SECONDARY_VIA_NMI
|
||||
/*
|
||||
|
@ -568,8 +564,8 @@ static inline void __inquire_remote_apic(int apicid)
|
|||
static int __devinit
|
||||
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|
||||
{
|
||||
unsigned long send_status = 0, accept_status = 0;
|
||||
int timeout, maxlvt;
|
||||
unsigned long send_status, accept_status = 0;
|
||||
int maxlvt;
|
||||
|
||||
/* Target chip */
|
||||
apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
|
||||
|
@ -579,12 +575,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|
|||
apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
Dprintk("+");
|
||||
udelay(100);
|
||||
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
||||
/*
|
||||
* Give the other CPU some time to accept the IPI.
|
||||
|
@ -614,8 +605,8 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
|
|||
static int __devinit
|
||||
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
||||
{
|
||||
unsigned long send_status = 0, accept_status = 0;
|
||||
int maxlvt, timeout, num_starts, j;
|
||||
unsigned long send_status, accept_status = 0;
|
||||
int maxlvt, num_starts, j;
|
||||
|
||||
/*
|
||||
* Be paranoid about clearing APIC errors.
|
||||
|
@ -640,12 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
| APIC_DM_INIT);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
Dprintk("+");
|
||||
udelay(100);
|
||||
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
||||
mdelay(10);
|
||||
|
||||
|
@ -658,12 +644,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
Dprintk("+");
|
||||
udelay(100);
|
||||
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
||||
atomic_set(&init_deasserted, 1);
|
||||
|
||||
|
@ -719,12 +700,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
|
|||
Dprintk("Startup point 1.\n");
|
||||
|
||||
Dprintk("Waiting for send to finish...\n");
|
||||
timeout = 0;
|
||||
do {
|
||||
Dprintk("+");
|
||||
udelay(100);
|
||||
send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
|
||||
} while (send_status && (timeout++ < 1000));
|
||||
send_status = safe_apic_wait_icr_idle();
|
||||
|
||||
/*
|
||||
* Give the other CPU some time to accept the IPI.
|
||||
|
@ -788,6 +764,25 @@ static inline struct task_struct * alloc_idle_task(int cpu)
|
|||
#define alloc_idle_task(cpu) fork_idle(cpu)
|
||||
#endif
|
||||
|
||||
/* Initialize the CPU's GDT. This is either the boot CPU doing itself
|
||||
(still using the master per-cpu area), or a CPU doing it for a
|
||||
secondary which will soon come up. */
|
||||
static __cpuinit void init_gdt(int cpu)
|
||||
{
|
||||
struct desc_struct *gdt = get_cpu_gdt_table(cpu);
|
||||
|
||||
pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a,
|
||||
(u32 *)&gdt[GDT_ENTRY_PERCPU].b,
|
||||
__per_cpu_offset[cpu], 0xFFFFF,
|
||||
0x80 | DESCTYPE_S | 0x2, 0x8);
|
||||
|
||||
per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
|
||||
per_cpu(cpu_number, cpu) = cpu;
|
||||
}
|
||||
|
||||
/* Defined in head.S */
|
||||
extern struct Xgt_desc_struct early_gdt_descr;
|
||||
|
||||
static int __cpuinit do_boot_cpu(int apicid, int cpu)
|
||||
/*
|
||||
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
|
||||
|
@ -801,6 +796,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
|
|||
unsigned long start_eip;
|
||||
unsigned short nmi_high = 0, nmi_low = 0;
|
||||
|
||||
/*
|
||||
* Save current MTRR state in case it was changed since early boot
|
||||
* (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
|
||||
*/
|
||||
mtrr_save_state();
|
||||
|
||||
/*
|
||||
* We can't use kernel_thread since we must avoid to
|
||||
* reschedule the child.
|
||||
|
@ -809,13 +810,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
|
|||
if (IS_ERR(idle))
|
||||
panic("failed fork for CPU %d", cpu);
|
||||
|
||||
/* Pre-allocate and initialize the CPU's GDT and PDA so it
|
||||
doesn't have to do any memory allocation during the
|
||||
delicate CPU-bringup phase. */
|
||||
if (!init_gdt(cpu, idle)) {
|
||||
printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
|
||||
return -1; /* ? */
|
||||
}
|
||||
init_gdt(cpu);
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
|
||||
|
||||
idle->thread.eip = (unsigned long) start_secondary;
|
||||
/* start_eip had better be page-aligned! */
|
||||
|
@ -941,7 +938,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
|
|||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
struct warm_boot_cpu_info info;
|
||||
int apicid, ret;
|
||||
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
||||
|
||||
apicid = x86_cpu_to_apicid[cpu];
|
||||
if (apicid == BAD_APICID) {
|
||||
|
@ -949,18 +945,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu)
|
|||
goto exit;
|
||||
}
|
||||
|
||||
/*
|
||||
* the CPU isn't initialized at boot time, allocate gdt table here.
|
||||
* cpu_init will initialize it
|
||||
*/
|
||||
if (!cpu_gdt_descr->address) {
|
||||
cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL);
|
||||
if (!cpu_gdt_descr->address)
|
||||
printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
|
||||
ret = -ENOMEM;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
info.complete = &done;
|
||||
info.apicid = apicid;
|
||||
info.cpu = cpu;
|
||||
|
@ -1173,7 +1157,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
|
|||
|
||||
/* These are wrappers to interface to the new boot process. Someone
|
||||
who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
|
||||
void __init smp_prepare_cpus(unsigned int max_cpus)
|
||||
void __init native_smp_prepare_cpus(unsigned int max_cpus)
|
||||
{
|
||||
smp_commenced_mask = cpumask_of_cpu(0);
|
||||
cpu_callin_map = cpumask_of_cpu(0);
|
||||
|
@ -1181,13 +1165,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
|
|||
smp_boot_cpus(max_cpus);
|
||||
}
|
||||
|
||||
void __devinit smp_prepare_boot_cpu(void)
|
||||
void __init native_smp_prepare_boot_cpu(void)
|
||||
{
|
||||
cpu_set(smp_processor_id(), cpu_online_map);
|
||||
cpu_set(smp_processor_id(), cpu_callout_map);
|
||||
cpu_set(smp_processor_id(), cpu_present_map);
|
||||
cpu_set(smp_processor_id(), cpu_possible_map);
|
||||
per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
|
||||
unsigned int cpu = smp_processor_id();
|
||||
|
||||
init_gdt(cpu);
|
||||
switch_to_new_gdt();
|
||||
|
||||
cpu_set(cpu, cpu_online_map);
|
||||
cpu_set(cpu, cpu_callout_map);
|
||||
cpu_set(cpu, cpu_present_map);
|
||||
cpu_set(cpu, cpu_possible_map);
|
||||
__get_cpu_var(cpu_state) = CPU_ONLINE;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
@ -1277,7 +1266,7 @@ void __cpu_die(unsigned int cpu)
|
|||
}
|
||||
#endif /* CONFIG_HOTPLUG_CPU */
|
||||
|
||||
int __cpuinit __cpu_up(unsigned int cpu)
|
||||
int __cpuinit native_cpu_up(unsigned int cpu)
|
||||
{
|
||||
unsigned long flags;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
|
@ -1319,15 +1308,10 @@ int __cpuinit __cpu_up(unsigned int cpu)
|
|||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_GENERICARCH
|
||||
if (num_online_cpus() > 8 && genapic == &apic_default)
|
||||
panic("Default flat APIC routing can't be used with > 8 cpus\n");
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __init smp_cpus_done(unsigned int max_cpus)
|
||||
void __init native_smp_cpus_done(unsigned int max_cpus)
|
||||
{
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
setup_ioapic_dest();
|
||||
|
|
|
@ -22,16 +22,26 @@
|
|||
#include <asm/msr.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/elf.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
enum {
|
||||
VDSO_DISABLED = 0,
|
||||
VDSO_ENABLED = 1,
|
||||
VDSO_COMPAT = 2,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_COMPAT_VDSO
|
||||
#define VDSO_DEFAULT VDSO_COMPAT
|
||||
#else
|
||||
#define VDSO_DEFAULT VDSO_ENABLED
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Should the kernel map a VDSO page into processes and pass its
|
||||
* address down to glibc upon exec()?
|
||||
*/
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
unsigned int __read_mostly vdso_enabled = 0;
|
||||
#else
|
||||
unsigned int __read_mostly vdso_enabled = 1;
|
||||
#endif
|
||||
unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT;
|
||||
|
||||
EXPORT_SYMBOL_GPL(vdso_enabled);
|
||||
|
||||
|
@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup);
|
|||
|
||||
extern asmlinkage void sysenter_entry(void);
|
||||
|
||||
static __init void reloc_symtab(Elf32_Ehdr *ehdr,
|
||||
unsigned offset, unsigned size)
|
||||
{
|
||||
Elf32_Sym *sym = (void *)ehdr + offset;
|
||||
unsigned nsym = size / sizeof(*sym);
|
||||
unsigned i;
|
||||
|
||||
for(i = 0; i < nsym; i++, sym++) {
|
||||
if (sym->st_shndx == SHN_UNDEF ||
|
||||
sym->st_shndx == SHN_ABS)
|
||||
continue; /* skip */
|
||||
|
||||
if (sym->st_shndx > SHN_LORESERVE) {
|
||||
printk(KERN_INFO "VDSO: unexpected st_shndx %x\n",
|
||||
sym->st_shndx);
|
||||
continue;
|
||||
}
|
||||
|
||||
switch(ELF_ST_TYPE(sym->st_info)) {
|
||||
case STT_OBJECT:
|
||||
case STT_FUNC:
|
||||
case STT_SECTION:
|
||||
case STT_FILE:
|
||||
sym->st_value += VDSO_HIGH_BASE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset)
|
||||
{
|
||||
Elf32_Dyn *dyn = (void *)ehdr + offset;
|
||||
|
||||
for(; dyn->d_tag != DT_NULL; dyn++)
|
||||
switch(dyn->d_tag) {
|
||||
case DT_PLTGOT:
|
||||
case DT_HASH:
|
||||
case DT_STRTAB:
|
||||
case DT_SYMTAB:
|
||||
case DT_RELA:
|
||||
case DT_INIT:
|
||||
case DT_FINI:
|
||||
case DT_REL:
|
||||
case DT_DEBUG:
|
||||
case DT_JMPREL:
|
||||
case DT_VERSYM:
|
||||
case DT_VERDEF:
|
||||
case DT_VERNEED:
|
||||
case DT_ADDRRNGLO ... DT_ADDRRNGHI:
|
||||
/* definitely pointers needing relocation */
|
||||
dyn->d_un.d_ptr += VDSO_HIGH_BASE;
|
||||
break;
|
||||
|
||||
case DT_ENCODING ... OLD_DT_LOOS-1:
|
||||
case DT_LOOS ... DT_HIOS-1:
|
||||
/* Tags above DT_ENCODING are pointers if
|
||||
they're even */
|
||||
if (dyn->d_tag >= DT_ENCODING &&
|
||||
(dyn->d_tag & 1) == 0)
|
||||
dyn->d_un.d_ptr += VDSO_HIGH_BASE;
|
||||
break;
|
||||
|
||||
case DT_VERDEFNUM:
|
||||
case DT_VERNEEDNUM:
|
||||
case DT_FLAGS_1:
|
||||
case DT_RELACOUNT:
|
||||
case DT_RELCOUNT:
|
||||
case DT_VALRNGLO ... DT_VALRNGHI:
|
||||
/* definitely not pointers */
|
||||
break;
|
||||
|
||||
case OLD_DT_LOOS ... DT_LOOS-1:
|
||||
case DT_HIOS ... DT_VALRNGLO-1:
|
||||
default:
|
||||
if (dyn->d_tag > DT_ENCODING)
|
||||
printk(KERN_INFO "VDSO: unexpected DT_tag %x\n",
|
||||
dyn->d_tag);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static __init void relocate_vdso(Elf32_Ehdr *ehdr)
|
||||
{
|
||||
Elf32_Phdr *phdr;
|
||||
Elf32_Shdr *shdr;
|
||||
int i;
|
||||
|
||||
BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
|
||||
!elf_check_arch(ehdr) ||
|
||||
ehdr->e_type != ET_DYN);
|
||||
|
||||
ehdr->e_entry += VDSO_HIGH_BASE;
|
||||
|
||||
/* rebase phdrs */
|
||||
phdr = (void *)ehdr + ehdr->e_phoff;
|
||||
for (i = 0; i < ehdr->e_phnum; i++) {
|
||||
phdr[i].p_vaddr += VDSO_HIGH_BASE;
|
||||
|
||||
/* relocate dynamic stuff */
|
||||
if (phdr[i].p_type == PT_DYNAMIC)
|
||||
reloc_dyn(ehdr, phdr[i].p_offset);
|
||||
}
|
||||
|
||||
/* rebase sections */
|
||||
shdr = (void *)ehdr + ehdr->e_shoff;
|
||||
for(i = 0; i < ehdr->e_shnum; i++) {
|
||||
if (!(shdr[i].sh_flags & SHF_ALLOC))
|
||||
continue;
|
||||
|
||||
shdr[i].sh_addr += VDSO_HIGH_BASE;
|
||||
|
||||
if (shdr[i].sh_type == SHT_SYMTAB ||
|
||||
shdr[i].sh_type == SHT_DYNSYM)
|
||||
reloc_symtab(ehdr, shdr[i].sh_offset,
|
||||
shdr[i].sh_size);
|
||||
}
|
||||
}
|
||||
|
||||
void enable_sep_cpu(void)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
|
@ -56,14 +183,33 @@ void enable_sep_cpu(void)
|
|||
return;
|
||||
}
|
||||
|
||||
tss->ss1 = __KERNEL_CS;
|
||||
tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
|
||||
tss->x86_tss.ss1 = __KERNEL_CS;
|
||||
tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0);
|
||||
put_cpu();
|
||||
}
|
||||
|
||||
static struct vm_area_struct gate_vma;
|
||||
|
||||
static int __init gate_vma_init(void)
|
||||
{
|
||||
gate_vma.vm_mm = NULL;
|
||||
gate_vma.vm_start = FIXADDR_USER_START;
|
||||
gate_vma.vm_end = FIXADDR_USER_END;
|
||||
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
|
||||
gate_vma.vm_page_prot = __P101;
|
||||
/*
|
||||
* Make sure the vDSO gets into every core dump.
|
||||
* Dumping its contents makes post-mortem fully interpretable later
|
||||
* without matching up the same kernel and hardware config to see
|
||||
* what PC values meant.
|
||||
*/
|
||||
gate_vma.vm_flags |= VM_ALWAYSDUMP;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* These symbols are defined by vsyscall.o to mark the bounds
|
||||
* of the ELF DSO images included therein.
|
||||
|
@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end;
|
|||
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
|
||||
static struct page *syscall_pages[1];
|
||||
|
||||
static void map_compat_vdso(int map)
|
||||
{
|
||||
static int vdso_mapped;
|
||||
|
||||
if (map == vdso_mapped)
|
||||
return;
|
||||
|
||||
vdso_mapped = map;
|
||||
|
||||
__set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT,
|
||||
map ? PAGE_READONLY_EXEC : PAGE_NONE);
|
||||
|
||||
/* flush stray tlbs */
|
||||
flush_tlb_all();
|
||||
}
|
||||
|
||||
int __init sysenter_setup(void)
|
||||
{
|
||||
void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
|
||||
const void *vsyscall;
|
||||
size_t vsyscall_len;
|
||||
|
||||
syscall_pages[0] = virt_to_page(syscall_page);
|
||||
|
||||
#ifdef CONFIG_COMPAT_VDSO
|
||||
__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC);
|
||||
gate_vma_init();
|
||||
|
||||
printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
|
||||
#endif
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_SEP)) {
|
||||
memcpy(syscall_page,
|
||||
&vsyscall_int80_start,
|
||||
&vsyscall_int80_end - &vsyscall_int80_start);
|
||||
return 0;
|
||||
vsyscall = &vsyscall_int80_start;
|
||||
vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start;
|
||||
} else {
|
||||
vsyscall = &vsyscall_sysenter_start;
|
||||
vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start;
|
||||
}
|
||||
|
||||
memcpy(syscall_page,
|
||||
&vsyscall_sysenter_start,
|
||||
&vsyscall_sysenter_end - &vsyscall_sysenter_start);
|
||||
memcpy(syscall_page, vsyscall, vsyscall_len);
|
||||
relocate_vdso(syscall_page);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_COMPAT_VDSO
|
||||
/* Defined in vsyscall-sysenter.S */
|
||||
extern void SYSENTER_RETURN;
|
||||
|
||||
|
@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
|
|||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
unsigned long addr;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
bool compat;
|
||||
|
||||
down_write(&mm->mmap_sem);
|
||||
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* MAYWRITE to allow gdb to COW and set breakpoints
|
||||
*
|
||||
* Make sure the vDSO gets into every core dump.
|
||||
* Dumping its contents makes post-mortem fully interpretable later
|
||||
* without matching up the same kernel and hardware config to see
|
||||
* what PC values meant.
|
||||
*/
|
||||
ret = install_special_mapping(mm, addr, PAGE_SIZE,
|
||||
VM_READ|VM_EXEC|
|
||||
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
|
||||
VM_ALWAYSDUMP,
|
||||
syscall_pages);
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
/* Test compat mode once here, in case someone
|
||||
changes it via sysctl */
|
||||
compat = (vdso_enabled == VDSO_COMPAT);
|
||||
|
||||
map_compat_vdso(compat);
|
||||
|
||||
if (compat)
|
||||
addr = VDSO_HIGH_BASE;
|
||||
else {
|
||||
addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
|
||||
if (IS_ERR_VALUE(addr)) {
|
||||
ret = addr;
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
/*
|
||||
* MAYWRITE to allow gdb to COW and set breakpoints
|
||||
*
|
||||
* Make sure the vDSO gets into every core dump.
|
||||
* Dumping its contents makes post-mortem fully
|
||||
* interpretable later without matching up the same
|
||||
* kernel and hardware config to see what PC values
|
||||
* meant.
|
||||
*/
|
||||
ret = install_special_mapping(mm, addr, PAGE_SIZE,
|
||||
VM_READ|VM_EXEC|
|
||||
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
|
||||
VM_ALWAYSDUMP,
|
||||
syscall_pages);
|
||||
|
||||
if (ret)
|
||||
goto up_fail;
|
||||
}
|
||||
|
||||
current->mm->context.vdso = (void *)addr;
|
||||
current_thread_info()->sysenter_return =
|
||||
(void *)VDSO_SYM(&SYSENTER_RETURN);
|
||||
up_fail:
|
||||
(void *)VDSO_SYM(&SYSENTER_RETURN);
|
||||
|
||||
up_fail:
|
||||
up_write(&mm->mmap_sem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma)
|
|||
|
||||
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
|
||||
{
|
||||
struct mm_struct *mm = tsk->mm;
|
||||
|
||||
/* Check to see if this task was created in compat vdso mode */
|
||||
if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE)
|
||||
return &gate_vma;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr)
|
|||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -70,8 +70,6 @@
|
|||
|
||||
#include <asm/i8259.h>
|
||||
|
||||
int pit_latch_buggy; /* extern */
|
||||
|
||||
#include "do_timer.h"
|
||||
|
||||
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
*
|
||||
* TYPE VALUE
|
||||
* R_386_32 startup_32_smp
|
||||
* R_386_32 boot_gdt_table
|
||||
* R_386_32 boot_gdt
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
@ -62,8 +62,8 @@ r_base = .
|
|||
* to 32 bit.
|
||||
*/
|
||||
|
||||
lidtl boot_idt - r_base # load idt with 0, 0
|
||||
lgdtl boot_gdt - r_base # load gdt with whatever is appropriate
|
||||
lidtl boot_idt_descr - r_base # load idt with 0, 0
|
||||
lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate
|
||||
|
||||
xor %ax, %ax
|
||||
inc %ax # protected mode (PE) bit
|
||||
|
@ -73,11 +73,11 @@ r_base = .
|
|||
|
||||
# These need to be in the same 64K segment as the above;
|
||||
# hence we don't use the boot_gdt_descr defined in head.S
|
||||
boot_gdt:
|
||||
boot_gdt_descr:
|
||||
.word __BOOT_DS + 7 # gdt limit
|
||||
.long boot_gdt_table-__PAGE_OFFSET # gdt base
|
||||
.long boot_gdt - __PAGE_OFFSET # gdt base
|
||||
|
||||
boot_idt:
|
||||
boot_idt_descr:
|
||||
.word 0 # idt limit = 0
|
||||
.long 0 # idt base = 0L
|
||||
|
||||
|
|
|
@ -476,8 +476,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
|
|||
siginfo_t *info)
|
||||
{
|
||||
struct task_struct *tsk = current;
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_no = trapnr;
|
||||
|
||||
if (regs->eflags & VM_MASK) {
|
||||
if (vm86)
|
||||
|
@ -489,6 +487,18 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
|
|||
goto kernel_trap;
|
||||
|
||||
trap_signal: {
|
||||
/*
|
||||
* We want error_code and trap_no set for userspace faults and
|
||||
* kernelspace faults which result in die(), but not
|
||||
* kernelspace faults which are fixed up. die() gives the
|
||||
* process no chance to handle the signal and notice the
|
||||
* kernel fault information, so that won't result in polluting
|
||||
* the information about previously queued, but not yet
|
||||
* delivered, faults. See also do_general_protection below.
|
||||
*/
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_no = trapnr;
|
||||
|
||||
if (info)
|
||||
force_sig_info(signr, info, tsk);
|
||||
else
|
||||
|
@ -497,8 +507,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86,
|
|||
}
|
||||
|
||||
kernel_trap: {
|
||||
if (!fixup_exception(regs))
|
||||
if (!fixup_exception(regs)) {
|
||||
tsk->thread.error_code = error_code;
|
||||
tsk->thread.trap_no = trapnr;
|
||||
die(str, regs, error_code);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -583,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
|
|||
* and we set the offset field correctly. Then we let the CPU to
|
||||
* restart the faulting instruction.
|
||||
*/
|
||||
if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
|
||||
if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
|
||||
thread->io_bitmap_ptr) {
|
||||
memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
|
||||
thread->io_bitmap_max);
|
||||
|
@ -596,16 +609,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,
|
|||
thread->io_bitmap_max, 0xff,
|
||||
tss->io_bitmap_max - thread->io_bitmap_max);
|
||||
tss->io_bitmap_max = thread->io_bitmap_max;
|
||||
tss->io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
tss->io_bitmap_owner = thread;
|
||||
put_cpu();
|
||||
return;
|
||||
}
|
||||
put_cpu();
|
||||
|
||||
current->thread.error_code = error_code;
|
||||
current->thread.trap_no = 13;
|
||||
|
||||
if (regs->eflags & VM_MASK)
|
||||
goto gp_in_vm86;
|
||||
|
||||
|
@@ -624,6 +634,8 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs,

gp_in_kernel:
	if (!fixup_exception(regs)) {
		current->thread.error_code = error_code;
		current->thread.trap_no = 13;
		if (notify_die(DIE_GPF, "general protection fault", regs,
				error_code, 13, SIGSEGV) == NOTIFY_STOP)
			return;
@@ -1018,9 +1030,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs,
fastcall unsigned long patch_espfix_desc(unsigned long uesp,
					  unsigned long kesp)
{
	int cpu = smp_processor_id();
	struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
	struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
	struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
	unsigned long base = (kesp - uesp) & -THREAD_SIZE;
	unsigned long new_kesp = kesp - base;
	unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
@@ -200,13 +200,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
{
	struct cpufreq_freqs *freq = data;

	if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
		write_seqlock_irq(&xtime_lock);

	if (!ref_freq) {
		if (!freq->old){
			ref_freq = freq->new;
			goto end;
			return 0;
		}
		ref_freq = freq->old;
		loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy;
@@ -233,13 +230,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
				 * TSC based sched_clock turns
				 * to junk w/ cpufreq
				 */
				mark_tsc_unstable();
				mark_tsc_unstable("cpufreq changes");
			}
		}
	}
end:
	if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE)
		write_sequnlock_irq(&xtime_lock);

	return 0;
}
@@ -281,11 +275,12 @@ static struct clocksource clocksource_tsc = {
				  CLOCK_SOURCE_MUST_VERIFY,
};

void mark_tsc_unstable(void)
void mark_tsc_unstable(char *reason)
{
	if (!tsc_unstable) {
		tsc_unstable = 1;
		tsc_enabled = 0;
		printk("Marking TSC unstable due to: %s.\n", reason);
		/* Can be called before registration */
		if (clocksource_tsc.mult)
			clocksource_change_rating(&clocksource_tsc, 0);
65
arch/i386/kernel/verify_cpu.S
Normal file
|
@@ -0,0 +1,65 @@
|
|||
/* Check if CPU has some minimum CPUID bits
|
||||
This runs in 16bit mode so that the caller can still use the BIOS
|
||||
to output errors on the screen */
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
verify_cpu:
|
||||
pushfl # Save caller passed flags
|
||||
pushl $0 # Kill any dangerous flags
|
||||
popfl
|
||||
|
||||
#if CONFIG_X86_MINIMUM_CPU_MODEL >= 4
|
||||
pushfl
|
||||
orl $(1<<18),(%esp) # try setting AC
|
||||
popfl
|
||||
pushfl
|
||||
popl %eax
|
||||
testl $(1<<18),%eax
|
||||
jz bad
|
||||
#endif
|
||||
#if REQUIRED_MASK1 != 0
|
||||
pushfl # standard way to check for cpuid
|
||||
popl %eax
|
||||
movl %eax,%ebx
|
||||
xorl $0x200000,%eax
|
||||
pushl %eax
|
||||
popfl
|
||||
pushfl
|
||||
popl %eax
|
||||
cmpl %eax,%ebx
|
||||
pushfl # standard way to check for cpuid
|
||||
popl %eax
|
||||
movl %eax,%ebx
|
||||
xorl $0x200000,%eax
|
||||
pushl %eax
|
||||
popfl
|
||||
pushfl
|
||||
popl %eax
|
||||
cmpl %eax,%ebx
|
||||
jz bad # REQUIRED_MASK1 != 0 requires CPUID
|
||||
|
||||
movl $0x0,%eax # See if cpuid 1 is implemented
|
||||
cpuid
|
||||
cmpl $0x1,%eax
|
||||
jb bad # no cpuid 1
|
||||
|
||||
movl $0x1,%eax # Does the cpu have what it takes
|
||||
cpuid
|
||||
|
||||
#if CONFIG_X86_MINIMUM_CPU_MODEL > 4
|
||||
#error add proper model checking here
|
||||
#endif
|
||||
|
||||
andl $REQUIRED_MASK1,%edx
|
||||
xorl $REQUIRED_MASK1,%edx
|
||||
jnz bad
|
||||
#endif /* REQUIRED_MASK1 */
|
||||
|
||||
popfl
|
||||
xor %eax,%eax
|
||||
ret
|
||||
|
||||
bad:
|
||||
popfl
|
||||
movl $1,%eax
|
||||
ret
|
|
@ -26,6 +26,7 @@
|
|||
#include <linux/cpu.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <asm/vmi.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/fixmap.h>
|
||||
|
@ -56,7 +57,7 @@ static int disable_noidle;
|
|||
static int disable_vmi_timer;
|
||||
|
||||
/* Cached VMI operations */
|
||||
struct {
|
||||
static struct {
|
||||
void (*cpuid)(void /* non-c */);
|
||||
void (*_set_ldt)(u32 selector);
|
||||
void (*set_tr)(u32 selector);
|
||||
|
@ -65,16 +66,15 @@ struct {
|
|||
void (*release_page)(u32, u32);
|
||||
void (*set_pte)(pte_t, pte_t *, unsigned);
|
||||
void (*update_pte)(pte_t *, unsigned);
|
||||
void (*set_linear_mapping)(int, u32, u32, u32);
|
||||
void (*flush_tlb)(int);
|
||||
void (*set_linear_mapping)(int, void *, u32, u32);
|
||||
void (*_flush_tlb)(int);
|
||||
void (*set_initial_ap_state)(int, int);
|
||||
void (*halt)(void);
|
||||
void (*set_lazy_mode)(int mode);
|
||||
} vmi_ops;
|
||||
|
||||
/* XXX move this to alternative.h */
|
||||
extern struct paravirt_patch __start_parainstructions[],
|
||||
__stop_parainstructions[];
|
||||
/* Cached VMI operations */
|
||||
struct vmi_timer_ops vmi_timer_ops;
|
||||
|
||||
/*
|
||||
* VMI patching routines.
|
||||
|
@ -83,11 +83,6 @@ extern struct paravirt_patch __start_parainstructions[],
|
|||
#define MNEM_JMP 0xe9
|
||||
#define MNEM_RET 0xc3
|
||||
|
||||
static char irq_save_disable_callout[] = {
|
||||
MNEM_CALL, 0, 0, 0, 0,
|
||||
MNEM_CALL, 0, 0, 0, 0,
|
||||
MNEM_RET
|
||||
};
|
||||
#define IRQ_PATCH_INT_MASK 0
|
||||
#define IRQ_PATCH_DISABLE 5
|
||||
|
||||
|
@ -135,33 +130,17 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
|
|||
static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len)
|
||||
{
|
||||
switch (type) {
|
||||
case PARAVIRT_IRQ_DISABLE:
|
||||
case PARAVIRT_PATCH(irq_disable):
|
||||
return patch_internal(VMI_CALL_DisableInterrupts, len, insns);
|
||||
case PARAVIRT_IRQ_ENABLE:
|
||||
case PARAVIRT_PATCH(irq_enable):
|
||||
return patch_internal(VMI_CALL_EnableInterrupts, len, insns);
|
||||
case PARAVIRT_RESTORE_FLAGS:
|
||||
case PARAVIRT_PATCH(restore_fl):
|
||||
return patch_internal(VMI_CALL_SetInterruptMask, len, insns);
|
||||
case PARAVIRT_SAVE_FLAGS:
|
||||
case PARAVIRT_PATCH(save_fl):
|
||||
return patch_internal(VMI_CALL_GetInterruptMask, len, insns);
|
||||
case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE:
|
||||
if (len >= 10) {
|
||||
patch_internal(VMI_CALL_GetInterruptMask, len, insns);
|
||||
patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5);
|
||||
return 10;
|
||||
} else {
|
||||
/*
|
||||
* You bastards didn't leave enough room to
|
||||
* patch save_flags_irq_disable inline. Patch
|
||||
* to a helper
|
||||
*/
|
||||
BUG_ON(len < 5);
|
||||
*(char *)insns = MNEM_CALL;
|
||||
patch_offset(insns, irq_save_disable_callout);
|
||||
return 5;
|
||||
}
|
||||
case PARAVIRT_INTERRUPT_RETURN:
|
||||
case PARAVIRT_PATCH(iret):
|
||||
return patch_internal(VMI_CALL_IRET, len, insns);
|
||||
case PARAVIRT_STI_SYSEXIT:
|
||||
case PARAVIRT_PATCH(irq_enable_sysexit):
|
||||
return patch_internal(VMI_CALL_SYSEXIT, len, insns);
|
||||
default:
|
||||
break;
|
||||
|
@ -230,24 +209,24 @@ static void vmi_set_tr(void)
|
|||
static void vmi_load_esp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
{
|
||||
tss->esp0 = thread->esp0;
|
||||
tss->x86_tss.esp0 = thread->esp0;
|
||||
|
||||
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
|
||||
if (unlikely(tss->ss1 != thread->sysenter_cs)) {
|
||||
tss->ss1 = thread->sysenter_cs;
|
||||
if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
|
||||
tss->x86_tss.ss1 = thread->sysenter_cs;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||
}
|
||||
vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0);
|
||||
vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0);
|
||||
}
|
||||
|
||||
static void vmi_flush_tlb_user(void)
|
||||
{
|
||||
vmi_ops.flush_tlb(VMI_FLUSH_TLB);
|
||||
vmi_ops._flush_tlb(VMI_FLUSH_TLB);
|
||||
}
|
||||
|
||||
static void vmi_flush_tlb_kernel(void)
|
||||
{
|
||||
vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
|
||||
vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL);
|
||||
}
|
||||
|
||||
/* Stub to do nothing at all; used for delays and unimplemented calls */
|
||||
|
@ -255,18 +234,6 @@ static void vmi_nop(void)
|
|||
{
|
||||
}
|
||||
|
||||
/* For NO_IDLE_HZ, we stop the clock when halting the kernel */
|
||||
static fastcall void vmi_safe_halt(void)
|
||||
{
|
||||
int idle = vmi_stop_hz_timer();
|
||||
vmi_ops.halt();
|
||||
if (idle) {
|
||||
local_irq_disable();
|
||||
vmi_account_time_restart_hz_timer();
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DEBUG_PAGE_TYPE
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
|
@ -370,8 +337,11 @@ static void vmi_check_page_type(u32 pfn, int type)
|
|||
#define vmi_check_page_type(p,t) do { } while (0)
|
||||
#endif
|
||||
|
||||
static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
|
||||
{
|
||||
void *va = kmap_atomic(page, type);
|
||||
|
||||
/*
|
||||
* Internally, the VMI ROM must map virtual addresses to physical
|
||||
* addresses for processing MMU updates. By the time MMU updates
|
||||
|
@ -385,8 +355,11 @@ static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn)
|
|||
* args: SLOT VA COUNT PFN
|
||||
*/
|
||||
BUG_ON(type != KM_PTE0 && type != KM_PTE1);
|
||||
vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn);
|
||||
vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page));
|
||||
|
||||
return va;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void vmi_allocate_pt(u32 pfn)
|
||||
{
|
||||
|
@ -443,13 +416,13 @@ static void vmi_release_pd(u32 pfn)
|
|||
((level) | (is_current_as(mm, user) ? \
|
||||
(VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0))
|
||||
|
||||
static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep)
|
||||
static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
|
||||
vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
|
||||
}
|
||||
|
||||
static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep)
|
||||
static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
||||
{
|
||||
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
|
||||
vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0));
|
||||
|
@ -462,7 +435,7 @@ static void vmi_set_pte(pte_t *ptep, pte_t pte)
|
|||
vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT);
|
||||
}
|
||||
|
||||
static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
|
||||
static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
|
||||
{
|
||||
vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE);
|
||||
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
|
||||
|
@ -516,7 +489,7 @@ static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|||
vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0));
|
||||
}
|
||||
|
||||
void vmi_pmd_clear(pmd_t *pmd)
|
||||
static void vmi_pmd_clear(pmd_t *pmd)
|
||||
{
|
||||
const pte_t pte = { 0 };
|
||||
vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD);
|
||||
|
@ -525,8 +498,6 @@ void vmi_pmd_clear(pmd_t *pmd)
|
|||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
extern void setup_pda(void);
|
||||
|
||||
static void __devinit
|
||||
vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
|
||||
unsigned long start_esp)
|
||||
|
@ -551,13 +522,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
|
|||
|
||||
ap.ds = __USER_DS;
|
||||
ap.es = __USER_DS;
|
||||
ap.fs = __KERNEL_PDA;
|
||||
ap.fs = __KERNEL_PERCPU;
|
||||
ap.gs = 0;
|
||||
|
||||
ap.eflags = 0;
|
||||
|
||||
setup_pda();
|
||||
|
||||
#ifdef CONFIG_X86_PAE
|
||||
/* efer should match BSP efer. */
|
||||
if (cpu_has_nx) {
|
||||
|
@ -575,9 +544,9 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
|
|||
}
|
||||
#endif
|
||||
|
||||
static void vmi_set_lazy_mode(int mode)
|
||||
static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode)
|
||||
{
|
||||
static DEFINE_PER_CPU(int, lazy_mode);
|
||||
static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode);
|
||||
|
||||
if (!vmi_ops.set_lazy_mode)
|
||||
return;
|
||||
|
@ -685,7 +654,7 @@ void vmi_bringup(void)
|
|||
{
|
||||
/* We must establish the lowmem mapping for MMU ops to work */
|
||||
if (vmi_ops.set_linear_mapping)
|
||||
vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0);
|
||||
vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -740,7 +709,6 @@ do { \
|
|||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*
|
||||
* Activate the VMI interface and switch into paravirtualized mode
|
||||
*/
|
||||
|
@ -796,12 +764,6 @@ static inline int __init activate_vmi(void)
|
|||
para_fill(irq_disable, DisableInterrupts);
|
||||
para_fill(irq_enable, EnableInterrupts);
|
||||
|
||||
/* irq_save_disable !!! sheer pain */
|
||||
patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK],
|
||||
(char *)paravirt_ops.save_fl);
|
||||
patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE],
|
||||
(char *)paravirt_ops.irq_disable);
|
||||
|
||||
para_fill(wbinvd, WBINVD);
|
||||
para_fill(read_tsc, RDTSC);
|
||||
|
||||
|
@ -831,8 +793,8 @@ static inline int __init activate_vmi(void)
|
|||
para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode);
|
||||
|
||||
/* user and kernel flush are just handled with different flags to FlushTLB */
|
||||
para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB);
|
||||
para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB);
|
||||
para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB);
|
||||
para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB);
|
||||
para_fill(flush_tlb_single, InvalPage);
|
||||
|
||||
/*
|
||||
|
@ -878,8 +840,13 @@ static inline int __init activate_vmi(void)
|
|||
paravirt_ops.release_pt = vmi_release_pt;
|
||||
paravirt_ops.release_pd = vmi_release_pd;
|
||||
}
|
||||
para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping,
|
||||
SetLinearMapping);
|
||||
|
||||
/* Set linear is needed in all cases */
|
||||
vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping);
|
||||
#ifdef CONFIG_HIGHPTE
|
||||
if (vmi_ops.set_linear_mapping)
|
||||
paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* These MUST always be patched. Don't support indirect jumps
|
||||
|
@ -920,8 +887,8 @@ static inline int __init activate_vmi(void)
|
|||
paravirt_ops.get_wallclock = vmi_get_wallclock;
|
||||
paravirt_ops.set_wallclock = vmi_set_wallclock;
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm;
|
||||
paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm;
|
||||
paravirt_ops.setup_boot_clock = vmi_time_bsp_init;
|
||||
paravirt_ops.setup_secondary_clock = vmi_time_ap_init;
|
||||
#endif
|
||||
paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles;
|
||||
paravirt_ops.get_cpu_khz = vmi_cpu_khz;
|
||||
|
@ -933,11 +900,7 @@ static inline int __init activate_vmi(void)
|
|||
disable_vmi_timer = 1;
|
||||
}
|
||||
|
||||
/* No idle HZ mode only works if VMI timer and no idle is enabled */
|
||||
if (disable_noidle || disable_vmi_timer)
|
||||
para_fill(safe_halt, Halt);
|
||||
else
|
||||
para_wrap(safe_halt, vmi_safe_halt, halt, Halt);
|
||||
para_fill(safe_halt, Halt);
|
||||
|
||||
/*
|
||||
* Alternative instruction rewriting doesn't happen soon enough
|
||||
|
@ -945,7 +908,7 @@ static inline int __init activate_vmi(void)
|
|||
* to do this before IRQs get reenabled. Fortunately, it is
|
||||
* idempotent.
|
||||
*/
|
||||
apply_paravirt(__start_parainstructions, __stop_parainstructions);
|
||||
apply_paravirt(__parainstructions, __parainstructions_end);
|
||||
|
||||
vmi_bringup();
|
||||
|
||||
|
|
318
arch/i386/kernel/vmiclock.c
Normal file
|
@@ -0,0 +1,318 @@
|
|||
/*
|
||||
* VMI paravirtual timer support routines.
|
||||
*
|
||||
* Copyright (C) 2007, VMware, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
||||
* NON INFRINGEMENT. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/smp.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/clockchips.h>
|
||||
|
||||
#include <asm/vmi.h>
|
||||
#include <asm/vmi_time.h>
|
||||
#include <asm/arch_hooks.h>
|
||||
#include <asm/apicdef.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/timer.h>
|
||||
|
||||
#include <irq_vectors.h>
|
||||
#include "io_ports.h"
|
||||
|
||||
#define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
|
||||
#define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring())
|
||||
|
||||
static DEFINE_PER_CPU(struct clock_event_device, local_events);
|
||||
|
||||
static inline u32 vmi_counter(u32 flags)
|
||||
{
|
||||
/* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding
|
||||
* cycle counter. */
|
||||
return flags & VMI_ALARM_COUNTER_MASK;
|
||||
}
|
||||
|
||||
/* paravirt_ops.get_wallclock = vmi_get_wallclock */
|
||||
unsigned long vmi_get_wallclock(void)
|
||||
{
|
||||
unsigned long long wallclock;
|
||||
wallclock = vmi_timer_ops.get_wallclock(); // nsec
|
||||
(void)do_div(wallclock, 1000000000); // sec
|
||||
|
||||
return wallclock;
|
||||
}
|
||||
|
||||
/* paravirt_ops.set_wallclock = vmi_set_wallclock */
|
||||
int vmi_set_wallclock(unsigned long now)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */
|
||||
unsigned long long vmi_get_sched_cycles(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
|
||||
}
|
||||
|
||||
/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
|
||||
unsigned long vmi_cpu_khz(void)
|
||||
{
|
||||
unsigned long long khz;
|
||||
khz = vmi_timer_ops.get_cycle_frequency();
|
||||
(void)do_div(khz, 1000);
|
||||
return khz;
|
||||
}
|
||||
|
||||
static inline unsigned int vmi_get_timer_vector(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_IO_APIC
|
||||
return FIRST_DEVICE_VECTOR;
|
||||
#else
|
||||
return FIRST_EXTERNAL_VECTOR;
|
||||
#endif
|
||||
}
|
||||
|
||||
/** vmi clockchip */
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
static unsigned int startup_timer_irq(unsigned int irq)
|
||||
{
|
||||
unsigned long val = apic_read(APIC_LVTT);
|
||||
apic_write(APIC_LVTT, vmi_get_timer_vector());
|
||||
|
||||
return (val & APIC_SEND_PENDING);
|
||||
}
|
||||
|
||||
static void mask_timer_irq(unsigned int irq)
|
||||
{
|
||||
unsigned long val = apic_read(APIC_LVTT);
|
||||
apic_write(APIC_LVTT, val | APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
static void unmask_timer_irq(unsigned int irq)
|
||||
{
|
||||
unsigned long val = apic_read(APIC_LVTT);
|
||||
apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED);
|
||||
}
|
||||
|
||||
static void ack_timer_irq(unsigned int irq)
|
||||
{
|
||||
ack_APIC_irq();
|
||||
}
|
||||
|
||||
static struct irq_chip vmi_chip __read_mostly = {
|
||||
.name = "VMI-LOCAL",
|
||||
.startup = startup_timer_irq,
|
||||
.mask = mask_timer_irq,
|
||||
.unmask = unmask_timer_irq,
|
||||
.ack = ack_timer_irq
|
||||
};
|
||||
#endif
|
||||
|
||||
/** vmi clockevent */
|
||||
#define VMI_ALARM_WIRED_IRQ0 0x00000000
|
||||
#define VMI_ALARM_WIRED_LVTT 0x00010000
|
||||
static int vmi_wiring = VMI_ALARM_WIRED_IRQ0;
|
||||
|
||||
static inline int vmi_get_alarm_wiring(void)
|
||||
{
|
||||
return vmi_wiring;
|
||||
}
|
||||
|
||||
static void vmi_timer_set_mode(enum clock_event_mode mode,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
cycle_t now, cycles_per_hz;
|
||||
BUG_ON(!irqs_disabled());
|
||||
|
||||
switch (mode) {
|
||||
case CLOCK_EVT_MODE_ONESHOT:
|
||||
break;
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
|
||||
(void)do_div(cycles_per_hz, HZ);
|
||||
now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC));
|
||||
vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz);
|
||||
break;
|
||||
case CLOCK_EVT_MODE_UNUSED:
|
||||
case CLOCK_EVT_MODE_SHUTDOWN:
|
||||
switch (evt->mode) {
|
||||
case CLOCK_EVT_MODE_ONESHOT:
|
||||
vmi_timer_ops.cancel_alarm(VMI_ONESHOT);
|
||||
break;
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
vmi_timer_ops.cancel_alarm(VMI_PERIODIC);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int vmi_timer_next_event(unsigned long delta,
|
||||
struct clock_event_device *evt)
|
||||
{
|
||||
/* Unfortunately, set_next_event interface only passes relative
|
||||
* expiry, but we want absolute expiry. It'd be better if were
|
||||
* were passed an aboslute expiry, since a bunch of time may
|
||||
* have been stolen between the time the delta is computed and
|
||||
* when we set the alarm below. */
|
||||
cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT));
|
||||
|
||||
BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);
|
||||
vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct clock_event_device vmi_clockevent = {
|
||||
.name = "vmi-timer",
|
||||
.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
|
||||
.shift = 22,
|
||||
.set_mode = vmi_timer_set_mode,
|
||||
.set_next_event = vmi_timer_next_event,
|
||||
.rating = 1000,
|
||||
.irq = 0,
|
||||
};
|
||||
|
||||
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
struct clock_event_device *evt = &__get_cpu_var(local_events);
|
||||
evt->event_handler(evt);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static struct irqaction vmi_clock_action = {
|
||||
.name = "vmi-timer",
|
||||
.handler = vmi_timer_interrupt,
|
||||
.flags = IRQF_DISABLED | IRQF_NOBALANCING,
|
||||
.mask = CPU_MASK_ALL,
|
||||
};
|
||||
|
||||
static void __devinit vmi_time_init_clockevent(void)
|
||||
{
|
||||
cycle_t cycles_per_msec;
|
||||
struct clock_event_device *evt;
|
||||
|
||||
int cpu = smp_processor_id();
|
||||
evt = &__get_cpu_var(local_events);
|
||||
|
||||
/* Use cycles_per_msec since div_sc params are 32-bits. */
|
||||
cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
|
||||
(void)do_div(cycles_per_msec, 1000);
|
||||
|
||||
memcpy(evt, &vmi_clockevent, sizeof(*evt));
|
||||
/* Must pick .shift such that .mult fits in 32-bits. Choosing
|
||||
* .shift to be 22 allows 2^(32-22) cycles per nano-seconds
|
||||
* before overflow. */
|
||||
evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift);
|
||||
/* Upper bound is clockevent's use of ulong for cycle deltas. */
|
||||
evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt);
|
||||
evt->min_delta_ns = clockevent_delta2ns(1, evt);
|
||||
evt->cpumask = cpumask_of_cpu(cpu);
|
||||
|
||||
printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n",
|
||||
evt->name, evt->mult, evt->shift);
|
||||
clockevents_register_device(evt);
|
||||
}
|
||||
|
||||
void __init vmi_time_init(void)
|
||||
{
|
||||
/* Disable PIT: BIOSes start PIT CH0 with 18.2hz peridic. */
|
||||
outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
|
||||
|
||||
vmi_time_init_clockevent();
|
||||
setup_irq(0, &vmi_clock_action);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
void __devinit vmi_time_bsp_init(void)
|
||||
{
|
||||
/*
|
||||
* On APIC systems, we want local timers to fire on each cpu. We do
|
||||
* this by programming LVTT to deliver timer events to the IRQ handler
|
||||
* for IRQ-0, since we can't re-use the APIC local timer handler
|
||||
* without interfering with that code.
|
||||
*/
|
||||
clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
|
||||
local_irq_disable();
|
||||
#ifdef CONFIG_X86_SMP
|
||||
/*
|
||||
* XXX handle_percpu_irq only defined for SMP; we need to switch over
|
||||
* to using it, since this is a local interrupt, which each CPU must
|
||||
* handle individually without locking out or dropping simultaneous
|
||||
* local timers on other CPUs. We also don't want to trigger the
|
||||
* quirk workaround code for interrupts which gets invoked from
|
||||
* handle_percpu_irq via eoi, so we use our own IRQ chip.
|
||||
*/
|
||||
set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt");
|
||||
#else
|
||||
set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt");
|
||||
#endif
|
||||
vmi_wiring = VMI_ALARM_WIRED_LVTT;
|
||||
apic_write(APIC_LVTT, vmi_get_timer_vector());
|
||||
local_irq_enable();
|
||||
clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
|
||||
}
|
||||
|
||||
void __devinit vmi_time_ap_init(void)
|
||||
{
|
||||
vmi_time_init_clockevent();
|
||||
apic_write(APIC_LVTT, vmi_get_timer_vector());
|
||||
}
|
||||
#endif
|
||||
|
||||
/** vmi clocksource */
|
||||
|
||||
static cycle_t read_real_cycles(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
|
||||
}
|
||||
|
||||
static struct clocksource clocksource_vmi = {
|
||||
.name = "vmi-timer",
|
||||
.rating = 450,
|
||||
.read = read_real_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.mult = 0, /* to be set */
|
||||
.shift = 22,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
static int __init init_vmi_clocksource(void)
|
||||
{
|
||||
cycle_t cycles_per_msec;
|
||||
|
||||
if (!vmi_timer_ops.get_cycle_frequency)
|
||||
return 0;
|
||||
/* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */
|
||||
cycles_per_msec = vmi_timer_ops.get_cycle_frequency();
|
||||
(void)do_div(cycles_per_msec, 1000);
|
||||
|
||||
/* Note that clocksource.{mult, shift} converts in the opposite direction
|
||||
* as clockevents. */
|
||||
clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
|
||||
clocksource_vmi.shift);
|
||||
|
||||
printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec);
|
||||
return clocksource_register(&clocksource_vmi);
|
||||
|
||||
}
|
||||
module_init(init_vmi_clocksource);
|
|
@ -1,482 +0,0 @@
|
|||
/*
|
||||
* VMI paravirtual timer support routines.
|
||||
*
|
||||
* Copyright (C) 2005, VMware, Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
||||
* NON INFRINGEMENT. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
*
|
||||
* Send feedback to dhecht@vmware.com
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* Portions of this code from arch/i386/kernel/timers/timer_tsc.c.
|
||||
* Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c.
|
||||
* See comments there for proper credits.
|
||||
*/
|
||||
|
||||
#include <linux/spinlock.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/clocksource.h>
|
||||
|
||||
#include <asm/timer.h>
|
||||
#include <asm/io.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/div64.h>
|
||||
#include <asm/timer.h>
|
||||
#include <asm/desc.h>
|
||||
|
||||
#include <asm/vmi.h>
|
||||
#include <asm/vmi_time.h>
|
||||
|
||||
#include <mach_timer.h>
|
||||
#include <io_ports.h>
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT
|
||||
#else
|
||||
#define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0
|
||||
#endif
|
||||
|
||||
/* Cached VMI operations */
|
||||
struct vmi_timer_ops vmi_timer_ops;
|
||||
|
||||
#ifdef CONFIG_NO_IDLE_HZ
|
||||
|
||||
/* /proc/sys/kernel/hz_timer state. */
|
||||
int sysctl_hz_timer;
|
||||
|
||||
/* Some stats */
|
||||
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs);
|
||||
static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies);
|
||||
static DEFINE_PER_CPU(unsigned long, idle_start_jiffies);
|
||||
|
||||
#endif /* CONFIG_NO_IDLE_HZ */
|
||||
|
||||
/* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */
|
||||
static int alarm_hz = CONFIG_VMI_ALARM_HZ;
|
||||
|
||||
/* Cache of the value get_cycle_frequency / HZ. */
|
||||
static signed long long cycles_per_jiffy;
|
||||
|
||||
/* Cache of the value get_cycle_frequency / alarm_hz. */
|
||||
static signed long long cycles_per_alarm;
|
||||
|
||||
/* The number of cycles accounted for by the 'jiffies'/'xtime' count.
|
||||
* Protected by xtime_lock. */
|
||||
static unsigned long long real_cycles_accounted_system;
|
||||
|
||||
/* The number of cycles accounted for by update_process_times(), per cpu. */
|
||||
static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu);
|
||||
|
||||
/* The number of stolen cycles accounted, per cpu. */
|
||||
static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu);
|
||||
|
||||
/* Clock source. */
|
||||
static cycle_t read_real_cycles(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
|
||||
}
|
||||
|
||||
static cycle_t read_available_cycles(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE);
|
||||
}
|
||||
|
||||
#if 0
|
||||
static cycle_t read_stolen_cycles(void)
|
||||
{
|
||||
return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN);
|
||||
}
|
||||
#endif /* 0 */
|
||||
|
||||
static struct clocksource clocksource_vmi = {
|
||||
.name = "vmi-timer",
|
||||
.rating = 450,
|
||||
.read = read_real_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(64),
|
||||
.mult = 0, /* to be set */
|
||||
.shift = 22,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
|
||||
/* Timer interrupt handler. */
|
||||
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id);
|
||||
|
||||
static struct irqaction vmi_timer_irq = {
|
||||
.handler = vmi_timer_interrupt,
|
||||
.flags = IRQF_DISABLED,
|
||||
.mask = CPU_MASK_NONE,
|
||||
.name = "VMI-alarm",
|
||||
};
|
||||
|
||||
/* Alarm rate */
|
||||
static int __init vmi_timer_alarm_rate_setup(char* str)
|
||||
{
|
||||
int alarm_rate;
|
||||
if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) {
|
||||
alarm_hz = alarm_rate;
|
||||
printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
__setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup);
|
||||
|
||||
|
||||
/* Initialization */
|
||||
static void vmi_get_wallclock_ts(struct timespec *ts)
|
||||
{
|
||||
unsigned long long wallclock;
|
||||
wallclock = vmi_timer_ops.get_wallclock(); // nsec units
|
||||
ts->tv_nsec = do_div(wallclock, 1000000000);
|
||||
ts->tv_sec = wallclock;
|
||||
}
|
||||
|
||||
unsigned long vmi_get_wallclock(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
vmi_get_wallclock_ts(&ts);
|
||||
return ts.tv_sec;
|
||||
}
|
||||
|
||||
int vmi_set_wallclock(unsigned long now)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned long long vmi_get_sched_cycles(void)
|
||||
{
|
||||
return read_available_cycles();
|
||||
}
|
||||
|
||||
unsigned long vmi_cpu_khz(void)
|
||||
{
|
||||
unsigned long long khz;
|
||||
|
||||
khz = vmi_timer_ops.get_cycle_frequency();
|
||||
(void)do_div(khz, 1000);
|
||||
return khz;
|
||||
}
|
||||
|
||||
void __init vmi_time_init(void)
|
||||
{
|
||||
unsigned long long cycles_per_sec, cycles_per_msec;
|
||||
unsigned long flags;
|
||||
|
||||
local_irq_save(flags);
|
||||
setup_irq(0, &vmi_timer_irq);
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt);
|
||||
#endif
|
||||
|
||||
real_cycles_accounted_system = read_real_cycles();
|
||||
per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles();
|
||||
|
||||
cycles_per_sec = vmi_timer_ops.get_cycle_frequency();
|
||||
cycles_per_jiffy = cycles_per_sec;
|
||||
(void)do_div(cycles_per_jiffy, HZ);
|
||||
cycles_per_alarm = cycles_per_sec;
|
||||
(void)do_div(cycles_per_alarm, alarm_hz);
|
||||
cycles_per_msec = cycles_per_sec;
|
||||
(void)do_div(cycles_per_msec, 1000);
|
||||
|
||||
printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;"
|
||||
"cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy,
|
||||
cycles_per_alarm);
|
||||
|
||||
clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec,
|
||||
clocksource_vmi.shift);
|
||||
if (clocksource_register(&clocksource_vmi))
|
||||
printk(KERN_WARNING "Error registering VMITIME clocksource.");
|
||||
|
||||
/* Disable PIT. */
|
||||
outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */
|
||||
|
||||
/* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu
|
||||
* reduce the latency calling update_process_times. */
|
||||
vmi_timer_ops.set_alarm(
|
||||
VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
|
||||
per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
|
||||
cycles_per_alarm);
|
||||
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
|
||||
void __init vmi_timer_setup_boot_alarm(void)
|
||||
{
|
||||
local_irq_disable();
|
||||
|
||||
/* Route the interrupt to the correct vector. */
|
||||
apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
|
||||
|
||||
/* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */
|
||||
vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
|
||||
vmi_timer_ops.set_alarm(
|
||||
VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
|
||||
per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm,
|
||||
cycles_per_alarm);
|
||||
local_irq_enable();
|
||||
}
|
||||
|
||||
/* Initialize the time accounting variables for an AP on an SMP system.
|
||||
* Also, set the local alarm for the AP. */
|
||||
void __devinit vmi_timer_setup_secondary_alarm(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/* Route the interrupt to the correct vector. */
|
||||
apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR);
|
||||
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles();
|
||||
|
||||
vmi_timer_ops.set_alarm(
|
||||
VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
|
||||
cycles_per_alarm);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Update system wide (real) time accounting (e.g. jiffies, xtime). */
|
||||
static void vmi_account_real_cycles(unsigned long long cur_real_cycles)
|
||||
{
|
||||
long long cycles_not_accounted;
|
||||
|
||||
write_seqlock(&xtime_lock);
|
||||
|
||||
cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system;
|
||||
while (cycles_not_accounted >= cycles_per_jiffy) {
|
||||
/* systems wide jiffies. */
|
||||
do_timer(1);
|
||||
|
||||
cycles_not_accounted -= cycles_per_jiffy;
|
||||
real_cycles_accounted_system += cycles_per_jiffy;
|
||||
}
|
||||
|
||||
write_sequnlock(&xtime_lock);
|
||||
}
|
||||
|
||||
/* Update per-cpu process times. */
|
||||
static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu,
|
||||
unsigned long long cur_process_times_cycles)
|
||||
{
|
||||
long long cycles_not_accounted;
|
||||
cycles_not_accounted = cur_process_times_cycles -
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu);
|
||||
|
||||
while (cycles_not_accounted >= cycles_per_jiffy) {
|
||||
/* Account time to the current process. This includes
|
||||
* calling into the scheduler to decrement the timeslice
|
||||
* and possibly reschedule.*/
|
||||
update_process_times(user_mode(regs));
|
||||
/* XXX handle /proc/profile multiplier. */
|
||||
profile_tick(CPU_PROFILING);
|
||||
|
||||
cycles_not_accounted -= cycles_per_jiffy;
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_IDLE_HZ
|
||||
/* Update per-cpu idle times. Used when a no-hz halt is ended. */
|
||||
static void vmi_account_no_hz_idle_cycles(int cpu,
|
||||
unsigned long long cur_process_times_cycles)
|
||||
{
|
||||
long long cycles_not_accounted;
|
||||
unsigned long no_idle_hz_jiffies = 0;
|
||||
|
||||
cycles_not_accounted = cur_process_times_cycles -
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu);
|
||||
|
||||
while (cycles_not_accounted >= cycles_per_jiffy) {
|
||||
no_idle_hz_jiffies++;
|
||||
cycles_not_accounted -= cycles_per_jiffy;
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
|
||||
}
|
||||
/* Account time to the idle process. */
|
||||
account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies));
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Update per-cpu stolen time. */
|
||||
static void vmi_account_stolen_cycles(int cpu,
|
||||
unsigned long long cur_real_cycles,
|
||||
unsigned long long cur_avail_cycles)
|
||||
{
|
||||
long long stolen_cycles_not_accounted;
|
||||
unsigned long stolen_jiffies = 0;
|
||||
|
||||
if (cur_real_cycles < cur_avail_cycles)
|
||||
return;
|
||||
|
||||
stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles -
|
||||
per_cpu(stolen_cycles_accounted_cpu, cpu);
|
||||
|
||||
while (stolen_cycles_not_accounted >= cycles_per_jiffy) {
|
||||
stolen_jiffies++;
|
||||
stolen_cycles_not_accounted -= cycles_per_jiffy;
|
||||
per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy;
|
||||
}
|
||||
/* HACK: pass NULL to force time onto cpustat->steal. */
|
||||
account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies));
|
||||
}
|
||||
|
||||
/* Body of either IRQ0 interrupt handler (UP no local-APIC) or
|
||||
* local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */
|
||||
static void vmi_local_timer_interrupt(int cpu)
|
||||
{
|
||||
unsigned long long cur_real_cycles, cur_process_times_cycles;
|
||||
|
||||
cur_real_cycles = read_real_cycles();
|
||||
cur_process_times_cycles = read_available_cycles();
|
||||
/* Update system wide (real) time state (xtime, jiffies). */
|
||||
vmi_account_real_cycles(cur_real_cycles);
|
||||
/* Update per-cpu process times. */
|
||||
vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles);
|
||||
/* Update time stolen from this cpu by the hypervisor. */
|
||||
vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_IDLE_HZ
|
||||
|
||||
/* Must be called only from idle loop, with interrupts disabled. */
|
||||
int vmi_stop_hz_timer(void)
|
||||
{
|
||||
/* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */
|
||||
|
||||
unsigned long seq, next;
|
||||
unsigned long long real_cycles_expiry;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
BUG_ON(!irqs_disabled());
|
||||
if (sysctl_hz_timer != 0)
|
||||
return 0;
|
||||
|
||||
cpu_set(cpu, nohz_cpu_mask);
|
||||
smp_mb();
|
||||
|
||||
if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
|
||||
(next = next_timer_interrupt(),
|
||||
time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) {
|
||||
cpu_clear(cpu, nohz_cpu_mask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Convert jiffies to the real cycle counter. */
|
||||
do {
|
||||
seq = read_seqbegin(&xtime_lock);
|
||||
real_cycles_expiry = real_cycles_accounted_system +
|
||||
(long)(next - jiffies) * cycles_per_jiffy;
|
||||
} while (read_seqretry(&xtime_lock, seq));
|
||||
|
||||
/* This cpu is going idle. Disable the periodic alarm. */
|
||||
vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE);
|
||||
per_cpu(idle_start_jiffies, cpu) = jiffies;
|
||||
/* Set the real time alarm to expire at the next event. */
|
||||
vmi_timer_ops.set_alarm(
|
||||
VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL,
|
||||
real_cycles_expiry, 0);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void vmi_reenable_hz_timer(int cpu)
|
||||
{
|
||||
/* For /proc/vmi/info idle_hz stat. */
|
||||
per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu);
|
||||
per_cpu(vmi_idle_no_hz_irqs, cpu)++;
|
||||
|
||||
/* Don't bother explicitly cancelling the one-shot alarm -- at
|
||||
* worse we will receive a spurious timer interrupt. */
|
||||
vmi_timer_ops.set_alarm(
|
||||
VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE,
|
||||
per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm,
|
||||
cycles_per_alarm);
|
||||
/* Indicate this cpu is no longer nohz idle. */
|
||||
cpu_clear(cpu, nohz_cpu_mask);
|
||||
}
|
||||
|
||||
/* Called from interrupt handlers when (local) HZ timer is disabled. */
|
||||
void vmi_account_time_restart_hz_timer(void)
|
||||
{
|
||||
unsigned long long cur_real_cycles, cur_process_times_cycles;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
BUG_ON(!irqs_disabled());
|
||||
/* Account the time during which the HZ timer was disabled. */
|
||||
cur_real_cycles = read_real_cycles();
|
||||
cur_process_times_cycles = read_available_cycles();
|
||||
/* Update system wide (real) time state (xtime, jiffies). */
|
||||
vmi_account_real_cycles(cur_real_cycles);
|
||||
/* Update per-cpu idle times. */
|
||||
vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles);
|
||||
/* Update time stolen from this cpu by the hypervisor. */
|
||||
vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles);
|
||||
/* Reenable the hz timer. */
|
||||
vmi_reenable_hz_timer(cpu);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_NO_IDLE_HZ */
|
||||
|
||||
/* UP (and no local-APIC) VMI-timer alarm interrupt handler.
|
||||
* Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after
|
||||
* APIC setup and setup_boot_vmi_alarm() is called. */
|
||||
static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
vmi_local_timer_interrupt(smp_processor_id());
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_LOCAL_APIC
|
||||
|
||||
/* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector.
|
||||
* Also used in UP when CONFIG_X86_LOCAL_APIC.
|
||||
* The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */
|
||||
void smp_apic_vmi_timer_interrupt(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
/*
|
||||
* the NMI deadlock-detector uses this.
|
||||
*/
|
||||
per_cpu(irq_stat,cpu).apic_timer_irqs++;
|
||||
|
||||
/*
|
||||
* NOTE! We'd better ACK the irq immediately,
|
||||
* because timer handling can be slow.
|
||||
*/
|
||||
ack_APIC_irq();
|
||||
|
||||
/*
|
||||
* update_process_times() expects us to have done irq_enter().
|
||||
* Besides, if we don't timer interrupts ignore the global
|
||||
* interrupt lock, which is the WrongThing (tm) to do.
|
||||
*/
|
||||
irq_enter();
|
||||
vmi_local_timer_interrupt(cpu);
|
||||
irq_exit();
|
||||
set_irq_regs(old_regs);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_X86_LOCAL_APIC */
|
|
@ -26,12 +26,11 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
|
|||
OUTPUT_ARCH(i386)
|
||||
ENTRY(phys_startup_32)
|
||||
jiffies = jiffies_64;
|
||||
_proxy_pda = 1;
|
||||
|
||||
PHDRS {
|
||||
text PT_LOAD FLAGS(5); /* R_E */
|
||||
data PT_LOAD FLAGS(7); /* RWE */
|
||||
note PT_NOTE FLAGS(4); /* R__ */
|
||||
note PT_NOTE FLAGS(0); /* ___ */
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
|
@ -61,8 +60,6 @@ SECTIONS
|
|||
__stop___ex_table = .;
|
||||
}
|
||||
|
||||
RODATA
|
||||
|
||||
BUG_TABLE
|
||||
|
||||
. = ALIGN(4);
|
||||
|
@ -72,6 +69,8 @@ SECTIONS
|
|||
__tracedata_end = .;
|
||||
}
|
||||
|
||||
RODATA
|
||||
|
||||
/* writeable */
|
||||
. = ALIGN(4096);
|
||||
.data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */
|
||||
|
@ -117,22 +116,11 @@ SECTIONS
|
|||
|
||||
/* might get freed after init */
|
||||
. = ALIGN(4096);
|
||||
.smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
|
||||
__smp_alt_begin = .;
|
||||
__smp_alt_instructions = .;
|
||||
*(.smp_altinstructions)
|
||||
__smp_alt_instructions_end = .;
|
||||
}
|
||||
. = ALIGN(4);
|
||||
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
|
||||
__smp_locks = .;
|
||||
*(.smp_locks)
|
||||
__smp_locks_end = .;
|
||||
}
|
||||
.smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
|
||||
*(.smp_altinstr_replacement)
|
||||
__smp_alt_end = .;
|
||||
}
|
||||
/* will be freed after init
|
||||
* Following ALIGN() is required to make sure no other data falls on the
|
||||
* same page where __smp_alt_end is pointing as that page might be freed
|
||||
|
@ -178,9 +166,9 @@ SECTIONS
|
|||
}
|
||||
. = ALIGN(4);
|
||||
.parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
|
||||
__start_parainstructions = .;
|
||||
__parainstructions = .;
|
||||
*(.parainstructions)
|
||||
__stop_parainstructions = .;
|
||||
__parainstructions_end = .;
|
||||
}
|
||||
/* .exit.text is discard at runtime, not link time, to deal with references
|
||||
from .altinstructions and .eh_frame */
|
||||
|
@ -194,7 +182,7 @@ SECTIONS
|
|||
__initramfs_end = .;
|
||||
}
|
||||
#endif
|
||||
. = ALIGN(L1_CACHE_BYTES);
|
||||
. = ALIGN(4096);
|
||||
.data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
|
||||
__per_cpu_start = .;
|
||||
*(.data.percpu)
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
SECTIONS
|
||||
{
|
||||
. = VDSO_PRELINK + SIZEOF_HEADERS;
|
||||
. = VDSO_PRELINK_asm + SIZEOF_HEADERS;
|
||||
|
||||
.hash : { *(.hash) } :text
|
||||
.gnu.hash : { *(.gnu.hash) }
|
||||
|
@ -21,7 +21,7 @@ SECTIONS
|
|||
For the layouts to match, we need to skip more than enough
|
||||
space for the dynamic symbol table et al. If this amount
|
||||
is insufficient, ld -shared will barf. Just increase it here. */
|
||||
. = VDSO_PRELINK + 0x400;
|
||||
. = VDSO_PRELINK_asm + 0x400;
|
||||
|
||||
.text : { *(.text) } :text =0x90909090
|
||||
.note : { *(.note.*) } :text :note
|
||||
|
|
|
@ -43,7 +43,7 @@ EXPORT_SYMBOL(find_next_bit);
|
|||
*/
|
||||
int find_next_zero_bit(const unsigned long *addr, int size, int offset)
|
||||
{
|
||||
unsigned long * p = ((unsigned long *) addr) + (offset >> 5);
|
||||
const unsigned long *p = addr + (offset >> 5);
|
||||
int set = 0, bit = offset & 31, res;
|
||||
|
||||
if (bit) {
|
||||
|
@ -64,7 +64,7 @@ int find_next_zero_bit(const unsigned long *addr, int size, int offset)
|
|||
/*
|
||||
* No zero yet, search remaining full bytes for a zero
|
||||
*/
|
||||
res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr));
|
||||
res = find_first_zero_bit(p, size - 32 * (p - addr));
|
||||
return (offset + set + res);
|
||||
}
|
||||
EXPORT_SYMBOL(find_next_zero_bit);
|
||||
|
|
|
@ -25,6 +25,8 @@
|
|||
* 2 of the License, or (at your option) any later version.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/errno.h>
|
||||
|
||||
/*
|
||||
|
@ -36,8 +38,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
|
|||
*/
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl csum_partial
|
||||
|
||||
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
|
||||
|
||||
|
@ -48,9 +48,14 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
|
|||
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
|
||||
* alignment for the unrolled loop.
|
||||
*/
|
||||
csum_partial:
|
||||
ENTRY(csum_partial)
|
||||
CFI_STARTPROC
|
||||
pushl %esi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
movl 20(%esp),%eax # Function arg: unsigned int sum
|
||||
movl 16(%esp),%ecx # Function arg: int len
|
||||
movl 12(%esp),%esi # Function arg: unsigned char *buff
|
||||
|
@ -128,16 +133,27 @@ csum_partial:
|
|||
roll $8, %eax
|
||||
8:
|
||||
popl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE ebx
|
||||
popl %esi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE esi
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial)
|
||||
|
||||
#else
|
||||
|
||||
/* Version for PentiumII/PPro */
|
||||
|
||||
csum_partial:
|
||||
ENTRY(csum_partial)
|
||||
CFI_STARTPROC
|
||||
pushl %esi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
movl 20(%esp),%eax # Function arg: unsigned int sum
|
||||
movl 16(%esp),%ecx # Function arg: int len
|
||||
movl 12(%esp),%esi # Function arg: const unsigned char *buf
|
||||
|
@ -245,8 +261,14 @@ csum_partial:
|
|||
roll $8, %eax
|
||||
90:
|
||||
popl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE ebx
|
||||
popl %esi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE esi
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial)
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -278,19 +300,24 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
|
|||
.long 9999b, 6002f ; \
|
||||
.previous
|
||||
|
||||
.align 4
|
||||
.globl csum_partial_copy_generic
|
||||
|
||||
#ifndef CONFIG_X86_USE_PPRO_CHECKSUM
|
||||
|
||||
#define ARGBASE 16
|
||||
#define FP 12
|
||||
|
||||
csum_partial_copy_generic:
|
||||
ENTRY(csum_partial_copy_generic)
|
||||
CFI_STARTPROC
|
||||
subl $4,%esp
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
pushl %edi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl %esi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET esi, 0
|
||||
pushl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
movl ARGBASE+16(%esp),%eax # sum
|
||||
movl ARGBASE+12(%esp),%ecx # len
|
||||
movl ARGBASE+4(%esp),%esi # src
|
||||
|
@ -400,10 +427,19 @@ DST( movb %cl, (%edi) )
|
|||
.previous
|
||||
|
||||
popl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE ebx
|
||||
popl %esi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE esi
|
||||
popl %edi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE edi
|
||||
popl %ecx # equivalent to addl $4,%esp
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial_copy_generic)
|
||||
|
||||
#else
|
||||
|
||||
|
@ -421,10 +457,17 @@ DST( movb %cl, (%edi) )
|
|||
|
||||
#define ARGBASE 12
|
||||
|
||||
csum_partial_copy_generic:
|
||||
ENTRY(csum_partial_copy_generic)
|
||||
CFI_STARTPROC
|
||||
pushl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET ebx, 0
|
||||
pushl %edi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET edi, 0
|
||||
pushl %esi
|
||||
CFI_ADJUST_CFA_OFFSET 4
|
||||
CFI_REL_OFFSET esi, 0
|
||||
movl ARGBASE+4(%esp),%esi #src
|
||||
movl ARGBASE+8(%esp),%edi #dst
|
||||
movl ARGBASE+12(%esp),%ecx #len
|
||||
|
@ -485,9 +528,17 @@ DST( movb %dl, (%edi) )
|
|||
.previous
|
||||
|
||||
popl %esi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE esi
|
||||
popl %edi
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE edi
|
||||
popl %ebx
|
||||
CFI_ADJUST_CFA_OFFSET -4
|
||||
CFI_RESTORE ebx
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(csum_partial_copy_generic)
|
||||
|
||||
#undef ROUND
|
||||
#undef ROUND1
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
* return an error value in addition to the "real"
|
||||
* return value.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
|
||||
|
@ -24,19 +26,19 @@
|
|||
*/
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __get_user_1
|
||||
__get_user_1:
|
||||
ENTRY(__get_user_1)
|
||||
CFI_STARTPROC
|
||||
GET_THREAD_INFO(%edx)
|
||||
cmpl TI_addr_limit(%edx),%eax
|
||||
jae bad_get_user
|
||||
1: movzbl (%eax),%edx
|
||||
xorl %eax,%eax
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(__get_user_1)
|
||||
|
||||
.align 4
|
||||
.globl __get_user_2
|
||||
__get_user_2:
|
||||
ENTRY(__get_user_2)
|
||||
CFI_STARTPROC
|
||||
addl $1,%eax
|
||||
jc bad_get_user
|
||||
GET_THREAD_INFO(%edx)
|
||||
|
@ -45,10 +47,11 @@ __get_user_2:
|
|||
2: movzwl -1(%eax),%edx
|
||||
xorl %eax,%eax
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(__get_user_2)
|
||||
|
||||
.align 4
|
||||
.globl __get_user_4
|
||||
__get_user_4:
|
||||
ENTRY(__get_user_4)
|
||||
CFI_STARTPROC
|
||||
addl $3,%eax
|
||||
jc bad_get_user
|
||||
GET_THREAD_INFO(%edx)
|
||||
|
@ -57,11 +60,16 @@ __get_user_4:
|
|||
3: movl -3(%eax),%edx
|
||||
xorl %eax,%eax
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
ENDPROC(__get_user_4)
|
||||
|
||||
bad_get_user:
|
||||
CFI_STARTPROC
|
||||
xorl %edx,%edx
|
||||
movl $-14,%eax
|
||||
ret
|
||||
CFI_ENDPROC
|
||||
END(bad_get_user)
|
||||
|
||||
.section __ex_table,"a"
|
||||
.long 1b,bad_get_user
|
||||
|
|
|
@ -8,6 +8,8 @@
|
|||
* return an error value in addition to the "real"
|
||||
* return value.
|
||||
*/
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/dwarf2.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
|
||||
|
@ -23,23 +25,28 @@
|
|||
* as they get called from within inline assembly.
|
||||
*/
|
||||
|
||||
#define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx)
|
||||
#define EXIT popl %ebx ; ret
|
||||
#define ENTER CFI_STARTPROC ; \
|
||||
pushl %ebx ; \
|
||||
CFI_ADJUST_CFA_OFFSET 4 ; \
|
||||
CFI_REL_OFFSET ebx, 0 ; \
|
||||
GET_THREAD_INFO(%ebx)
|
||||
#define EXIT popl %ebx ; \
|
||||
CFI_ADJUST_CFA_OFFSET -4 ; \
|
||||
CFI_RESTORE ebx ; \
|
||||
ret ; \
|
||||
CFI_ENDPROC
|
||||
|
||||
.text
|
||||
.align 4
|
||||
.globl __put_user_1
|
||||
__put_user_1:
|
||||
ENTRY(__put_user_1)
|
||||
ENTER
|
||||
cmpl TI_addr_limit(%ebx),%ecx
|
||||
jae bad_put_user
|
||||
1: movb %al,(%ecx)
|
||||
xorl %eax,%eax
|
||||
EXIT
|
||||
ENDPROC(__put_user_1)
|
||||
|
||||
.align 4
|
||||
.globl __put_user_2
|
||||
__put_user_2:
|
||||
ENTRY(__put_user_2)
|
||||
ENTER
|
||||
movl TI_addr_limit(%ebx),%ebx
|
||||
subl $1,%ebx
|
||||
|
@ -48,10 +55,9 @@ __put_user_2:
|
|||
2: movw %ax,(%ecx)
|
||||
xorl %eax,%eax
|
||||
EXIT
|
||||
ENDPROC(__put_user_2)
|
||||
|
||||
.align 4
|
||||
.globl __put_user_4
|
||||
__put_user_4:
|
||||
ENTRY(__put_user_4)
|
||||
ENTER
|
||||
movl TI_addr_limit(%ebx),%ebx
|
||||
subl $3,%ebx
|
||||
|
@ -60,10 +66,9 @@ __put_user_4:
|
|||
3: movl %eax,(%ecx)
|
||||
xorl %eax,%eax
|
||||
EXIT
|
||||
ENDPROC(__put_user_4)
|
||||
|
||||
.align 4
|
||||
.globl __put_user_8
|
||||
__put_user_8:
|
||||
ENTRY(__put_user_8)
|
||||
ENTER
|
||||
movl TI_addr_limit(%ebx),%ebx
|
||||
subl $7,%ebx
|
||||
|
@ -73,10 +78,16 @@ __put_user_8:
|
|||
5: movl %edx,4(%ecx)
|
||||
xorl %eax,%eax
|
||||
EXIT
|
||||
ENDPROC(__put_user_8)
|
||||
|
||||
bad_put_user:
|
||||
CFI_STARTPROC simple
|
||||
CFI_DEF_CFA esp, 2*4
|
||||
CFI_OFFSET eip, -1*4
|
||||
CFI_OFFSET ebx, -2*4
|
||||
movl $-14,%eax
|
||||
EXIT
|
||||
END(bad_put_user)
|
||||
|
||||
.section __ex_table,"a"
|
||||
.long 1b,bad_put_user
|
||||
|
|
|
@@ -716,7 +716,6 @@ do { \
unsigned long __copy_to_user_ll(void __user *to, const void *from,
unsigned long n)
{
BUG_ON((long) n < 0);
#ifndef CONFIG_X86_WP_WORKS_OK
if (unlikely(boot_cpu_data.wp_works_ok == 0) &&
((unsigned long )to) < TASK_SIZE) {
@@ -785,7 +784,6 @@ EXPORT_SYMBOL(__copy_to_user_ll);
unsigned long __copy_from_user_ll(void *to, const void __user *from,
unsigned long n)
{
BUG_ON((long)n < 0);
if (movsl_is_ok(to, from, n))
__copy_user_zeroing(to, from, n);
else
@@ -797,7 +795,6 @@ EXPORT_SYMBOL(__copy_from_user_ll);
unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from,
unsigned long n)
{
BUG_ON((long)n < 0);
if (movsl_is_ok(to, from, n))
__copy_user(to, from, n);
else
@@ -810,7 +807,6 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero);
unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long n)
{
BUG_ON((long)n < 0);
#ifdef CONFIG_X86_INTEL_USERCOPY
if ( n > 64 && cpu_has_xmm2)
n = __copy_user_zeroing_intel_nocache(to, from, n);
@@ -825,7 +821,6 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from,
unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long n)
{
BUG_ON((long)n < 0);
#ifdef CONFIG_X86_INTEL_USERCOPY
if ( n > 64 && cpu_has_xmm2)
n = __copy_user_intel_nocache(to, from, n);
@@ -853,7 +848,6 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from,
unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
BUG_ON((long) n < 0);
if (access_ok(VERIFY_WRITE, to, n))
n = __copy_to_user(to, from, n);
return n;
@@ -879,7 +873,6 @@ EXPORT_SYMBOL(copy_to_user);
unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
BUG_ON((long) n < 0);
if (access_ok(VERIFY_READ, from, n))
n = __copy_from_user(to, from, n);
else

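Again purely illustrative (not from the commit): copy_to_user()/copy_from_user(), whose i386 back-ends are shown above, return the number of bytes that could not be copied, so 0 means complete success. The helper name below is invented for the example.

    #include <asm/uaccess.h>
    #include <asm/errno.h>

    /* Hypothetical ioctl-style argument fetch built on the functions above. */
    static long fetch_args(void *kbuf, const void __user *ubuf, unsigned long len)
    {
            if (copy_from_user(kbuf, ubuf, len))
                    return -EFAULT;         /* treat any partial copy as failure */
            return 0;
    }
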
@@ -45,7 +45,7 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = {
};


static int probe_bigsmp(void)
static int __init probe_bigsmp(void)
{
if (def_to_bigsmp)
dmi_bigsmp = 1;

@@ -25,4 +25,45 @@ static int probe_es7000(void)
return 0;
}

extern void es7000_sw_apic(void);
static void __init enable_apic_mode(void)
{
es7000_sw_apic();
return;
}

static __init int mps_oem_check(struct mp_config_table *mpc, char *oem,
char *productid)
{
if (mpc->mpc_oemptr) {
struct mp_config_oemtable *oem_table =
(struct mp_config_oemtable *)mpc->mpc_oemptr;
if (!strncmp(oem, "UNISYS", 6))
return parse_unisys_oem((char *)oem_table);
}
return 0;
}

#ifdef CONFIG_ACPI
/* Hook from generic ACPI tables.c */
static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
unsigned long oem_addr;
if (!find_unisys_acpi_oem_table(&oem_addr)) {
if (es7000_check_dsdt())
return parse_unisys_oem((char *)oem_addr);
else {
setup_unisys();
return 1;
}
}
return 0;
}
#else
static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
return 0;
}
#endif

struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000);

@@ -573,15 +573,7 @@ do_boot_cpu(__u8 cpu)
/* init_tasks (in sched.c) is indexed logically */
stack_start.esp = (void *) idle->thread.esp;

/* Pre-allocate and initialize the CPU's GDT and PDA so it
doesn't have to do any memory allocation during the
delicate CPU-bringup phase. */
if (!init_gdt(cpu, idle)) {
printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu);
cpucount--;
return;
}

init_gdt(cpu, idle);
irq_ctx_init(cpu);

/* Note: Don't modify initial ss override */
@@ -748,12 +740,6 @@ initialize_secondary(void)
set_current(hard_get_current());
#endif

/*
* switch to the per CPU GDT we already set up
* in do_boot_cpu()
*/
cpu_set_gdt(current_thread_info()->cpu);

/*
* We don't actually need to load the full TSS,
* basically just the stack pointer and the eip.

@@ -20,6 +20,7 @@
#include <linux/tty.h>
#include <linux/vt_kern.h> /* For unblank_screen() */
#include <linux/highmem.h>
#include <linux/bootmem.h> /* for max_low_pfn */
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
@@ -301,7 +302,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
struct mm_struct *mm;
struct vm_area_struct * vma;
unsigned long address;
unsigned long page;
int write, si_code;

/* get the address */
@@ -510,7 +510,9 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
bust_spinlocks(1);

if (oops_may_print()) {
#ifdef CONFIG_X86_PAE
__typeof__(pte_val(__pte(0))) page;

#ifdef CONFIG_X86_PAE
if (error_code & 16) {
pte_t *pte = lookup_address(address);

@@ -519,7 +521,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
"NX-protected page - exploit attempt? "
"(uid: %d)\n", current->uid);
}
#endif
#endif
if (address < PAGE_SIZE)
printk(KERN_ALERT "BUG: unable to handle kernel NULL "
"pointer dereference");
@@ -529,25 +531,38 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
printk(" at virtual address %08lx\n",address);
printk(KERN_ALERT " printing eip:\n");
printk("%08lx\n", regs->eip);
}
page = read_cr3();
page = ((unsigned long *) __va(page))[address >> 22];
if (oops_may_print())

page = read_cr3();
page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
#ifdef CONFIG_X86_PAE
printk(KERN_ALERT "*pdpt = %016Lx\n", page);
if ((page >> PAGE_SHIFT) < max_low_pfn
&& page & _PAGE_PRESENT) {
page &= PAGE_MASK;
page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
& (PTRS_PER_PMD - 1)];
printk(KERN_ALERT "*pde = %016Lx\n", page);
page &= ~_PAGE_NX;
}
#else
printk(KERN_ALERT "*pde = %08lx\n", page);
/*
* We must not directly access the pte in the highpte
* case, the page table might be allocated in highmem.
* And lets rather not kmap-atomic the pte, just in case
* it's allocated already.
*/
#ifndef CONFIG_HIGHPTE
if ((page & 1) && oops_may_print()) {
page &= PAGE_MASK;
address &= 0x003ff000;
page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
printk(KERN_ALERT "*pte = %08lx\n", page);
}
#endif

/*
* We must not directly access the pte in the highpte
* case if the page table is located in highmem.
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already.
*/
if ((page >> PAGE_SHIFT) < max_low_pfn
&& (page & _PAGE_PRESENT)) {
page &= PAGE_MASK;
page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
& (PTRS_PER_PTE - 1)];
printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page);
}
}

tsk->thread.cr2 = address;
tsk->thread.trap_no = 14;
tsk->thread.error_code = error_code;
@@ -588,7 +603,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
}

#ifndef CONFIG_X86_PAE
void vmalloc_sync_all(void)
{
/*
@@ -601,6 +615,9 @@ void vmalloc_sync_all(void)
static unsigned long start = TASK_SIZE;
unsigned long address;

if (SHARED_KERNEL_PMD)
return;

BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
if (!test_bit(pgd_index(address), insync)) {
@@ -623,4 +640,3 @@ void vmalloc_sync_all(void)
start = address + PGDIR_SIZE;
}
}
#endif

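A side note on the dump logic above (illustrative only; the ILLUST_* names are invented): in the non-PAE two-level layout that the %08lx branch handles, a linear address splits into a 10-bit page-directory index, a 10-bit page-table index and a 12-bit page offset, which is exactly the arithmetic behind the *pde/*pte lines in the oops output.

    /* How the fault address is decomposed in the two-level (!PAE) case. */
    #define ILLUST_PGDIR_SHIFT   22     /* top 10 bits index the pgd */
    #define ILLUST_PTRS_PER_PTE  1024   /* middle 10 bits index the pte table */

    static unsigned long illust_pgd_index(unsigned long address)
    {
            return address >> ILLUST_PGDIR_SHIFT;               /* which *pde */
    }

    static unsigned long illust_pte_index(unsigned long address)
    {
            return (address >> 12) & (ILLUST_PTRS_PER_PTE - 1); /* which *pte */
    }
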
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
* However when holding an atomic kmap is is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic(struct page *page, enum km_type type)
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
{
enum fixed_addresses idx;
unsigned long vaddr;
@@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type)
return page_address(page);

vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
set_pte(kmap_pte-idx, mk_pte(page, prot));
arch_flush_lazy_mmu_mode();

return (void*) vaddr;
}

void *kmap_atomic(struct page *page, enum km_type type)
{
return kmap_atomic_prot(page, type, kmap_prot);
}

void kunmap_atomic(void *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
@@ -67,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
#endif
}

arch_flush_lazy_mmu_mode();
pagefault_enable();
}

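For context (illustrative, not part of the commit): kmap_atomic() is now a thin wrapper, and callers that need a mapping with a non-default protection can go through the new kmap_atomic_prot(). The helper below is an invented example of the ordinary wrapper-based usage.

    #include <linux/highmem.h>
    #include <linux/string.h>

    /* Hypothetical user of the atomic kmap API shown above. */
    static void zero_highpage_example(struct page *page)
    {
            /* same as kmap_atomic_prot(page, KM_USER0, kmap_prot) */
            void *vaddr = kmap_atomic(page, KM_USER0);

            memset(vaddr, 0, PAGE_SIZE);
            kunmap_atomic(vaddr, KM_USER0);
    }
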
@@ -22,6 +22,7 @@
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
#include <linux/slab.h>
@@ -42,6 +43,7 @@
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/paravirt.h>

unsigned int __VMALLOC_RESERVE = 128 << 20;

@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
pmd_t *pmd_table;

#ifdef CONFIG_X86_PAE
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
if (pmd_table != pmd_offset(pud, 0))
BUG();
#else
if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);

paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
pud = pud_offset(pgd, 0);
if (pmd_table != pmd_offset(pud, 0))
BUG();
}
#endif
pud = pud_offset(pgd, 0);
pmd_table = pmd_offset(pud, 0);
#endif

return pmd_table;
}

@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
*/
static pte_t * __init one_page_table_init(pmd_t *pmd)
{
if (pmd_none(*pmd)) {
if (!(pmd_val(*pmd) & _PAGE_PRESENT)) {
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);

paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
if (page_table != pte_offset_kernel(pmd, 0))
BUG();

return page_table;
BUG_ON(page_table != pte_offset_kernel(pmd, 0));
}

return pte_offset_kernel(pmd, 0);
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
int pgd_idx, pmd_idx;
unsigned long vaddr;
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end
pgd = pgd_base + pgd_idx;

for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
if (pgd_none(*pgd))
one_md_table_init(pgd);
pud = pud_offset(pgd, vaddr);
pmd = pmd_offset(pud, vaddr);
pmd = one_md_table_init(pgd);
pmd = pmd + pmd_index(vaddr);
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
if (pmd_none(*pmd))
one_page_table_init(pmd);
one_page_table_init(pmd);

vaddr += PMD_SIZE;
}
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
/* Map with big pages if possible, otherwise create normal page tables. */
if (cpu_has_pse) {
unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;

if (is_kernel_text(address) || is_kernel_text(address2))
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
else
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));

pfn += PTRS_PER_PTE;
} else {
pte = one_page_table_init(pmd);

for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
if (is_kernel_text(address))
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
else
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
for (pte_ofs = 0;
pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
if (is_kernel_text(address))
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
else
set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
}
}
}

@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void);
#define remap_numa_kva() do {} while (0)
#endif

static void __init pagetable_init (void)
void __init native_pagetable_setup_start(pgd_t *base)
{
unsigned long vaddr;
pgd_t *pgd_base = swapper_pg_dir;

#ifdef CONFIG_X86_PAE
int i;
/* Init entries of the first-level page table to the zero page */
for (i = 0; i < PTRS_PER_PGD; i++)
set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT));

/*
* Init entries of the first-level page table to the
* zero page, if they haven't already been set up.
*
* In a normal native boot, we'll be running on a
* pagetable rooted in swapper_pg_dir, but not in PAE
* mode, so this will end up clobbering the mappings
* for the lower 24Mbytes of the address space,
* without affecting the kernel address space.
*/
for (i = 0; i < USER_PTRS_PER_PGD; i++)
set_pgd(&base[i],
__pgd(__pa(empty_zero_page) | _PAGE_PRESENT));

/* Make sure kernel address space is empty so that a pagetable
will be allocated for it. */
memset(&base[USER_PTRS_PER_PGD], 0,
KERNEL_PGD_PTRS * sizeof(pgd_t));
#else
paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT);
#endif
}

void __init native_pagetable_setup_done(pgd_t *base)
{
#ifdef CONFIG_X86_PAE
/*
* Add low memory identity-mappings - SMP needs it when
* starting up on an AP from real-mode. In the non-PAE
* case we already have these mappings through head.S.
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
set_pgd(&base[0], base[USER_PTRS_PER_PGD]);
#endif
}

/*
* Build a proper pagetable for the kernel mappings. Up until this
* point, we've been running on some set of pagetables constructed by
* the boot process.
*
* If we're booting on native hardware, this will be a pagetable
* constructed in arch/i386/kernel/head.S, and not running in PAE mode
* (even if we'll end up running in PAE). The root of the pagetable
* will be swapper_pg_dir.
*
* If we're booting paravirtualized under a hypervisor, then there are
* more options: we may already be running PAE, and the pagetable may
* or may not be based in swapper_pg_dir. In any case,
* paravirt_pagetable_setup_start() will set up swapper_pg_dir
* appropriately for the rest of the initialization to work.
*
* In general, pagetable_init() assumes that the pagetable may already
* be partially populated, and so it avoids stomping on any existing
* mappings.
*/
static void __init pagetable_init (void)
{
unsigned long vaddr, end;
pgd_t *pgd_base = swapper_pg_dir;

paravirt_pagetable_setup_start(pgd_base);

/* Enable PSE if available */
if (cpu_has_pse) {
if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
}

/* Enable PGE if available */
if (cpu_has_pge) {
@@ -371,20 +424,12 @@ static void __init pagetable_init (void)
* created - mappings will be set by set_fixmap():
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
page_table_range_init(vaddr, 0, pgd_base);
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
page_table_range_init(vaddr, end, pgd_base);

permanent_kmaps_init(pgd_base);

#ifdef CONFIG_X86_PAE
/*
* Add low memory identity-mappings - SMP needs it when
* starting up on an AP from real-mode. In the non-PAE
* case we already have these mappings through head.S.
* All user-space mappings are explicitly cleared after
* SMP startup.
*/
set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]);
#endif
paravirt_pagetable_setup_done(pgd_base);
}

#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)

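For reference (a sketch under stated assumptions, not text from this diff): on a build without CONFIG_PARAVIRT the two hooks called by pagetable_init() above simply alias the native helpers defined here, roughly as the defines below show; with CONFIG_PARAVIRT the same calls are routed through the hypervisor's ops table instead, which is why the native versions were split out as standalone functions.

    /* Approximate non-paravirt fallback wiring (assumed, for illustration). */
    #define paravirt_pagetable_setup_start(pgd)  native_pagetable_setup_start(pgd)
    #define paravirt_pagetable_setup_done(pgd)   native_pagetable_setup_done(pgd)
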
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache;

void __init pgtable_cache_init(void)
{
size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);

if (PTRS_PER_PMD > 1) {
pmd_cache = kmem_cache_create("pmd",
PTRS_PER_PMD*sizeof(pmd_t),
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void)
NULL);
if (!pmd_cache)
panic("pgtable_cache_init(): cannot create pmd cache");

if (!SHARED_KERNEL_PMD) {
/* If we're in PAE mode and have a non-shared
kernel pmd, then the pgd size must be a
page size. This is because the pgd_list
links through the page structure, so there
can only be one pgd per page for this to
work. */
pgd_size = PAGE_SIZE;
}
}
pgd_cache = kmem_cache_create("pgd",
PTRS_PER_PGD*sizeof(pgd_t),
PTRS_PER_PGD*sizeof(pgd_t),
pgd_size,
pgd_size,
0,
pgd_ctor,
PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
(!SHARED_KERNEL_PMD) ? pgd_dtor : NULL);
if (!pgd_cache)
panic("pgtable_cache_init(): Cannot create pgd cache");
}
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void)

void mark_rodata_ro(void)
{
unsigned long addr = (unsigned long)__start_rodata;
unsigned long start = PFN_ALIGN(_text);
unsigned long size = PFN_ALIGN(_etext) - start;

for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
#ifdef CONFIG_HOTPLUG_CPU
/* It must still be possible to apply SMP alternatives. */
if (num_possible_cpus() <= 1)
#endif
{
change_page_attr(virt_to_page(start),
size >> PAGE_SHIFT, PAGE_KERNEL_RX);
printk("Write protecting the kernel text: %luk\n", size >> 10);
}

printk("Write protecting the kernel read-only data: %uk\n",
(__end_rodata - __start_rodata) >> 10);
start += size;
size = (unsigned long)__end_rodata - start;
change_page_attr(virt_to_page(start),
size >> PAGE_SHIFT, PAGE_KERNEL_RO);
printk("Write protecting the kernel read-only data: %luk\n",
size >> 10);

/*
* change_page_attr() requires a global_flush_tlb() call after it.
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
unsigned long addr;

for (addr = begin; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
free_page(addr);
struct page *page = pfn_to_page(addr >> PAGE_SHIFT);
ClearPageReserved(page);
init_page_count(page);
memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE);
__free_page(page);
totalram_pages++;
}
printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
}

void free_initmem(void)
{
free_init_pages("unused kernel memory",
(unsigned long)(&__init_begin),
(unsigned long)(&__init_end));
__pa_symbol(&__init_begin),
__pa_symbol(&__init_end));
}

#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
free_init_pages("initrd memory", start, end);
free_init_pages("initrd memory", __pa(start), __pa(end));
}
#endif

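One detail worth spelling out (illustrative helper with an invented name): free_init_pages() now takes physical addresses — note the __pa_symbol()/__pa() at the call sites above — and resolves each page through its frame number rather than by virtual address.

    #include <linux/mm.h>     /* pfn_to_page() */
    #include <asm/page.h>     /* PAGE_SHIFT */

    /* The phys -> pfn -> struct page conversion relied on above. */
    static struct page *illust_phys_to_page(unsigned long phys)
    {
            unsigned long pfn = phys >> PAGE_SHIFT;   /* 4 KiB frames */
            return pfn_to_page(pfn);
    }
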
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
unsigned long flags;

set_pte_atomic(kpte, pte); /* change init_mm */
if (PTRS_PER_PMD > 1)
if (SHARED_KERNEL_PMD)
return;

spin_lock_irqsave(&pgd_lock, flags);
@@ -142,7 +142,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
return -EINVAL;
kpte_page = virt_to_page(kpte);
if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, prot));
} else {
pgprot_t ref_prot;
@@ -158,7 +158,7 @@ __change_page_attr(struct page *page, pgprot_t prot)
kpte_page = split;
}
page_private(kpte_page)++;
} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
} else if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
BUG_ON(page_private(kpte_page) == 0);
page_private(kpte_page)--;

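Minor note (hedged, not from this diff): pte_huge() is just a readability wrapper around the same _PAGE_PSE bit the old open-coded test checked; on i386 it amounts to something like the sketch below, with the macro value shown only for illustration.

    #define ILLUST_PAGE_PSE 0x080                  /* large-page (PSE) bit */

    static inline int illust_pte_is_huge(unsigned long pte_low)
    {
            return pte_low & ILLUST_PAGE_PSE;
    }
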
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags)
}

static int fixmaps;
#ifndef CONFIG_COMPAT_VDSO
unsigned long __FIXADDR_TOP = 0xfffff000;
EXPORT_SYMBOL(__FIXADDR_TOP);
#endif

void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
{
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve)
BUG_ON(fixmaps > 0);
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
(int)-reserve);
#ifdef CONFIG_COMPAT_VDSO
BUG_ON(reserve != 0);
#else
__FIXADDR_TOP = -reserve - PAGE_SIZE;
__VMALLOC_RESERVE += reserve;
#endif
}

pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -238,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd)
set_page_private(next, (unsigned long)pprev);
}

#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags;

if (PTRS_PER_PMD == 1) {
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
}
/* !PAE, no pagetable sharing */
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));

spin_lock_irqsave(&pgd_lock, flags);

/* must happen under lock */
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);

if (PTRS_PER_PMD > 1)
return;

/* must happen under lock */
paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
__pa(swapper_pg_dir) >> PAGE_SHIFT,
USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);

__pa(swapper_pg_dir) >> PAGE_SHIFT,
USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
#else /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
/* PAE, kernel PMD may be shared */

if (SHARED_KERNEL_PMD) {
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
swapper_pg_dir + USER_PTRS_PER_PGD,
KERNEL_PGD_PTRS);
} else {
unsigned long flags;

memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_add(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}
}
#endif /* PTRS_PER_PMD */

/* never called when PTRS_PER_PMD > 1 */
void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
{
unsigned long flags; /* can be called from interrupt context */

BUG_ON(SHARED_KERNEL_PMD);

paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
spin_lock_irqsave(&pgd_lock, flags);
pgd_list_del(pgd);
spin_unlock_irqrestore(&pgd_lock, flags);
}

#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)

/* If we allocate a pmd for part of the kernel address space, then
make sure its initialized with the appropriate kernel mappings.
Otherwise use a cached zeroed pmd. */
static pmd_t *pmd_cache_alloc(int idx)
{
pmd_t *pmd;

if (idx >= USER_PTRS_PER_PGD) {
pmd = (pmd_t *)__get_free_page(GFP_KERNEL);

if (pmd)
memcpy(pmd,
(void *)pgd_page_vaddr(swapper_pg_dir[idx]),
sizeof(pmd_t) * PTRS_PER_PMD);
} else
pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);

return pmd;
}

static void pmd_cache_free(pmd_t *pmd, int idx)
{
if (idx >= USER_PTRS_PER_PGD)
free_page((unsigned long)pmd);
else
kmem_cache_free(pmd_cache, pmd);
}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
int i;
@@ -282,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
if (PTRS_PER_PMD == 1 || !pgd)
return pgd;

for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
pmd_t *pmd = pmd_cache_alloc(i);

if (!pmd)
goto out_oom;

paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
}
@@ -296,7 +342,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
kmem_cache_free(pmd_cache, pmd);
pmd_cache_free(pmd, i);
}
kmem_cache_free(pgd_cache, pgd);
return NULL;
@@ -308,11 +354,11 @@ void pgd_free(pgd_t *pgd)

/* in the PAE case user pgd entries are overwritten before usage */
if (PTRS_PER_PMD > 1)
for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
pgd_t pgdent = pgd[i];
void* pmd = (void *)__va(pgd_val(pgdent)-1);
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
kmem_cache_free(pmd_cache, pmd);
pmd_cache_free(pmd, i);
}
/* in the non-PAE case, free_pgtables() clears user pgd entries */
kmem_cache_free(pgd_cache, pgd);

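Worked example (numbers assume the usual PAE configuration with a 3G/1G split; not text from the commit): PTRS_PER_PGD is 4 and USER_PTRS_PER_PGD is 3, so the new UNSHARED_PTRS_PER_PGD define above makes pgd_alloc()/pgd_free() populate either just the three user pmds (shared kernel pmd) or all four including the kernel one (non-shared kernel pmd, e.g. under a paravirt hypervisor that needs per-pgd kernel pmds).

    /* Illustrative restatement of UNSHARED_PTRS_PER_PGD for PAE defaults. */
    enum { ILLUST_PTRS_PER_PGD = 4, ILLUST_USER_PTRS_PER_PGD = 3 };

    static int illust_unshared_ptrs_per_pgd(int shared_kernel_pmd)
    {
            return shared_kernel_pmd ? ILLUST_USER_PTRS_PER_PGD
                                     : ILLUST_PTRS_PER_PGD;
    }
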
@@ -414,6 +414,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
user space an consistent name. */
cpu_type = "x86-64/hammer";
break;
case 0x10:
model = &op_athlon_spec;
cpu_type = "x86-64/family10";
break;
}
break;

@@ -6,7 +6,7 @@
in the right sequence from here. */
static __init int pci_access_init(void)
{
int type = 0;
int type __attribute__((unused)) = 0;

#ifdef CONFIG_PCI_DIRECT
type = pci_direct_probe();

@@ -60,14 +60,19 @@ static const char __init *pci_mmcfg_e7520(void)
u32 win;
pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win);

pci_mmcfg_config_num = 1;
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)
return NULL;
pci_mmcfg_config[0].address = (win & 0xf000) << 16;
pci_mmcfg_config[0].pci_segment = 0;
pci_mmcfg_config[0].start_bus_number = 0;
pci_mmcfg_config[0].end_bus_number = 255;
win = win & 0xf000;
if(win == 0x0000 || win == 0xf000)
pci_mmcfg_config_num = 0;
else {
pci_mmcfg_config_num = 1;
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)
return NULL;
pci_mmcfg_config[0].address = win << 16;
pci_mmcfg_config[0].pci_segment = 0;
pci_mmcfg_config[0].start_bus_number = 0;
pci_mmcfg_config[0].end_bus_number = 255;
}

return "Intel Corporation E7520 Memory Controller Hub";
}
@@ -108,6 +113,10 @@ static const char __init *pci_mmcfg_intel_945(void)
if ((pciexbar & mask) & 0x0fffffffU)
pci_mmcfg_config_num = 0;

/* Don't hit the APIC registers and their friends */
if ((pciexbar & mask) >= 0xf0000000U)
pci_mmcfg_config_num = 0;

if (pci_mmcfg_config_num) {
pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL);
if (!pci_mmcfg_config)

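To make the register arithmetic above explicit (illustrative helper with an invented name): the E7520 probe reads the 16-bit window register at config offset 0xce of bus 0, device 0, function 0; only bits 15:12 select the MMCONFIG base, and the all-zeroes/all-ones values are now rejected as "not configured".

    /* Validation and base-address computation mirrored from the hunk above. */
    static unsigned long illust_e7520_mmcfg_base(unsigned int win)
    {
            win &= 0xf000;                            /* window select bits */
            if (win == 0x0000 || win == 0xf000)
                    return 0;                         /* treat as disabled */
            return (unsigned long)win << 16;          /* e.g. 0xe000 -> 0xe0000000 */
    }
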
@@ -21,6 +21,7 @@ unsigned long saved_context_eflags;

void __save_processor_state(struct saved_context *ctxt)
{
mtrr_save_fixed_ranges(NULL);
kernel_fpu_begin();

/*

@@ -16,6 +16,9 @@
/* Defined in arch/i386/power/swsusp.S */
extern int restore_image(void);

/* References to section boundaries */
extern const void __nosave_begin, __nosave_end;

/* Pointer to the temporary resume page tables */
pgd_t *resume_pg_dir;

@@ -156,3 +159,14 @@ int swsusp_arch_resume(void)
restore_image();
return 0;
}

/*
* pfn_is_nosave - check if given pfn is in the 'nosave' section
*/

int pfn_is_nosave(unsigned long pfn)
{
unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
}

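The bounds check added in pfn_is_nosave() above, spelled out as plain arithmetic (illustrative only, generic names): a frame is "nosave" when it lies inside the physical span of the __nosave section, with the end rounded up to a whole page.

    #include <asm/page.h>     /* PAGE_SHIFT, PAGE_SIZE */

    /* Same computation with generic names. */
    static int illust_pfn_in_span(unsigned long pfn,
                                  unsigned long begin_phys, unsigned long end_phys)
    {
            unsigned long first = begin_phys >> PAGE_SHIFT;
            unsigned long last  = (end_phys + PAGE_SIZE - 1) >> PAGE_SHIFT; /* PAGE_ALIGN */

            return pfn >= first && pfn < last;
    }
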
@@ -110,7 +110,7 @@ SECTIONS
__initramfs_end = .;
#endif

. = ALIGN(32);
. = ALIGN(4096);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;

@@ -119,7 +119,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
. = ALIGN(32);
. = ALIGN(_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;

@@ -181,7 +181,7 @@ SECTIONS
.init.ramfs : { *(.init.ramfs) }
__initramfs_end = .;
#endif
. = ALIGN(32);
. = ALIGN(ASM_PAGE_SIZE);
__per_cpu_start = .;
.data.percpu : { *(.data.percpu) }
__per_cpu_end = .;

Some files were not shown because too many files have changed in this diff.