Merge branch 'x86-core-v2-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

This merges in:

  x86/build, x86/microcode, x86/spinlocks, x86/memory-corruption-check,
  x86/early-printk, x86/xsave, x86/quirks, x86/setup, x86/signal,
  core/signal, x86/urgent, x86/xen

* 'x86-core-v2-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (142 commits)
  x86: make processor type select depend on CONFIG_EMBEDDED
  x86: extend processor type select help text
  x86, amd-iommu: propagate PCI device enabling error
  warnings: fix arch/x86/kernel/io_apic_64.c
  warnings: fix arch/x86/kernel/early_printk.c
  x86, fpu: check __clear_user() return value
  x86: memory corruption check - cleanup
  x86: ioperm user_regset
  xen: do not reserve 2 pages of padding between hypervisor and fixmap.
  xen: use spin_lock_nest_lock when pinning a pagetable
  x86: xsave: set FP, SSE bits in the xsave header in the user sigcontext
  x86: xsave: fix error condition in save_i387_xstate()
  x86: SB450: deprioritize DMI quirks
  x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC
  x86: replace a magic number with a named constant in the VESA boot code
  x86 setup: remove IMAGE_OFFSET
  x86 setup: remove DEF_INITSEG and DEF_SETUPSEG
  Revert "x86: fix ghost EDD devices in /sys again"
  x86 setup: fix ghost entries under /sys/firmware/edd take 3
  x86: signal: remove indent in restore_sigcontext()
  ...
Linus Torvalds
2008-10-12 12:04:59 -07:00
commit 807f4f8cdd
105 changed files with 5027 additions and 2203 deletions

@@ -658,11 +658,12 @@ and is between 256 and 4096 characters. It is defined in the file
	earlyprintk=	[X86-32,X86-64,SH,BLACKFIN]
			earlyprintk=vga
			earlyprintk=serial[,ttySn[,baudrate]]
			earlyprintk=dbgp
			Append ",keep" to not disable it when the real console
			takes over.
-			Only vga or serial at a time, not both.
+			Only vga or serial or usb debug port at a time.
			Currently only ttyS0 and ttyS1 are supported.
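
As a quick illustration (not part of the patch), boot command lines using these options might look like this; the baud rate shown is an arbitrary example:

	earlyprintk=serial,ttyS0,115200,keep
	earlyprintk=dbgp

Note that the dbgp console is always dropped once the USB subsystem resets the EHCI controller (see the setup_early_printk() change below), so ",keep" only matters for the vga and serial variants.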
@@ -1231,6 +1232,29 @@ and is between 256 and 4096 characters. It is defined in the file
			or
			memmap=0x10000$0x18690000
memory_corruption_check=0/1 [X86]
Some BIOSes seem to corrupt the first 64k of
memory when doing things like suspend/resume.
Setting this option will scan the memory
looking for corruption. Enabling this will
both detect corruption and prevent the kernel
from using the memory being corrupted.
However, it's intended as a diagnostic tool; if
repeatable BIOS-originated corruption always
affects the same memory, you can use memmap=
to prevent the kernel from using that memory.
memory_corruption_check_size=size [X86]
By default it checks for corruption in the low
64k, making this memory unavailable for normal
use. Use this parameter to scan for
corruption in more or less memory.
memory_corruption_check_period=seconds [X86]
By default it checks for corruption every 60
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
	memtest=	[KNL,X86] Enable memtest
			Format: <integer>
			range: 0,4 : pattern number
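
For illustration only (not part of the patch), the three new memory_corruption_check parameters above could be combined on a kernel command line like this; the size and period values are arbitrary examples, the documented defaults being 64k and 60 seconds:

	memory_corruption_check=1 memory_corruption_check_size=65536 memory_corruption_check_period=30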

@@ -390,6 +390,11 @@ L:	iommu@lists.linux-foundation.org
T:	git://git.kernel.org/pub/scm/linux/kernel/git/joro/linux-2.6-iommu.git
S:	Supported

+AMD MICROCODE UPDATE SUPPORT
+P:	Peter Oruba
+M:	peter.oruba@amd.com
+S:	Supported
+
AMS (Apple Motion Sensor) DRIVER
P:	Stelian Pop
M:	stelian@popies.net

@@ -113,11 +113,6 @@ typedef struct siginfo {
#undef NSIGSEGV
#define NSIGSEGV	3

-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
-
#undef NSIGTRAP
#define NSIGTRAP	4

@@ -15,11 +15,6 @@
#include <asm-generic/siginfo.h>

-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
-
#undef NSIGTRAP
#define NSIGTRAP	4

@@ -778,23 +778,45 @@ config X86_REBOOTFIXUPS
	  Say N otherwise.

config MICROCODE
-	tristate "/dev/cpu/microcode - Intel IA32 CPU microcode support"
+	tristate "/dev/cpu/microcode - microcode support"
	select FW_LOADER
	---help---
	  If you say Y here, you will be able to update the microcode on
-	  Intel processors in the IA32 family, e.g. Pentium Pro, Pentium II,
-	  Pentium III, Pentium 4, Xeon etc. You will obviously need the
-	  actual microcode binary data itself which is not shipped with the
-	  Linux kernel.
+	  certain Intel and AMD processors. The Intel support is for the
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
+	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
+	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
+	  You will obviously need the actual microcode binary data itself
+	  which is not shipped with the Linux kernel.

-	  For latest news and information on obtaining all the required
-	  ingredients for this driver, check:
-	  <http://www.urbanmyth.org/microcode/>.
+	  This option selects the general module only, you need to select
+	  at least one vendor specific module as well.

	  To compile this driver as a module, choose M here: the
	  module will be called microcode.

-config MICROCODE_OLD_INTERFACE
+config MICROCODE_INTEL
+	bool "Intel microcode patch loading support"
+	depends on MICROCODE
+	default MICROCODE
+	select FW_LOADER
+	---help---
+	  This option enables microcode patch loading support for Intel
+	  processors.
+
+	  For latest news and information on obtaining all the required
+	  Intel ingredients for this driver, check:
+	  <http://www.urbanmyth.org/microcode/>.
+
+config MICROCODE_AMD
+	bool "AMD microcode patch loading support"
+	depends on MICROCODE
+	select FW_LOADER
+	---help---
+	  If you select this option, microcode patch loading support for AMD
+	  processors will be enabled.
+
+config MICROCODE_OLD_INTERFACE
	def_bool y
	depends on MICROCODE
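
For reference, a minimal .config fragment exercising the restructured options might look like the following sketch; as the help text above notes, MICROCODE alone only builds the common module, so at least one vendor-specific option must accompany it:

	CONFIG_MICROCODE=m
	CONFIG_MICROCODE_INTEL=y
	CONFIG_MICROCODE_AMD=y
	CONFIG_MICROCODE_OLD_INTERFACE=y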
@@ -1061,6 +1083,56 @@ config HIGHPTE
	  low memory.  Setting this option will put user-space page table
	  entries in high memory.
config X86_CHECK_BIOS_CORRUPTION
bool "Check for low memory corruption"
help
Periodically check for memory corruption in low memory, which
is suspected to be caused by BIOS. Even when enabled in the
configuration, it is disabled at runtime. Enable it by
setting "memory_corruption_check=1" on the kernel command
line. By default it scans the low 64k of memory every 60
seconds; see the memory_corruption_check_size and
memory_corruption_check_period parameters in
Documentation/kernel-parameters.txt to adjust this.
When enabled with the default parameters, this option has
almost no overhead, as it reserves a relatively small amount
of memory and scans it infrequently. It both detects corruption
and prevents it from affecting the running system.
It is, however, intended as a diagnostic tool; if repeatable
BIOS-originated corruption always affects the same memory,
you can use memmap= to prevent the kernel from using that
memory.
config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
bool "Set the default setting of memory_corruption_check"
depends on X86_CHECK_BIOS_CORRUPTION
default y
help
Set whether the default state of memory_corruption_check is
on or off.
config X86_RESERVE_LOW_64K
bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen"
default y
help
Reserve the first 64K of physical RAM on BIOSes that are known
to potentially corrupt that memory range. A number of BIOSes are
known to utilize this area during suspend/resume, so it must not
be used by the kernel.
Set this to N if you are absolutely sure that you trust the BIOS
to get all its memory reservations and usages right.
If you have doubts about the BIOS (e.g. suspend/resume does not
work or there are kernel crashes after certain hardware hotplug
events) and it's not AMI or Phoenix, then you might want to enable
X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical
corruption patterns.
Say Y if unsure.
config MATH_EMULATION
	bool
	prompt "Math emulation" if X86_32

@@ -420,7 +420,6 @@ config X86_DEBUGCTLMSR
	depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)

menuconfig PROCESSOR_SELECT
-	default y
	bool "Supported processor vendors" if EMBEDDED
	help
	  This lets you choose what x86 vendor support code your kernel
@@ -430,48 +429,97 @@ config CPU_SUP_INTEL
	default y
	bool "Support Intel processors" if PROCESSOR_SELECT
	help
-	  This enables extended support for Intel processors
+	  This enables detection, tunings and quirks for Intel processors
+
+	  You need this enabled if you want your kernel to run on an
+	  Intel CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on an Intel
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_CYRIX_32
	default y
	bool "Support Cyrix processors" if PROCESSOR_SELECT
	depends on !64BIT
	help
-	  This enables extended support for Cyrix processors
+	  This enables detection, tunings and quirks for Cyrix processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Cyrix CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Cyrix
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_AMD
	default y
	bool "Support AMD processors" if PROCESSOR_SELECT
	help
-	  This enables extended support for AMD processors
+	  This enables detection, tunings and quirks for AMD processors
+
+	  You need this enabled if you want your kernel to run on an
+	  AMD CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on an AMD
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_CENTAUR_32
	default y
	bool "Support Centaur processors" if PROCESSOR_SELECT
	depends on !64BIT
	help
-	  This enables extended support for Centaur processors
+	  This enables detection, tunings and quirks for Centaur processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Centaur CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_CENTAUR_64
	default y
	bool "Support Centaur processors" if PROCESSOR_SELECT
	depends on 64BIT
	help
-	  This enables extended support for Centaur processors
+	  This enables detection, tunings and quirks for Centaur processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Centaur CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Centaur
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_TRANSMETA_32
	default y
	bool "Support Transmeta processors" if PROCESSOR_SELECT
	depends on !64BIT
	help
-	  This enables extended support for Transmeta processors
+	  This enables detection, tunings and quirks for Transmeta processors
+
+	  You need this enabled if you want your kernel to run on a
+	  Transmeta CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a Transmeta
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config CPU_SUP_UMC_32
	default y
	bool "Support UMC processors" if PROCESSOR_SELECT
	depends on !64BIT
	help
-	  This enables extended support for UMC processors
+	  This enables detection, tunings and quirks for UMC processors
+
+	  You need this enabled if you want your kernel to run on a
+	  UMC CPU. Disabling this option on other types of CPUs
+	  makes the kernel a tiny bit smaller. Disabling it on a UMC
+	  CPU might render the kernel unbootable.
+
+	  If unsure, say N.

config X86_DS
	bool "Debug Store support"

@@ -43,6 +43,19 @@ config EARLY_PRINTK
	  with klogd/syslogd or the X server. You should normally say N here,
	  unless you want to debug such a crash.
config EARLY_PRINTK_DBGP
bool "Early printk via EHCI debug port"
default n
depends on EARLY_PRINTK && PCI
help
Write kernel log output directly into the EHCI debug port.
This is useful for kernel debugging when your machine crashes very
early before the console code is initialized. For normal operation
it is not recommended because it looks ugly and doesn't cooperate
	  with klogd/syslogd or the X server. You should normally say N here,
	  unless you want to debug such a crash. You need a USB debug device.
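
As a sketch (not part of the patch), enabling this console end to end takes both config options and the matching boot parameter:

	CONFIG_EARLY_PRINTK=y
	CONFIG_EARLY_PRINTK_DBGP=y

together with earlyprintk=dbgp on the kernel command line.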
config DEBUG_STACKOVERFLOW
	bool "Check for stack overflows"
	depends on DEBUG_KERNEL

@@ -45,3 +45,8 @@ cflags-$(CONFIG_MGEODEGX1)	+= -march=pentium-mmx

# cpu entries
cflags-$(CONFIG_X86_GENERIC) 	+= $(call tune,generic,$(call tune,i686))
# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
cflags-y += $(call cc-option,-Wa$(comma)-mtune=generic32,)
endif

@@ -72,9 +72,7 @@ KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
KBUILD_CFLAGS += $(call cc-option,-m32)
KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__

-$(obj)/zImage: IMAGE_OFFSET := 0x1000
$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK)
-$(obj)/bzImage: IMAGE_OFFSET := 0x100000
$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__
$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__
$(obj)/bzImage: BUILDFLAGS := -b
@@ -117,7 +115,7 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE
	$(call if_changed,objcopy)

$(obj)/compressed/vmlinux: FORCE
-	$(Q)$(MAKE) $(build)=$(obj)/compressed IMAGE_OFFSET=$(IMAGE_OFFSET) $@
+	$(Q)$(MAKE) $(build)=$(obj)/compressed $@

# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel
FDARGS =
@@ -181,6 +179,7 @@ isoimage: $(BOOTIMAGE)
	mkisofs -J -r -o $(obj)/image.iso -b isolinux.bin -c boot.cat \
		-no-emul-boot -boot-load-size 4 -boot-info-table \
		$(obj)/isoimage
+	isohybrid $(obj)/image.iso 2>/dev/null || true
	rm -rf $(obj)/isoimage

zlilo: $(BOOTIMAGE)

@@ -27,9 +27,8 @@ $(obj)/vmlinux.bin: vmlinux FORCE
	$(call if_changed,objcopy)

-ifeq ($(CONFIG_X86_32),y)
-targets += vmlinux.bin.all vmlinux.relocs
-hostprogs-y := relocs
+targets += vmlinux.bin.all vmlinux.relocs relocs
+hostprogs-$(CONFIG_X86_32) += relocs

quiet_cmd_relocs = RELOCS  $@
      cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -43,6 +42,8 @@ quiet_cmd_relocbin = BUILD   $@
$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
	$(call if_changed,relocbin)

+ifeq ($(CONFIG_X86_32),y)
ifdef CONFIG_RELOCATABLE
$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
	$(call if_changed,gzip)
@@ -59,6 +60,5 @@ $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
endif

$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE
	$(call if_changed,ld)

@@ -41,6 +41,7 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
	char *mbrbuf_ptr, *mbrbuf_end;
	u32 buf_base, mbr_base;
	extern char _end[];
+	u16 mbr_magic;

	sector_size = ei->params.bytes_per_sector;
	if (!sector_size)
@@ -58,11 +59,15 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
	if (mbrbuf_end > (char *)(size_t)boot_params.hdr.heap_end_ptr)
		return -1;

+	memset(mbrbuf_ptr, 0, sector_size);
	if (read_mbr(devno, mbrbuf_ptr))
		return -1;

	*mbrsig = *(u32 *)&mbrbuf_ptr[EDD_MBR_SIG_OFFSET];
-	return 0;
+	mbr_magic = *(u16 *)&mbrbuf_ptr[510];
+
+	/* check for valid MBR magic */
+	return mbr_magic == 0xAA55 ? 0 : -1;
}

static int get_edd_info(u8 devno, struct edd_info *ei)

@@ -224,7 +224,7 @@ static void vesa_store_pm_info(void)
static void vesa_store_mode_params_graphics(void)
{
	/* Tell the kernel we're in VESA graphics mode */
-	boot_params.screen_info.orig_video_isVGA = 0x23;
+	boot_params.screen_info.orig_video_isVGA = VIDEO_TYPE_VLFB;

	/* Mode parameters */
	boot_params.screen_info.vesa_attributes = vminfo.mode_attr;

@@ -1535,7 +1535,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
-CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE is not set
CONFIG_LOGO=y

@@ -1505,7 +1505,6 @@ CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=64
-CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
# CONFIG_FRAMEBUFFER_CONSOLE is not set
CONFIG_LOGO=y

@@ -351,31 +351,28 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
	savesegment(es, tmp);
	err |= __put_user(tmp, (unsigned int __user *)&sc->es);

-	err |= __put_user((u32)regs->di, &sc->di);
-	err |= __put_user((u32)regs->si, &sc->si);
-	err |= __put_user((u32)regs->bp, &sc->bp);
-	err |= __put_user((u32)regs->sp, &sc->sp);
-	err |= __put_user((u32)regs->bx, &sc->bx);
-	err |= __put_user((u32)regs->dx, &sc->dx);
-	err |= __put_user((u32)regs->cx, &sc->cx);
-	err |= __put_user((u32)regs->ax, &sc->ax);
-	err |= __put_user((u32)regs->cs, &sc->cs);
-	err |= __put_user((u32)regs->ss, &sc->ss);
+	err |= __put_user(regs->di, &sc->di);
+	err |= __put_user(regs->si, &sc->si);
+	err |= __put_user(regs->bp, &sc->bp);
+	err |= __put_user(regs->sp, &sc->sp);
+	err |= __put_user(regs->bx, &sc->bx);
+	err |= __put_user(regs->dx, &sc->dx);
+	err |= __put_user(regs->cx, &sc->cx);
+	err |= __put_user(regs->ax, &sc->ax);
+	err |= __put_user(regs->cs, &sc->cs);
+	err |= __put_user(regs->ss, &sc->ss);
	err |= __put_user(current->thread.trap_no, &sc->trapno);
	err |= __put_user(current->thread.error_code, &sc->err);
-	err |= __put_user((u32)regs->ip, &sc->ip);
-	err |= __put_user((u32)regs->flags, &sc->flags);
-	err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
+	err |= __put_user(regs->ip, &sc->ip);
+	err |= __put_user(regs->flags, &sc->flags);
+	err |= __put_user(regs->sp, &sc->sp_at_signal);

	tmp = save_i387_xstate_ia32(fpstate);
	if (tmp < 0)
		err = -EFAULT;
-	else {
-		clear_used_math();
-		stts();
+	else
		err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL),
					&sc->fpstate);
-	}

	/* non-iBCS2 extensions.. */
	err |= __put_user(mask, &sc->oldmask);
@@ -444,21 +441,18 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);

	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;

-	err |= __put_user(sig, &frame->sig);
-	if (err)
-		goto give_sigsegv;
+	if (__put_user(sig, &frame->sig))
+		return -EFAULT;

-	err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
-	if (err)
-		goto give_sigsegv;
+	if (ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
+		return -EFAULT;

	if (_COMPAT_NSIG_WORDS > 1) {
-		err |= __copy_to_user(frame->extramask, &set->sig[1],
-				      sizeof(frame->extramask));
-		if (err)
-			goto give_sigsegv;
+		if (__copy_to_user(frame->extramask, &set->sig[1],
+				   sizeof(frame->extramask)))
+			return -EFAULT;
	}

	if (ka->sa.sa_flags & SA_RESTORER) {
@@ -479,7 +473,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
	 */
	err |= __copy_to_user(frame->retcode, &code, 8);
	if (err)
-		goto give_sigsegv;
+		return -EFAULT;

	/* Set up registers for signal handler */
	regs->sp = (unsigned long) frame;
@@ -502,10 +496,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
#endif

	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
}
int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,

@@ -533,14 +523,14 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
	frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);

	if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
-		goto give_sigsegv;
+		return -EFAULT;

	err |= __put_user(sig, &frame->sig);
	err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo);
	err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc);
	err |= copy_siginfo_to_user32(&frame->info, info);
	if (err)
-		goto give_sigsegv;
+		return -EFAULT;

	/* Create the ucontext.  */
	if (cpu_has_xsave)
@@ -556,7 +546,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
				regs, set->sig[0]);
	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
	if (err)
-		goto give_sigsegv;
+		return -EFAULT;

	if (ka->sa.sa_flags & SA_RESTORER)
		restorer = ka->sa.sa_restorer;
@@ -571,7 +561,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
	 */
	err |= __copy_to_user(frame->retcode, &code, 8);
	if (err)
-		goto give_sigsegv;
+		return -EFAULT;

	/* Set up registers for signal handler */
	regs->sp = (unsigned long) frame;
@@ -599,8 +589,4 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
#endif

	return 0;
-
-give_sigsegv:
-	force_sigsegv(sig, current);
-	return -EFAULT;
}

@@ -10,7 +10,7 @@ ifdef CONFIG_FTRACE
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
-CFLAGS_REMOVE_paravirt.o = -pg
+CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
endif

#
@@ -51,7 +51,6 @@ obj-$(CONFIG_X86_BIOS_REBOOT)	+= reboot.o
obj-$(CONFIG_MCA)		+= mca_32.o
obj-$(CONFIG_X86_MSR)		+= msr.o
obj-$(CONFIG_X86_CPUID)		+= cpuid.o
-obj-$(CONFIG_MICROCODE)		+= microcode.o
obj-$(CONFIG_PCI)		+= early-quirks.o
apm-y				:= apm_32.o
obj-$(CONFIG_APM)		+= apm.o
@@ -90,7 +89,7 @@ obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
obj-$(CONFIG_VMI)		+= vmi_32.o vmiclock_32.o
obj-$(CONFIG_KVM_GUEST)		+= kvm.o
obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
-obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o paravirt-spinlocks.o
obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o

obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
@@ -100,6 +99,11 @@ scx200-y			+= scx200_32.o

obj-$(CONFIG_OLPC)		+= olpc.o

+microcode-y				:= microcode_core.o
+microcode-$(CONFIG_MICROCODE_INTEL)	+= microcode_intel.o
+microcode-$(CONFIG_MICROCODE_AMD)	+= microcode_amd.o
+obj-$(CONFIG_MICROCODE)			+= microcode.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)

@@ -1418,8 +1418,16 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
 */
static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
{
-	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
-	acpi_skip_timer_override = 1;
+	/*
+	 * The ati_ixp4x0_rev() early PCI quirk should have set
+	 * the acpi_skip_timer_override flag already:
+	 */
+	if (!acpi_skip_timer_override) {
+		WARN(1, KERN_ERR "ati_ixp4x0 quirk not complete.\n");
+		pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n",
+			d->ident);
+		acpi_skip_timer_override = 1;
+	}
	return 0;
}

@@ -723,9 +723,7 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
	init_iommu_from_acpi(iommu, h);
	init_iommu_devices(iommu);

-	pci_enable_device(iommu->dev);
-
-	return 0;
+	return pci_enable_device(iommu->dev);
}

/*

@@ -1121,16 +1121,5 @@ void __cpuinit cpu_init(void)
	xsave_init();
}

-#ifdef CONFIG_HOTPLUG_CPU
-void __cpuinit cpu_uninit(void)
-{
-	int cpu = raw_smp_processor_id();
-	cpu_clear(cpu, cpu_initialized);
-
-	/* lazy TLB state */
-	per_cpu(cpu_tlbstate, cpu).state = 0;
-	per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
-}
-#endif
#endif

@@ -66,6 +66,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
		.ds		= __USER_DS,
		.fs		= __KERNEL_PERCPU,

-		.__cr3		= __pa(swapper_pg_dir)
+		.__cr3		= __phys_addr_const((unsigned long)swapper_pg_dir)
	}
};

@@ -95,6 +95,52 @@ static void __init nvidia_bugs(int num, int slot, int func)
}
static u32 ati_ixp4x0_rev(int num, int slot, int func)
{
u32 d;
u8 b;
b = read_pci_config_byte(num, slot, func, 0xac);
b &= ~(1<<5);
write_pci_config_byte(num, slot, func, 0xac, b);
d = read_pci_config(num, slot, func, 0x70);
d |= 1<<8;
write_pci_config(num, slot, func, 0x70, d);
d = read_pci_config(num, slot, func, 0x8);
d &= 0xff;
return d;
}
static void __init ati_bugs(int num, int slot, int func)
{
#if defined(CONFIG_ACPI) && defined (CONFIG_X86_IO_APIC)
u32 d;
u8 b;
if (acpi_use_timer_override)
return;
d = ati_ixp4x0_rev(num, slot, func);
if (d < 0x82)
acpi_skip_timer_override = 1;
else {
/* check for IRQ0 interrupt swap */
outb(0x72, 0xcd6); b = inb(0xcd7);
if (!(b & 0x2))
acpi_skip_timer_override = 1;
}
if (acpi_skip_timer_override) {
printk(KERN_INFO "SB4X0 revision 0x%x\n", d);
printk(KERN_INFO "Ignoring ACPI timer override.\n");
printk(KERN_INFO "If you got timer trouble "
"try acpi_use_timer_override\n");
}
#endif
}
#ifdef CONFIG_DMAR
static void __init intel_g33_dmar(int num, int slot, int func)
{
@@ -128,6 +174,8 @@ static struct chipset early_qrk[] __initdata = {
	  PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, QFLAG_APPLY_ONCE, via_bugs },
	{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, fix_hypertransport_config },
+	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_IXP400_SMBUS,
+	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
#ifdef CONFIG_DMAR
	{ PCI_VENDOR_ID_INTEL, 0x29c0,
	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },

@@ -3,11 +3,19 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/screen_info.h>
+#include <linux/usb/ch9.h>
+#include <linux/pci_regs.h>
+#include <linux/pci_ids.h>
+#include <linux/errno.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/fcntl.h>
#include <asm/setup.h>
#include <xen/hvc-console.h>
+#include <asm/pci-direct.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <linux/usb/ehci_def.h>

/* Simple VGA output */
#define VGABASE		(__ISA_IO_base + 0xb8000)
@@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8;  /* ttyS0 */
static int early_serial_putc(unsigned char ch)
{
	unsigned timeout = 0xffff;

	while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
		cpu_relax();
	outb(ch, early_serial_base + TXR);
@@ -111,7 +120,7 @@ static __init void early_serial_init(char *s)
	if (!strncmp(s, "0x", 2)) {
		early_serial_base = simple_strtoul(s, &e, 16);
	} else {
-		static int bases[] = { 0x3f8, 0x2f8 };
+		static const int __initconst bases[] = { 0x3f8, 0x2f8 };

		if (!strncmp(s, "ttyS", 4))
			s += 4;
@@ -151,6 +160,721 @@ static struct console early_serial_console = {
	.index =	-1,
};
#ifdef CONFIG_EARLY_PRINTK_DBGP
static struct ehci_caps __iomem *ehci_caps;
static struct ehci_regs __iomem *ehci_regs;
static struct ehci_dbg_port __iomem *ehci_debug;
static unsigned int dbgp_endpoint_out;
struct ehci_dev {
u32 bus;
u32 slot;
u32 func;
};
static struct ehci_dev ehci_dev;
#define USB_DEBUG_DEVNUM 127
#define DBGP_DATA_TOGGLE 0x8800
static inline u32 dbgp_pid_update(u32 x, u32 tok)
{
return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
}
static inline u32 dbgp_len_update(u32 x, u32 len)
{
return (x & ~0x0f) | (len & 0x0f);
}
/*
* USB Packet IDs (PIDs)
*/
/* token */
#define USB_PID_OUT 0xe1
#define USB_PID_IN 0x69
#define USB_PID_SOF 0xa5
#define USB_PID_SETUP 0x2d
/* handshake */
#define USB_PID_ACK 0xd2
#define USB_PID_NAK 0x5a
#define USB_PID_STALL 0x1e
#define USB_PID_NYET 0x96
/* data */
#define USB_PID_DATA0 0xc3
#define USB_PID_DATA1 0x4b
#define USB_PID_DATA2 0x87
#define USB_PID_MDATA 0x0f
/* Special */
#define USB_PID_PREAMBLE 0x3c
#define USB_PID_ERR 0x3c
#define USB_PID_SPLIT 0x78
#define USB_PID_PING 0xb4
#define USB_PID_UNDEF_0 0xf0
#define USB_PID_DATA_TOGGLE 0x88
#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
#define PCI_CAP_ID_EHCI_DEBUG 0xa
#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
#define HUB_SHORT_RESET_TIME 10
#define HUB_LONG_RESET_TIME 200
#define HUB_RESET_TIMEOUT 500
#define DBGP_MAX_PACKET 8
static int dbgp_wait_until_complete(void)
{
u32 ctrl;
int loop = 0x100000;
do {
ctrl = readl(&ehci_debug->control);
/* Stop when the transaction is finished */
if (ctrl & DBGP_DONE)
break;
} while (--loop > 0);
if (!loop)
return -1;
/*
* Now that we have observed the completed transaction,
* clear the done bit.
*/
writel(ctrl | DBGP_DONE, &ehci_debug->control);
return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
}
static void dbgp_mdelay(int ms)
{
int i;
while (ms--) {
for (i = 0; i < 1000; i++)
outb(0x1, 0x80);
}
}
static void dbgp_breath(void)
{
/* Sleep to give the debug port a chance to breathe */
}
static int dbgp_wait_until_done(unsigned ctrl)
{
u32 pids, lpid;
int ret;
int loop = 3;
retry:
writel(ctrl | DBGP_GO, &ehci_debug->control);
ret = dbgp_wait_until_complete();
pids = readl(&ehci_debug->pids);
lpid = DBGP_PID_GET(pids);
if (ret < 0)
return ret;
/*
* If the port is getting full or it has dropped data
* start pacing ourselves, not necessary but it's friendly.
*/
if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
dbgp_breath();
/* If I get a NACK reissue the transmission */
if (lpid == USB_PID_NAK) {
if (--loop > 0)
goto retry;
}
return ret;
}
static void dbgp_set_data(const void *buf, int size)
{
const unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = hi = 0;
for (i = 0; i < 4 && i < size; i++)
lo |= bytes[i] << (8*i);
for (; i < 8 && i < size; i++)
hi |= bytes[i] << (8*(i - 4));
writel(lo, &ehci_debug->data03);
writel(hi, &ehci_debug->data47);
}
static void dbgp_get_data(void *buf, int size)
{
unsigned char *bytes = buf;
u32 lo, hi;
int i;
lo = readl(&ehci_debug->data03);
hi = readl(&ehci_debug->data47);
for (i = 0; i < 4 && i < size; i++)
bytes[i] = (lo >> (8*i)) & 0xff;
for (; i < 8 && i < size; i++)
bytes[i] = (hi >> (8*(i - 4))) & 0xff;
}
static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
const char *bytes, int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_OUT);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
dbgp_set_data(bytes, size);
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
return ret;
}
static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
int size)
{
u32 pids, addr, ctrl;
int ret;
if (size > DBGP_MAX_PACKET)
return -1;
addr = DBGP_EPADDR(devnum, endpoint);
pids = readl(&ehci_debug->pids);
pids = dbgp_pid_update(pids, USB_PID_IN);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, size);
ctrl &= ~DBGP_OUT;
ctrl |= DBGP_GO;
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
if (size > ret)
size = ret;
dbgp_get_data(data, size);
return ret;
}
static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
int value, int index, void *data, int size)
{
u32 pids, addr, ctrl;
struct usb_ctrlrequest req;
int read;
int ret;
read = (requesttype & USB_DIR_IN) != 0;
if (size > (read ? DBGP_MAX_PACKET:0))
return -1;
/* Compute the control message */
req.bRequestType = requesttype;
req.bRequest = request;
req.wValue = cpu_to_le16(value);
req.wIndex = cpu_to_le16(index);
req.wLength = cpu_to_le16(size);
pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
addr = DBGP_EPADDR(devnum, 0);
ctrl = readl(&ehci_debug->control);
ctrl = dbgp_len_update(ctrl, sizeof(req));
ctrl |= DBGP_OUT;
ctrl |= DBGP_GO;
/* Send the setup message */
dbgp_set_data(&req, sizeof(req));
writel(addr, &ehci_debug->address);
writel(pids, &ehci_debug->pids);
ret = dbgp_wait_until_done(ctrl);
if (ret < 0)
return ret;
/* Read the result */
return dbgp_bulk_read(devnum, 0, data, size);
}
/* Find a PCI capability */
static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
{
u8 pos;
int bytes;
if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
PCI_STATUS_CAP_LIST))
return 0;
pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
u8 id;
pos &= ~3;
id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
if (id == 0xff)
break;
if (id == cap)
return pos;
pos = read_pci_config_byte(num, slot, func,
pos+PCI_CAP_LIST_NEXT);
}
return 0;
}
static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
{
u32 class;
class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
return 0;
return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
}
static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
{
u32 bus, slot, func;
for (bus = 0; bus < 256; bus++) {
for (slot = 0; slot < 32; slot++) {
for (func = 0; func < 8; func++) {
unsigned cap;
cap = __find_dbgp(bus, slot, func);
if (!cap)
continue;
if (ehci_num-- != 0)
continue;
*rbus = bus;
*rslot = slot;
*rfunc = func;
return cap;
}
}
}
return 0;
}
static int ehci_reset_port(int port)
{
u32 portsc;
u32 delay_time, delay;
int loop;
/* Reset the usb debug port */
portsc = readl(&ehci_regs->port_status[port - 1]);
portsc &= ~PORT_PE;
portsc |= PORT_RESET;
writel(portsc, &ehci_regs->port_status[port - 1]);
delay = HUB_ROOT_RESET_TIME;
for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
delay_time += delay) {
dbgp_mdelay(delay);
portsc = readl(&ehci_regs->port_status[port - 1]);
if (portsc & PORT_RESET) {
/* force reset to complete */
loop = 2;
writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
&ehci_regs->port_status[port - 1]);
do {
portsc = readl(&ehci_regs->port_status[port-1]);
} while ((portsc & PORT_RESET) && (--loop > 0));
}
/* Device went away? */
if (!(portsc & PORT_CONNECT))
return -ENOTCONN;
		/* bomb out completely if something weird happened */
if ((portsc & PORT_CSC))
return -EINVAL;
/* If we've finished resetting, then break out of the loop */
if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
return 0;
}
return -EBUSY;
}
static int ehci_wait_for_port(int port)
{
u32 status;
int ret, reps;
for (reps = 0; reps < 3; reps++) {
dbgp_mdelay(100);
status = readl(&ehci_regs->status);
if (status & STS_PCD) {
ret = ehci_reset_port(port);
if (ret == 0)
return 0;
}
}
return -ENOTCONN;
}
#ifdef DBGP_DEBUG
# define dbgp_printk early_printk
#else
static inline void dbgp_printk(const char *fmt, ...) { }
#endif
typedef void (*set_debug_port_t)(int port);
static void default_set_debug_port(int port)
{
}
static set_debug_port_t set_debug_port = default_set_debug_port;
static void nvidia_set_debug_port(int port)
{
u32 dword;
dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x74);
dword &= ~(0x0f<<12);
dword |= ((port & 0x0f)<<12);
write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
dword);
dbgp_printk("set debug port to %d\n", port);
}
static void __init detect_set_debug_port(void)
{
u32 vendorid;
vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
0x00);
if ((vendorid & 0xffff) == 0x10de) {
dbgp_printk("using nvidia set_debug_port\n");
set_debug_port = nvidia_set_debug_port;
}
}
static int __init ehci_setup(void)
{
struct usb_debug_descriptor dbgp_desc;
u32 cmd, ctrl, status, portsc, hcs_params;
u32 debug_port, new_debug_port = 0, n_ports;
u32 devnum;
int ret, i;
int loop;
int port_map_tried;
int playtimes = 3;
try_next_time:
port_map_tried = 0;
try_next_port:
hcs_params = readl(&ehci_caps->hcs_params);
debug_port = HCS_DEBUG_PORT(hcs_params);
n_ports = HCS_N_PORTS(hcs_params);
dbgp_printk("debug_port: %d\n", debug_port);
dbgp_printk("n_ports: %d\n", n_ports);
for (i = 1; i <= n_ports; i++) {
portsc = readl(&ehci_regs->port_status[i-1]);
dbgp_printk("portstatus%d: %08x\n", i, portsc);
}
if (port_map_tried && (new_debug_port != debug_port)) {
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
loop = 10;
/* Reset the EHCI controller */
cmd = readl(&ehci_regs->command);
cmd |= CMD_RESET;
writel(cmd, &ehci_regs->command);
do {
cmd = readl(&ehci_regs->command);
} while ((cmd & CMD_RESET) && (--loop > 0));
if (!loop) {
dbgp_printk("can not reset ehci\n");
return -1;
}
dbgp_printk("ehci reset done\n");
/* Claim ownership, but do not enable yet */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_OWNER;
ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
writel(ctrl, &ehci_debug->control);
/* Start the ehci running */
cmd = readl(&ehci_regs->command);
cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
cmd |= CMD_RUN;
writel(cmd, &ehci_regs->command);
/* Ensure everything is routed to the EHCI */
writel(FLAG_CF, &ehci_regs->configured_flag);
/* Wait until the controller is no longer halted */
loop = 10;
do {
status = readl(&ehci_regs->status);
} while ((status & STS_HALT) && (--loop > 0));
if (!loop) {
dbgp_printk("ehci can be started\n");
return -1;
}
dbgp_printk("ehci started\n");
/* Wait for a device to show up in the debug port */
ret = ehci_wait_for_port(debug_port);
if (ret < 0) {
dbgp_printk("No device found in debug port\n");
goto next_debug_port;
}
dbgp_printk("ehci wait for port done\n");
/* Enable the debug port */
ctrl = readl(&ehci_debug->control);
ctrl |= DBGP_CLAIM;
writel(ctrl, &ehci_debug->control);
ctrl = readl(&ehci_debug->control);
if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
dbgp_printk("No device in debug port\n");
writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
goto err;
}
dbgp_printk("debug ported enabled\n");
/* Completely transfer the debug device to the debug controller */
portsc = readl(&ehci_regs->port_status[debug_port - 1]);
portsc &= ~PORT_PE;
writel(portsc, &ehci_regs->port_status[debug_port - 1]);
dbgp_mdelay(100);
/* Find the debug device and make it device number 127 */
for (devnum = 0; devnum <= 127; devnum++) {
ret = dbgp_control_msg(devnum,
USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
&dbgp_desc, sizeof(dbgp_desc));
if (ret > 0)
break;
}
if (devnum > 127) {
dbgp_printk("Could not find attached debug device\n");
goto err;
}
if (ret < 0) {
dbgp_printk("Attached device is not a debug device\n");
goto err;
}
dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
/* Move the device to 127 if it isn't already there */
if (devnum != USB_DEBUG_DEVNUM) {
ret = dbgp_control_msg(devnum,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
if (ret < 0) {
dbgp_printk("Could not move attached device to %d\n",
USB_DEBUG_DEVNUM);
goto err;
}
devnum = USB_DEBUG_DEVNUM;
dbgp_printk("debug device renamed to 127\n");
}
/* Enable the debug interface */
ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
if (ret < 0) {
dbgp_printk(" Could not enable the debug device\n");
goto err;
}
dbgp_printk("debug interface enabled\n");
/* Perform a small write to get the even/odd data state in sync
*/
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
if (ret < 0) {
dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
goto err;
}
dbgp_printk("small write doned\n");
return 0;
err:
/* Things didn't work so remove my claim */
ctrl = readl(&ehci_debug->control);
ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
writel(ctrl, &ehci_debug->control);
return -1;
next_debug_port:
port_map_tried |= (1<<(debug_port - 1));
new_debug_port = ((debug_port-1+1)%n_ports) + 1;
if (port_map_tried != ((1<<n_ports) - 1)) {
set_debug_port(new_debug_port);
goto try_next_port;
}
if (--playtimes) {
set_debug_port(new_debug_port);
goto try_next_time;
}
return -1;
}
static int __init early_dbgp_init(char *s)
{
u32 debug_port, bar, offset;
u32 bus, slot, func, cap;
void __iomem *ehci_bar;
u32 dbgp_num;
u32 bar_val;
char *e;
int ret;
u8 byte;
if (!early_pci_allowed())
return -1;
dbgp_num = 0;
if (*s)
dbgp_num = simple_strtoul(s, &e, 10);
dbgp_printk("dbgp_num: %d\n", dbgp_num);
cap = find_dbgp(dbgp_num, &bus, &slot, &func);
if (!cap)
return -1;
dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
func);
debug_port = read_pci_config(bus, slot, func, cap);
bar = (debug_port >> 29) & 0x7;
bar = (bar * 4) + 0xc;
offset = (debug_port >> 16) & 0xfff;
dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
if (bar != PCI_BASE_ADDRESS_0) {
dbgp_printk("only debug ports on bar 1 handled.\n");
return -1;
}
bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
dbgp_printk("only simple 32bit mmio bars supported\n");
return -1;
}
/* double check if the mem space is enabled */
byte = read_pci_config_byte(bus, slot, func, 0x04);
if (!(byte & 0x2)) {
byte |= 0x02;
write_pci_config_byte(bus, slot, func, 0x04, byte);
dbgp_printk("mmio for ehci enabled\n");
}
/*
* FIXME I don't have the bar size so just guess PAGE_SIZE is more
* than enough. 1K is the biggest I have seen.
*/
set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
ehci_bar += bar_val & ~PAGE_MASK;
dbgp_printk("ehci_bar: %p\n", ehci_bar);
ehci_caps = ehci_bar;
ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
ehci_debug = ehci_bar + offset;
ehci_dev.bus = bus;
ehci_dev.slot = slot;
ehci_dev.func = func;
detect_set_debug_port();
ret = ehci_setup();
if (ret < 0) {
dbgp_printk("ehci_setup failed\n");
ehci_debug = NULL;
return -1;
}
return 0;
}
static void early_dbgp_write(struct console *con, const char *str, u32 n)
{
int chunk, ret;
if (!ehci_debug)
return;
while (n > 0) {
chunk = n;
if (chunk > DBGP_MAX_PACKET)
chunk = DBGP_MAX_PACKET;
ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
dbgp_endpoint_out, str, chunk);
str += chunk;
n -= chunk;
}
}
static struct console early_dbgp_console = {
.name = "earlydbg",
.write = early_dbgp_write,
.flags = CON_PRINTBUFFER,
.index = -1,
};
#endif
/* Console interface to a host file on AMD's SimNow! */
static int simnow_fd;
@@ -165,6 +889,7 @@ enum {
static noinline long simnow(long cmd, long a, long b, long c)
{
	long ret;

	asm volatile("cpuid" :
		"=a" (ret) :
		"b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
@@ -174,6 +899,7 @@ static noinline long simnow(long cmd, long a, long b, long c)
static void __init simnow_init(char *str)
{
	char *fn = "klog";

	if (*str == '=')
		fn = ++str;
	/* error ignored */
@@ -194,7 +920,7 @@ static struct console simnow_console = {
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
-static int early_console_initialized;
+static int __initdata early_console_initialized;

asmlinkage void early_printk(const char *fmt, ...)
{
@@ -208,10 +934,11 @@ asmlinkage void early_printk(const char *fmt, ...)
	va_end(ap);
}

+static int __initdata keep_early;
+
static int __init setup_early_printk(char *buf)
{
-	int keep_early;
-
	if (!buf)
		return 0;
@@ -219,8 +946,7 @@ static int __init setup_early_printk(char *buf)
		return 0;
	early_console_initialized = 1;

-	if (strstr(buf, "keep"))
-		keep_early = 1;
+	keep_early = (strstr(buf, "keep") != NULL);

	if (!strncmp(buf, "serial", 6)) {
		early_serial_init(buf + 6);
@@ -238,6 +964,17 @@ static int __init setup_early_printk(char *buf)
		simnow_init(buf + 6);
		early_console = &simnow_console;
		keep_early = 1;
+#ifdef CONFIG_EARLY_PRINTK_DBGP
+	} else if (!strncmp(buf, "dbgp", 4)) {
+		if (early_dbgp_init(buf+4) < 0)
+			return 0;
+		early_console = &early_dbgp_console;
+		/*
+		 * usb subsys will reset ehci controller, so don't keep
+		 * that early console
+		 */
+		keep_early = 0;
+#endif
#ifdef CONFIG_HVC_XEN
	} else if (!strncmp(buf, "xen", 3)) {
		early_console = &xenboot_console;
@@ -251,4 +988,5 @@ static int __init setup_early_printk(char *buf)
	register_console(early_console);
	return 0;
}

early_param("earlyprintk", setup_early_printk);

@@ -468,9 +468,23 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)

static int save_i387_xsave(void __user *buf)
{
+	struct task_struct *tsk = current;
	struct _fpstate_ia32 __user *fx = buf;
	int err = 0;
/*
	 * For legacy compatibility, we always set FP/SSE bits in the bit
	 * vector while saving the state to the user context.
	 * This will enable us to capture any changes (during sigreturn) to
* the FP/SSE bits by the legacy applications which don't touch
* xstate_bv in the xsave header.
*
* xsave aware applications can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
tsk->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
	if (save_i387_fxsave(fx) < 0)
		return -1;

@@ -1281,8 +1281,8 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
	printk(KERN_DEBUG "... APIC ESR: %08x\n", v);

	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
+	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
+	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));

	v = apic_read(APIC_LVTT);
	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);

@@ -52,6 +52,8 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
		memset(newldt + oldsize * LDT_ENTRY_SIZE, 0,
		       (mincount - oldsize) * LDT_ENTRY_SIZE);

+	paravirt_alloc_ldt(newldt, mincount);
+
#ifdef CONFIG_X86_64
	/* CHECKME: Do we really need this ? */
	wmb();
@@ -74,6 +76,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
#endif
	}
	if (oldsize) {
+		paravirt_free_ldt(oldldt, oldsize);
		if (oldsize * LDT_ENTRY_SIZE > PAGE_SIZE)
			vfree(oldldt);
		else
@@ -85,10 +88,13 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
{
	int err = alloc_ldt(new, old->size, 0);
	int i;

	if (err < 0)
		return err;
-	memcpy(new->ldt, old->ldt, old->size * LDT_ENTRY_SIZE);
+
+	for (i = 0; i < old->size; i++)
+		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
+
	return 0;
}
@@ -125,6 +131,7 @@ void destroy_context(struct mm_struct *mm)
	if (mm == current->active_mm)
		clear_LDT();
#endif
+	paravirt_free_ldt(mm->context.ldt, mm->context.size);
	if (mm->context.size * LDT_ENTRY_SIZE > PAGE_SIZE)
		vfree(mm->context.ldt);
	else

@@ -1,853 +0,0 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
* This driver allows you to upgrade microcode on Intel processors
* belonging to IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to speculative
* nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
//#define DEBUG /* pr_debug */
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
#define MICROCODE_VERSION "1.14a"
#define DEFAULT_UCODE_DATASIZE (2000) /* 2000 bytes */
#define MC_HEADER_SIZE (sizeof (microcode_header_t)) /* 48 bytes */
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
#define EXT_HEADER_SIZE (sizeof (struct extended_sigtable)) /* 20 bytes */
#define EXT_SIGNATURE_SIZE (sizeof (struct extended_signature)) /* 12 bytes */
#define DWSIZE (sizeof (u32))
#define get_totalsize(mc) \
(((microcode_t *)mc)->hdr.totalsize ? \
((microcode_t *)mc)->hdr.totalsize : DEFAULT_UCODE_TOTALSIZE)
#define get_datasize(mc) \
(((microcode_t *)mc)->hdr.datasize ? \
((microcode_t *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
#define sigmatch(s1, s2, p1, p2) \
(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
/* serialize access to the physical write to MSR 0x79 */
static DEFINE_SPINLOCK(microcode_update_lock);
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
static struct ucode_cpu_info {
int valid;
unsigned int sig;
unsigned int pf;
unsigned int rev;
microcode_t *mc;
} ucode_cpu_info[NR_CPUS];
static void collect_cpu_info(int cpu_num)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
unsigned int val[2];
/* We should bind the task to the CPU */
BUG_ON(raw_smp_processor_id() != cpu_num);
uci->pf = uci->rev = 0;
uci->mc = NULL;
uci->valid = 1;
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
printk(KERN_ERR "microcode: CPU%d not a capable Intel "
"processor\n", cpu_num);
uci->valid = 0;
return;
}
uci->sig = cpuid_eax(0x00000001);
if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
uci->pf = 1 << ((val[1] >> 18) & 7);
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], uci->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
uci->sig, uci->pf, uci->rev);
}
static inline int microcode_update_match(int cpu_num,
microcode_header_t *mc_header, int sig, int pf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
if (!sigmatch(sig, uci->sig, pf, uci->pf)
|| mc_header->rev <= uci->rev)
return 0;
return 1;
}
static int microcode_sanity_check(void *mc)
{
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
struct extended_signature *ext_sig;
unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;
total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
"Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
"Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! "
"Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
printk(KERN_ERR "microcode: error! "
"Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
}
/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int *ext_tablep = (int *)ext_header;
i = ext_table_size / DWSIZE;
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, "
"bad extended signature table checksum\n");
return -EINVAL;
}
}
/* calculate the checksum */
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
}
return 0;
}
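/*
* Illustrative sketch, not part of the driver: the sanity check above
* relies on Intel's convention that every 32-bit word of the header plus
* data area sums to zero modulo 2^32, so any corruption shows up as a
* non-zero running sum. A hypothetical standalone checker:
*/
static int ucode_image_sum_ok(const unsigned int *image, unsigned long bytes)
{
unsigned int sum = 0;
unsigned long i;
for (i = 0; i < bytes / sizeof(unsigned int); i++)
sum += image[i];
return sum == 0; /* non-zero sum means a corrupt image */
}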
/*
* return 0 - no update found
* return 1 - found update
* return < 0 - error
*/
static int get_maching_microcode(void *mc, int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
microcode_header_t *mc_header = mc;
struct extended_sigtable *ext_header;
unsigned long total_size = get_totalsize(mc_header);
int ext_sigcount, i;
struct extended_signature *ext_sig;
void *new_mc;
if (microcode_update_match(cpu, mc_header,
mc_header->sig, mc_header->pf))
goto find;
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;
ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
ext_sigcount = ext_header->count;
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
for (i = 0; i < ext_sigcount; i++) {
if (microcode_update_match(cpu, mc_header,
ext_sig->sig, ext_sig->pf))
goto find;
ext_sig++;
}
return 0;
find:
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n", cpu, mc_header->rev,uci->rev);
new_mc = vmalloc(total_size);
if (!new_mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}
/* free previous update file */
vfree(uci->mc);
memcpy(new_mc, mc, total_size);
uci->mc = new_mc;
return 1;
}
static void apply_microcode(int cpu)
{
unsigned long flags;
unsigned int val[2];
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (uci->mc == NULL)
return;
/* serialize access to the physical write to MSR 0x79 */
spin_lock_irqsave(&microcode_update_lock, flags);
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) uci->mc->bits,
(unsigned long) uci->mc->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != uci->mc->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->rev, val[1]);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %08x \n",
cpu_num, uci->rev, val[1], uci->mc->hdr.date);
uci->rev = val[1];
}
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
static void __user *user_buffer; /* user area microcode data buffer */
static unsigned int user_buffer_size; /* its size */
static long get_next_ucode(void **mc, long offset)
{
microcode_header_t mc_header;
unsigned long total_size;
/* No more data */
if (offset >= user_buffer_size)
return 0;
if (copy_from_user(&mc_header, user_buffer + offset, MC_HEADER_SIZE)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
return -EFAULT;
}
total_size = get_totalsize(&mc_header);
if (offset + total_size > user_buffer_size) {
printk(KERN_ERR "microcode: error! Bad total size in microcode "
"data file\n");
return -EINVAL;
}
*mc = vmalloc(total_size);
if (!*mc)
return -ENOMEM;
if (copy_from_user(*mc, user_buffer + offset, total_size)) {
printk(KERN_ERR "microcode: error! Can not read user data\n");
vfree(*mc);
return -EFAULT;
}
return offset + total_size;
}
static int do_microcode_update (void)
{
long cursor = 0;
int error = 0;
void *new_mc = NULL;
int cpu;
cpumask_t old;
old = current->cpus_allowed;
while ((cursor = get_next_ucode(&new_mc, cursor)) > 0) {
error = microcode_sanity_check(new_mc);
if (error)
goto out;
/*
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = get_maching_microcode(new_mc, cpu);
if (error < 0)
goto out;
if (error == 1)
apply_microcode(cpu);
}
vfree(new_mc);
}
out:
if (cursor > 0)
vfree(new_mc);
if (cursor < 0)
error = cursor;
set_cpus_allowed_ptr(current, &old);
return error;
}
static int microcode_open (struct inode *unused1, struct file *unused2)
{
cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
{
ssize_t ret;
if ((len >> PAGE_SHIFT) > num_physpages) {
printk(KERN_ERR "microcode: too much data (max %ld pages)\n", num_physpages);
return -EINVAL;
}
get_online_cpus();
mutex_lock(&microcode_mutex);
user_buffer = (void __user *) buf;
user_buffer_size = (int) len;
ret = do_microcode_update();
if (!ret)
ret = (ssize_t)len;
mutex_unlock(&microcode_mutex);
put_online_cpus();
return ret;
}
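/*
* Illustrative sketch, not part of the driver: the old interface expects
* the whole microcode data file in write()s to /dev/cpu/microcode, so a
* hypothetical userspace update is a single copy, e.g.:
*
* dd if=microcode.dat of=/dev/cpu/microcode bs=1M
*/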
static const struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
};
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
.fops = &microcode_fops,
};
static int __init microcode_dev_init (void)
{
int error;
error = misc_register(&microcode_dev);
if (error) {
printk(KERN_ERR
"microcode: can't misc_register on minor=%d\n",
MICROCODE_MINOR);
return error;
}
return 0;
}
static void microcode_dev_exit (void)
{
misc_deregister(&microcode_dev);
}
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while(0)
#endif
static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
unsigned long size, long offset)
{
microcode_header_t *mc_header;
unsigned long total_size;
/* No more data */
if (offset >= size)
return 0;
mc_header = (microcode_header_t *)(buf + offset);
total_size = get_totalsize(mc_header);
if (offset + total_size > size) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return -EINVAL;
}
*mc = vmalloc(total_size);
if (!*mc) {
printk(KERN_ERR "microcode: error! Can not allocate memory\n");
return -ENOMEM;
}
memcpy(*mc, buf + offset, total_size);
return offset + total_size;
}
/* fake device for request_firmware */
static struct platform_device *microcode_pdev;
static int cpu_request_microcode(int cpu)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
const u8 *buf;
unsigned long size;
long offset = 0;
int error;
void *mc;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
sprintf(name,"intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
error = request_firmware(&firmware, name, &microcode_pdev->dev);
if (error) {
pr_debug("microcode: data file %s load failed\n", name);
return error;
}
buf = firmware->data;
size = firmware->size;
while ((offset = get_next_ucode_from_buffer(&mc, buf, size, offset))
> 0) {
error = microcode_sanity_check(mc);
if (error)
break;
error = get_maching_microcode(mc, cpu);
if (error < 0)
break;
/*
* It's possible the data file has multiple matching ucode,
* lets keep searching till the latest version
*/
if (error == 1) {
apply_microcode(cpu);
error = 0;
}
vfree(mc);
}
if (offset > 0)
vfree(mc);
if (offset < 0)
error = offset;
release_firmware(firmware);
return error;
}
static int apply_microcode_check_cpu(int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
cpumask_t old;
unsigned int val[2];
int err = 0;
/* Check if the microcode is available */
if (!uci->mc)
return 0;
old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* Check if the microcode we have in memory matches the CPU */
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
err = -EINVAL;
if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
if (uci->pf != (1 << ((val[1] >> 18) & 7)))
err = -EINVAL;
}
if (!err) {
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
if (uci->rev != val[1])
err = -EINVAL;
}
if (!err)
apply_microcode(cpu);
else
printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
" sig=0x%x, pf=0x%x, rev=0x%x\n",
cpu, uci->sig, uci->pf, uci->rev);
set_cpus_allowed_ptr(current, &old);
return err;
}
static void microcode_init_cpu(int cpu, int resume)
{
cpumask_t old;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
mutex_lock(&microcode_mutex);
uci->valid = 0;
vfree(uci->mc);
uci->mc = NULL;
mutex_unlock(&microcode_mutex);
}
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
int cpu = dev->id;
if (end == buf)
return -EINVAL;
if (val == 1) {
cpumask_t old = current->cpus_allowed;
get_online_cpus();
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid)
err = cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
put_online_cpus();
set_cpus_allowed_ptr(current, &old);
}
if (err)
return err;
return sz;
}
static ssize_t version_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->rev);
}
static ssize_t pf_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->pf);
}
static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
static SYSDEV_ATTR(version, 0400, version_show, NULL);
static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
static struct attribute *mc_default_attrs[] = {
&attr_reload.attr,
&attr_version.attr,
&attr_processor_flags.attr,
NULL
};
static struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};
static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
{
int err, cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d added\n", cpu);
memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
microcode_init_cpu(cpu, resume);
return 0;
}
static int mc_sysdev_add(struct sys_device *sys_dev)
{
return __mc_sysdev_add(sys_dev, 0);
}
static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
static int mc_sysdev_resume(struct sys_device *dev)
{
int cpu = dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d resumed\n", cpu);
/* only CPU 0 will apply ucode here */
apply_microcode(0);
return 0;
}
static struct sysdev_driver mc_sysdev_driver = {
.add = mc_sysdev_add,
.remove = mc_sysdev_remove,
.resume = mc_sysdev_resume,
};
static __cpuinit int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
case CPU_ONLINE:
case CPU_DOWN_FAILED:
mc_sysdev_add(sys_dev);
break;
case CPU_ONLINE_FROZEN:
/* System-wide resume is in progress, try to apply microcode */
if (apply_microcode_check_cpu(cpu)) {
/* The application of microcode failed */
microcode_fini_cpu(cpu);
__mc_sysdev_add(sys_dev, 1);
break;
}
case CPU_DOWN_FAILED_FROZEN:
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
printk(KERN_ERR "microcode: Failed to create the sysfs "
"group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
mc_sysdev_remove(sys_dev);
break;
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
break;
}
return NOTIFY_OK;
}
static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
static int __init microcode_init (void)
{
int error;
printk(KERN_INFO
"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
error = microcode_dev_init();
if (error)
return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}
get_online_cpus();
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
if (error) {
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
return error;
}
register_hotcpu_notifier(&mc_cpu_notifier);
return 0;
}
static void __exit microcode_exit (void)
{
microcode_dev_exit();
unregister_hotcpu_notifier(&mc_cpu_notifier);
get_online_cpus();
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
platform_device_unregister(microcode_pdev);
}
module_init(microcode_init)
module_exit(microcode_exit)

View file

@@ -0,0 +1,435 @@
/*
* AMD CPU Microcode Update Driver for Linux
* Copyright (C) 2008 Advanced Micro Devices Inc.
*
* Author: Peter Oruba <peter.oruba@amd.com>
*
* Based on work by:
* Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
*
* This driver allows upgrading the microcode on AMD
* family 0x10 and 0x11 processors.
*
* Licensed under the terms of the GNU General Public
* License version 2. See file COPYING for details.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("AMD Microcode Update Driver");
MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>");
MODULE_LICENSE("GPL v2");
#define UCODE_MAGIC 0x00414d44
#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000
#define UCODE_UCODE_TYPE 0x00000001
struct equiv_cpu_entry {
unsigned int installed_cpu;
unsigned int fixed_errata_mask;
unsigned int fixed_errata_compare;
unsigned int equiv_cpu;
};
struct microcode_header_amd {
unsigned int data_code;
unsigned int patch_id;
unsigned char mc_patch_data_id[2];
unsigned char mc_patch_data_len;
unsigned char init_flag;
unsigned int mc_patch_data_checksum;
unsigned int nb_dev_id;
unsigned int sb_dev_id;
unsigned char processor_rev_id[2];
unsigned char nb_rev_id;
unsigned char sb_rev_id;
unsigned char bios_api_rev;
unsigned char reserved1[3];
unsigned int match_reg[8];
};
struct microcode_amd {
struct microcode_header_amd hdr;
unsigned int mpb[0];
};
#define UCODE_MAX_SIZE (2048)
#define DEFAULT_UCODE_DATASIZE (896)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define DWSIZE (sizeof(u32))
/* For now we support a fixed ucode total size only */
#define get_totalsize(mc) \
((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
+ MC_HEADER_SIZE)
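/*
* Illustrative sketch, not part of the driver: under the fixed-layout
* assumption above, a patch section's size follows directly from the
* mc_patch_data_len header byte. A hypothetical standalone form:
*/
static unsigned int amd_patch_total_size(unsigned char mc_patch_data_len)
{
/* each data unit is 28 bytes, plus the fixed header */
return mc_patch_data_len * 28 + sizeof(struct microcode_header_amd);
}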
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
static struct equiv_cpu_entry *equiv_cpu_table;
static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) {
printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
cpu);
return -1;
}
asm volatile("movl %1, %%ecx; rdmsr"
: "=a" (csig->rev)
: "i" (0x0000008B) : "ecx");
printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n",
csig->rev);
return 0;
}
static int get_matching_microcode(int cpu, void *mc, int rev)
{
struct microcode_header_amd *mc_header = mc;
struct pci_dev *nb_pci_dev, *sb_pci_dev;
unsigned int current_cpu_id;
unsigned int equiv_cpu_id = 0x00;
unsigned int i = 0;
BUG_ON(equiv_cpu_table == NULL);
current_cpu_id = cpuid_eax(0x00000001);
while (equiv_cpu_table[i].installed_cpu != 0) {
if (current_cpu_id == equiv_cpu_table[i].installed_cpu) {
equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
break;
}
i++;
}
if (!equiv_cpu_id) {
printk(KERN_ERR "microcode: CPU%d cpu_id "
"not found in equivalent cpu table \n", cpu);
return 0;
}
if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) {
printk(KERN_ERR
"microcode: CPU%d patch does not match "
"(patch is %x, cpu extended is %x) \n",
cpu, mc_header->processor_rev_id[0],
(equiv_cpu_id & 0xff));
return 0;
}
if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) {
printk(KERN_ERR "microcode: CPU%d patch does not match "
"(patch is %x, cpu base id is %x) \n",
cpu, mc_header->processor_rev_id[1],
((equiv_cpu_id >> 16) & 0xff));
return 0;
}
/* ucode may be northbridge specific */
if (mc_header->nb_dev_id) {
nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
(mc_header->nb_dev_id & 0xff),
NULL);
if ((!nb_pci_dev) ||
(mc_header->nb_rev_id != nb_pci_dev->revision)) {
printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu);
pci_dev_put(nb_pci_dev);
return 0;
}
pci_dev_put(nb_pci_dev);
}
/* ucode may be southbridge specific */
if (mc_header->sb_dev_id) {
sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD,
(mc_header->sb_dev_id & 0xff),
NULL);
if ((!sb_pci_dev) ||
(mc_header->sb_rev_id != sb_pci_dev->revision)) {
printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu);
pci_dev_put(sb_pci_dev);
return 0;
}
pci_dev_put(sb_pci_dev);
}
if (mc_header->patch_id <= rev)
return 0;
return 1;
}
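/*
* Illustrative sketch, not part of the driver: the checks above treat
* equiv_cpu_id as a packed field, with the extended revision id in bits
* 0-7 and the base id in bits 16-23. A hypothetical explicit unpack:
*/
static void unpack_equiv_cpu_id(unsigned int equiv_cpu_id,
unsigned char *ext_id, unsigned char *base_id)
{
*ext_id = equiv_cpu_id & 0xff; /* compared with processor_rev_id[0] */
*base_id = (equiv_cpu_id >> 16) & 0xff; /* compared with processor_rev_id[1] */
}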
static void apply_microcode_amd(int cpu)
{
unsigned long flags;
unsigned int eax, edx;
unsigned int rev;
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
struct microcode_amd *mc_amd = uci->mc;
unsigned long addr;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (mc_amd == NULL)
return;
spin_lock_irqsave(&microcode_update_lock, flags);
addr = (unsigned long)&mc_amd->hdr.data_code;
edx = (unsigned int)(((unsigned long)upper_32_bits(addr)));
eax = (unsigned int)(((unsigned long)lower_32_bits(addr)));
asm volatile("movl %0, %%ecx; wrmsr" :
: "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx");
/* get patch id after patching */
asm volatile("movl %1, %%ecx; rdmsr"
: "=a" (rev)
: "i" (0x0000008B) : "ecx");
spin_unlock_irqrestore(&microcode_update_lock, flags);
/* check current patch id and patch's id for match */
if (rev != mc_amd->hdr.patch_id) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num,
mc_amd->hdr.patch_id, rev);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x \n",
cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
uci->cpu_sig.rev = rev;
}
static void *get_next_ucode(u8 *buf, unsigned int size,
int (*get_ucode_data)(void *, const void *, size_t),
unsigned int *mc_size)
{
unsigned int total_size;
#define UCODE_CONTAINER_SECTION_HDR 8
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
void *mc;
if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
return NULL;
if (section_hdr[0] != UCODE_UCODE_TYPE) {
printk(KERN_ERR "microcode: error! "
"Wrong microcode payload type field\n");
return NULL;
}
total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8));
printk(KERN_INFO "microcode: size %u, total_size %u\n",
size, total_size);
if (total_size > size || total_size > UCODE_MAX_SIZE) {
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return NULL;
}
mc = vmalloc(UCODE_MAX_SIZE);
if (mc) {
memset(mc, 0, UCODE_MAX_SIZE);
if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) {
vfree(mc);
mc = NULL;
} else
*mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
}
#undef UCODE_CONTAINER_SECTION_HDR
return mc;
}
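/*
* Illustrative sketch, not part of the driver: as read above, the 8-byte
* container section header carries the section type in byte 0 and the
* payload size little-endian in bytes 4-5. A hypothetical standalone
* parse of the size field:
*/
static unsigned int section_payload_size(const unsigned char hdr[8])
{
return (unsigned int)hdr[4] | ((unsigned int)hdr[5] << 8);
}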
static int install_equiv_cpu_table(u8 *buf,
int (*get_ucode_data)(void *, const void *, size_t))
{
#define UCODE_CONTAINER_HEADER_SIZE 12
u8 container_hdr[UCODE_CONTAINER_HEADER_SIZE];
unsigned int *buf_pos = (unsigned int *)container_hdr;
unsigned long size;
if (get_ucode_data(container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
return 0;
size = buf_pos[2];
if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) {
printk(KERN_ERR "microcode: error! "
"Wrong microcode equivalnet cpu table\n");
return 0;
}
equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size);
if (!equiv_cpu_table) {
printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n");
return 0;
}
buf += UCODE_CONTAINER_HEADER_SIZE;
if (get_ucode_data(equiv_cpu_table, buf, size)) {
vfree(equiv_cpu_table);
return 0;
}
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
#undef UCODE_CONTAINER_HEADER_SIZE
}
static void free_equiv_cpu_table(void)
{
if (equiv_cpu_table) {
vfree(equiv_cpu_table);
equiv_cpu_table = NULL;
}
}
static int generic_load_microcode(int cpu, void *data, size_t size,
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u8 *ucode_ptr = data, *new_mc = NULL, *mc;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover;
unsigned long offset;
offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data);
if (!offset) {
printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
return -EINVAL;
}
ucode_ptr += offset;
leftover = size - offset;
while (leftover) {
unsigned int uninitialized_var(mc_size);
struct microcode_header_amd *mc_header;
mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size);
if (!mc)
break;
mc_header = (struct microcode_header_amd *)mc;
if (get_matching_microcode(cpu, mc, new_rev)) {
if (new_mc)
vfree(new_mc);
new_rev = mc_header->patch_id;
new_mc = mc;
} else
vfree(mc);
ucode_ptr += mc_size;
leftover -= mc_size;
}
if (new_mc) {
if (!leftover) {
if (uci->mc)
vfree(uci->mc);
uci->mc = new_mc;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else
vfree(new_mc);
}
free_equiv_cpu_table();
return (int)leftover;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
{
memcpy(to, from, n);
return 0;
}
static int request_microcode_fw(int cpu, struct device *device)
{
const char *fw_name = "amd-ucode/microcode_amd.bin";
const struct firmware *firmware;
int ret;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
ret = request_firmware(&firmware, fw_name, device);
if (ret) {
printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name);
return ret;
}
ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
&get_ucode_fw);
release_firmware(firmware);
return ret;
}
static int request_microcode_user(int cpu, const void __user *buf, size_t size)
{
printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode "
"is not supported\n");
return -1;
}
static void microcode_fini_cpu_amd(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
vfree(uci->mc);
uci->mc = NULL;
}
static struct microcode_ops microcode_amd_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info_amd,
.apply_microcode = apply_microcode_amd,
.microcode_fini_cpu = microcode_fini_cpu_amd,
};
struct microcode_ops * __init init_amd_microcode(void)
{
return &microcode_amd_ops;
}

View file

@@ -0,0 +1,508 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
* This driver allows upgrading the microcode on Intel processors
* belonging to the IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to
* speculative nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
#define MICROCODE_VERSION "2.00"
struct microcode_ops *microcode_ops;
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
static DEFINE_MUTEX(microcode_mutex);
struct ucode_cpu_info ucode_cpu_info[NR_CPUS];
EXPORT_SYMBOL_GPL(ucode_cpu_info);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
static int do_microcode_update(const void __user *buf, size_t size)
{
cpumask_t old;
int error = 0;
int cpu;
old = current->cpus_allowed;
for_each_online_cpu(cpu) {
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!uci->valid)
continue;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
error = microcode_ops->request_microcode_user(cpu, buf, size);
if (error < 0)
goto out;
if (!error)
microcode_ops->apply_microcode(cpu);
}
out:
set_cpus_allowed_ptr(current, &old);
return error;
}
static int microcode_open(struct inode *unused1, struct file *unused2)
{
cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
static ssize_t microcode_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
ssize_t ret;
if ((len >> PAGE_SHIFT) > num_physpages) {
printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
num_physpages);
return -EINVAL;
}
get_online_cpus();
mutex_lock(&microcode_mutex);
ret = do_microcode_update(buf, len);
if (!ret)
ret = (ssize_t)len;
mutex_unlock(&microcode_mutex);
put_online_cpus();
return ret;
}
static const struct file_operations microcode_fops = {
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
};
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
.fops = &microcode_fops,
};
static int __init microcode_dev_init(void)
{
int error;
error = misc_register(&microcode_dev);
if (error) {
printk(KERN_ERR
"microcode: can't misc_register on minor=%d\n",
MICROCODE_MINOR);
return error;
}
return 0;
}
static void microcode_dev_exit(void)
{
misc_deregister(&microcode_dev);
}
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
#else
#define microcode_dev_init() 0
#define microcode_dev_exit() do { } while (0)
#endif
/* fake device for request_firmware */
struct platform_device *microcode_pdev;
static ssize_t reload_store(struct sys_device *dev,
struct sysdev_attribute *attr,
const char *buf, size_t sz)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
char *end;
unsigned long val = simple_strtoul(buf, &end, 0);
int err = 0;
int cpu = dev->id;
if (end == buf)
return -EINVAL;
if (val == 1) {
cpumask_t old = current->cpus_allowed;
get_online_cpus();
if (cpu_online(cpu)) {
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
if (uci->valid) {
err = microcode_ops->request_microcode_fw(cpu,
&microcode_pdev->dev);
if (!err)
microcode_ops->apply_microcode(cpu);
}
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
put_online_cpus();
}
if (err)
return err;
return sz;
}
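/*
* Illustrative sketch, not part of the driver: reload_store() above only
* acts when the written value parses as exactly 1, so userspace can
* retrigger a firmware load per CPU through sysfs. A hypothetical
* userspace trigger (path derived from the "microcode" group below):
*/
#include <stdio.h>
static int trigger_reload(int cpu)
{
char path[96];
FILE *f;
snprintf(path, sizeof(path),
"/sys/devices/system/cpu/cpu%d/microcode/reload", cpu);
f = fopen(path, "w");
if (!f)
return -1;
fputs("1\n", f);
return fclose(f);
}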
static ssize_t version_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->cpu_sig.rev);
}
static ssize_t pf_show(struct sys_device *dev,
struct sysdev_attribute *attr, char *buf)
{
struct ucode_cpu_info *uci = ucode_cpu_info + dev->id;
return sprintf(buf, "0x%x\n", uci->cpu_sig.pf);
}
static SYSDEV_ATTR(reload, 0200, NULL, reload_store);
static SYSDEV_ATTR(version, 0400, version_show, NULL);
static SYSDEV_ATTR(processor_flags, 0400, pf_show, NULL);
static struct attribute *mc_default_attrs[] = {
&attr_reload.attr,
&attr_version.attr,
&attr_processor_flags.attr,
NULL
};
static struct attribute_group mc_attr_group = {
.attrs = mc_default_attrs,
.name = "microcode",
};
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
mutex_lock(&microcode_mutex);
microcode_ops->microcode_fini_cpu(cpu);
uci->valid = 0;
mutex_unlock(&microcode_mutex);
}
static void collect_cpu_info(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
memset(uci, 0, sizeof(*uci));
if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
uci->valid = 1;
}
static int microcode_resume_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpu_signature nsig;
pr_debug("microcode: CPU%d resumed\n", cpu);
if (!uci->mc)
return 1;
/*
* Let's verify that the 'cached' ucode does belong
* to this cpu (a bit of paranoia):
*/
if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
microcode_fini_cpu(cpu);
return -1;
}
if (memcmp(&nsig, &uci->cpu_sig, sizeof(nsig))) {
microcode_fini_cpu(cpu);
/* Should we look for a new ucode here? */
return 1;
}
return 0;
}
void microcode_update_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
int err = 0;
/*
* Check if a system resume is in progress (uci->valid is then set),
* otherwise just request a firmware image:
*/
if (uci->valid) {
err = microcode_resume_cpu(cpu);
} else {
collect_cpu_info(cpu);
if (uci->valid && system_state == SYSTEM_RUNNING)
err = microcode_ops->request_microcode_fw(cpu,
&microcode_pdev->dev);
}
if (!err)
microcode_ops->apply_microcode(cpu);
}
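/*
* Illustrative sketch, not part of the driver: all vendor-specific work
* above funnels through struct microcode_ops, so supporting a new vendor
* means filling in one ops table. A minimal standalone analogue of the
* dispatch pattern, with hypothetical names:
*/
struct demo_ops {
int (*collect)(int cpu);
void (*apply)(int cpu);
};
static int demo_update(const struct demo_ops *ops, int cpu)
{
int err = ops->collect(cpu); /* vendor-specific probe */
if (!err)
ops->apply(cpu); /* vendor-specific apply */
return err;
}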
static void microcode_init_cpu(int cpu)
{
cpumask_t old = current->cpus_allowed;
set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
/* We should bind the task to the CPU */
BUG_ON(raw_smp_processor_id() != cpu);
mutex_lock(&microcode_mutex);
microcode_update_cpu(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed_ptr(current, &old);
}
static int mc_sysdev_add(struct sys_device *sys_dev)
{
int err, cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d added\n", cpu);
memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
microcode_init_cpu(cpu);
return 0;
}
static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
if (!cpu_online(cpu))
return 0;
pr_debug("microcode: CPU%d removed\n", cpu);
microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
static int mc_sysdev_resume(struct sys_device *dev)
{
int cpu = dev->id;
if (!cpu_online(cpu))
return 0;
/* only CPU 0 will apply ucode here */
microcode_update_cpu(0);
return 0;
}
static struct sysdev_driver mc_sysdev_driver = {
.add = mc_sysdev_add,
.remove = mc_sysdev_remove,
.resume = mc_sysdev_resume,
};
static __cpuinit int
mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
{
unsigned int cpu = (unsigned long)hcpu;
struct sys_device *sys_dev;
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
microcode_init_cpu(cpu);
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
pr_debug("microcode: CPU%d added\n", cpu);
if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
printk(KERN_ERR "microcode: Failed to create the sysfs "
"group for CPU%d\n", cpu);
break;
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/* Suspend is in progress, only remove the interface */
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
pr_debug("microcode: CPU%d removed\n", cpu);
break;
case CPU_DEAD:
case CPU_UP_CANCELED_FROZEN:
/* The CPU refused to come up during a system resume */
microcode_fini_cpu(cpu);
break;
}
return NOTIFY_OK;
}
static struct notifier_block __refdata mc_cpu_notifier = {
.notifier_call = mc_cpu_callback,
};
static int __init microcode_init(void)
{
struct cpuinfo_x86 *c = &cpu_data(0);
int error;
if (c->x86_vendor == X86_VENDOR_INTEL)
microcode_ops = init_intel_microcode();
else if (c->x86_vendor == X86_VENDOR_AMD)
microcode_ops = init_amd_microcode();
if (!microcode_ops) {
printk(KERN_ERR "microcode: no support for this CPU vendor\n");
return -ENODEV;
}
error = microcode_dev_init();
if (error)
return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}
get_online_cpus();
error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
if (error) {
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
return error;
}
register_hotcpu_notifier(&mc_cpu_notifier);
printk(KERN_INFO
"Microcode Update Driver: v" MICROCODE_VERSION
" <tigran@aivazian.fsnet.co.uk>"
" <peter.oruba@amd.com>\n");
return 0;
}
static void __exit microcode_exit(void)
{
microcode_dev_exit();
unregister_hotcpu_notifier(&mc_cpu_notifier);
get_online_cpus();
sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
put_online_cpus();
platform_device_unregister(microcode_pdev);
microcode_ops = NULL;
printk(KERN_INFO
"Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
}
module_init(microcode_init);
module_exit(microcode_exit);

View file

@@ -0,0 +1,480 @@
/*
* Intel CPU Microcode Update Driver for Linux
*
* Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
* 2006 Shaohua Li <shaohua.li@intel.com>
*
* This driver allows upgrading the microcode on Intel processors
* belonging to the IA-32 family - PentiumPro, Pentium II,
* Pentium III, Xeon, Pentium 4, etc.
*
* Reference: Section 8.11 of Volume 3a, IA-32 Intel® Architecture
* Software Developer's Manual
* Order Number 253668 or free download from:
*
* http://developer.intel.com/design/pentium4/manuals/253668.htm
*
* For more information, go to http://www.urbanmyth.org/microcode
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* 1.0 16 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Initial release.
* 1.01 18 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added read() support + cleanups.
* 1.02 21 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Added 'device trimming' support. open(O_WRONLY) zeroes
* and frees the saved copy of applied microcode.
* 1.03 29 Feb 2000, Tigran Aivazian <tigran@sco.com>
* Made to use devfs (/dev/cpu/microcode) + cleanups.
* 1.04 06 Jun 2000, Simon Trimmer <simon@veritas.com>
* Added misc device support (now uses both devfs and misc).
* Added MICROCODE_IOCFREE ioctl to clear memory.
* 1.05 09 Jun 2000, Simon Trimmer <simon@veritas.com>
* Messages for error cases (non Intel & no suitable microcode).
* 1.06 03 Aug 2000, Tigran Aivazian <tigran@veritas.com>
* Removed ->release(). Removed exclusive open and status bitmap.
* Added microcode_rwsem to serialize read()/write()/ioctl().
* Removed global kernel lock usage.
* 1.07 07 Sep 2000, Tigran Aivazian <tigran@veritas.com>
* Write 0 to 0x8B msr and then cpuid before reading revision,
* so that it works even if there were no update done by the
* BIOS. Otherwise, reading from 0x8B gives junk (which happened
* to be 0 on my machine which is why it worked even when I
* disabled update by the BIOS)
* Thanks to Eric W. Biederman <ebiederman@lnxi.com> for the fix.
* 1.08 11 Dec 2000, Richard Schaal <richard.schaal@intel.com> and
* Tigran Aivazian <tigran@veritas.com>
* Intel Pentium 4 processor support and bugfixes.
* 1.09 30 Oct 2001, Tigran Aivazian <tigran@veritas.com>
* Bugfix for HT (Hyper-Threading) enabled processors
* whereby processor resources are shared by all logical processors
* in a single CPU package.
* 1.10 28 Feb 2002 Asit K Mallick <asit.k.mallick@intel.com> and
* Tigran Aivazian <tigran@veritas.com>,
* Serialize updates as required on HT processors due to
* speculative nature of implementation.
* 1.11 22 Mar 2002 Tigran Aivazian <tigran@veritas.com>
* Fix the panic when writing zero-length microcode chunk.
* 1.12 29 Sep 2003 Nitin Kamble <nitin.a.kamble@intel.com>,
* Jun Nakajima <jun.nakajima@intel.com>
* Support for the microcode updates in the new format.
* 1.13 10 Oct 2003 Tigran Aivazian <tigran@veritas.com>
* Removed ->read() method and obsoleted MICROCODE_IOCFREE ioctl
* because we no longer hold a copy of applied microcode
* in kernel memory.
* 1.14 25 Jun 2004 Tigran Aivazian <tigran@veritas.com>
* Fix sigmatch() macro to handle old CPUs with pf == 0.
* Thanks to Stuart Swales for pointing out this bug.
*/
#include <linux/capability.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/mutex.h>
#include <linux/cpu.h>
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <asm/msr.h>
#include <asm/uaccess.h>
#include <asm/processor.h>
#include <asm/microcode.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
MODULE_LICENSE("GPL");
struct microcode_header_intel {
unsigned int hdrver;
unsigned int rev;
unsigned int date;
unsigned int sig;
unsigned int cksum;
unsigned int ldrver;
unsigned int pf;
unsigned int datasize;
unsigned int totalsize;
unsigned int reserved[3];
};
struct microcode_intel {
struct microcode_header_intel hdr;
unsigned int bits[0];
};
/* microcode format is extended from prescott processors */
struct extended_signature {
unsigned int sig;
unsigned int pf;
unsigned int cksum;
};
struct extended_sigtable {
unsigned int count;
unsigned int cksum;
unsigned int reserved[3];
struct extended_signature sigs[0];
};
#define DEFAULT_UCODE_DATASIZE (2000)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable))
#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature))
#define DWSIZE (sizeof(u32))
#define get_totalsize(mc) \
(((struct microcode_intel *)mc)->hdr.totalsize ? \
((struct microcode_intel *)mc)->hdr.totalsize : \
DEFAULT_UCODE_TOTALSIZE)
#define get_datasize(mc) \
(((struct microcode_intel *)mc)->hdr.datasize ? \
((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE)
#define sigmatch(s1, s2, p1, p2) \
(((s1) == (s2)) && (((p1) & (p2)) || (((p1) == 0) && ((p2) == 0))))
#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
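/*
* Illustrative sketch, not part of the driver: sigmatch() requires equal
* signatures plus overlapping platform-flag masks, and accepts pf == 0 on
* both sides for old CPUs. A hypothetical self-check built on the macro:
*/
static int sigmatch_selftest(void)
{
return sigmatch(0x6f6, 0x6f6, 0x20, 0x20) == 1 && /* pf bits overlap */
sigmatch(0x6f6, 0x6f6, 0x20, 0x40) == 0 && /* disjoint pf masks */
sigmatch(0x6f6, 0x6f6, 0x00, 0x00) == 1 && /* pre-pf CPUs, both zero */
sigmatch(0x6f6, 0x6fb, 0x20, 0x20) == 0; /* different signatures */
}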
/* serialize access to the physical write to MSR 0x79 */
static DEFINE_SPINLOCK(microcode_update_lock);
static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data(cpu_num);
unsigned int val[2];
memset(csig, 0, sizeof(*csig));
if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
cpu_has(c, X86_FEATURE_IA64)) {
printk(KERN_ERR "microcode: CPU%d not a capable Intel "
"processor\n", cpu_num);
return -1;
}
csig->sig = cpuid_eax(0x00000001);
if ((c->x86_model >= 5) || (c->x86 > 6)) {
/* get processor flags from MSR 0x17 */
rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
csig->pf = 1 << ((val[1] >> 18) & 7);
}
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
csig->sig, csig->pf, csig->rev);
return 0;
}
static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
{
return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
}
static inline int
update_match_revision(struct microcode_header_intel *mc_header, int rev)
{
return (mc_header->rev <= rev) ? 0 : 1;
}
static int microcode_sanity_check(void *mc)
{
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header = NULL;
struct extended_signature *ext_sig;
unsigned long total_size, data_size, ext_table_size;
int sum, orig_sum, ext_sigcount = 0, i;
total_size = get_totalsize(mc_header);
data_size = get_datasize(mc_header);
if (data_size + MC_HEADER_SIZE > total_size) {
printk(KERN_ERR "microcode: error! "
"Bad data size in microcode data file\n");
return -EINVAL;
}
if (mc_header->ldrver != 1 || mc_header->hdrver != 1) {
printk(KERN_ERR "microcode: error! "
"Unknown microcode update format\n");
return -EINVAL;
}
ext_table_size = total_size - (MC_HEADER_SIZE + data_size);
if (ext_table_size) {
if ((ext_table_size < EXT_HEADER_SIZE)
|| ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) {
printk(KERN_ERR "microcode: error! "
"Small exttable size in microcode data file\n");
return -EINVAL;
}
ext_header = mc + MC_HEADER_SIZE + data_size;
if (ext_table_size != exttable_size(ext_header)) {
printk(KERN_ERR "microcode: error! "
"Bad exttable size in microcode data file\n");
return -EFAULT;
}
ext_sigcount = ext_header->count;
}
/* check extended table checksum */
if (ext_table_size) {
int ext_table_sum = 0;
int *ext_tablep = (int *)ext_header;
i = ext_table_size / DWSIZE;
while (i--)
ext_table_sum += ext_tablep[i];
if (ext_table_sum) {
printk(KERN_WARNING "microcode: aborting, "
"bad extended signature table checksum\n");
return -EINVAL;
}
}
/* calculate the checksum */
orig_sum = 0;
i = (MC_HEADER_SIZE + data_size) / DWSIZE;
while (i--)
orig_sum += ((int *)mc)[i];
if (orig_sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
if (!ext_table_size)
return 0;
/* check extended signature checksum */
for (i = 0; i < ext_sigcount; i++) {
ext_sig = (void *)ext_header + EXT_HEADER_SIZE +
EXT_SIGNATURE_SIZE * i;
sum = orig_sum
- (mc_header->sig + mc_header->pf + mc_header->cksum)
+ (ext_sig->sig + ext_sig->pf + ext_sig->cksum);
if (sum) {
printk(KERN_ERR "microcode: aborting, bad checksum\n");
return -EINVAL;
}
}
return 0;
}
/*
* return 0 - no update found
* return 1 - found update
*/
static int
get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
{
struct microcode_header_intel *mc_header = mc;
struct extended_sigtable *ext_header;
unsigned long total_size = get_totalsize(mc_header);
int ext_sigcount, i;
struct extended_signature *ext_sig;
if (!update_match_revision(mc_header, rev))
return 0;
if (update_match_cpu(cpu_sig, mc_header->sig, mc_header->pf))
return 1;
/* Look for ext. headers: */
if (total_size <= get_datasize(mc_header) + MC_HEADER_SIZE)
return 0;
ext_header = mc + get_datasize(mc_header) + MC_HEADER_SIZE;
ext_sigcount = ext_header->count;
ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
for (i = 0; i < ext_sigcount; i++) {
if (update_match_cpu(cpu_sig, ext_sig->sig, ext_sig->pf))
return 1;
ext_sig++;
}
return 0;
}
static void apply_microcode(int cpu)
{
unsigned long flags;
unsigned int val[2];
int cpu_num = raw_smp_processor_id();
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_intel *mc_intel = uci->mc;
/* We should bind the task to the CPU */
BUG_ON(cpu_num != cpu);
if (mc_intel == NULL)
return;
/* serialize access to the physical write to MSR 0x79 */
spin_lock_irqsave(&microcode_update_lock, flags);
/* write microcode via MSR 0x79 */
wrmsr(MSR_IA32_UCODE_WRITE,
(unsigned long) mc_intel->bits,
(unsigned long) mc_intel->bits >> 16 >> 16);
wrmsr(MSR_IA32_UCODE_REV, 0, 0);
/* see notes above for revision 1.07. Apparent chip bug */
sync_core();
/* get the current revision from MSR 0x8B */
rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
spin_unlock_irqrestore(&microcode_update_lock, flags);
if (val[1] != mc_intel->hdr.rev) {
printk(KERN_ERR "microcode: CPU%d update from revision "
"0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]);
return;
}
printk(KERN_INFO "microcode: CPU%d updated from revision "
"0x%x to 0x%x, date = %04x-%02x-%02x \n",
cpu_num, uci->cpu_sig.rev, val[1],
mc_intel->hdr.date & 0xffff,
mc_intel->hdr.date >> 24,
(mc_intel->hdr.date >> 16) & 0xff);
uci->cpu_sig.rev = val[1];
}
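/*
* Illustrative sketch, not part of the driver: the success message above
* unpacks hdr.date, which carries the month in bits 24-31, the day in
* bits 16-23 and the year in bits 0-15, each as hex-coded decimal. A
* hypothetical standalone decode:
*/
static void decode_ucode_date(unsigned int date, unsigned int *year,
unsigned int *month, unsigned int *day)
{
*year = date & 0xffff; /* e.g. 0x2008 prints as 2008 */
*month = date >> 24;
*day = (date >> 16) & 0xff;
}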
static int generic_load_microcode(int cpu, void *data, size_t size,
int (*get_ucode_data)(void *, const void *, size_t))
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
u8 *ucode_ptr = data, *new_mc = NULL, *mc;
int new_rev = uci->cpu_sig.rev;
unsigned int leftover = size;
while (leftover) {
struct microcode_header_intel mc_header;
unsigned int mc_size;
if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
break;
mc_size = get_totalsize(&mc_header);
if (!mc_size || mc_size > leftover) {
printk(KERN_ERR "microcode: error!"
"Bad data in microcode data file\n");
break;
}
mc = vmalloc(mc_size);
if (!mc)
break;
if (get_ucode_data(mc, ucode_ptr, mc_size) ||
microcode_sanity_check(mc) < 0) {
vfree(mc);
break;
}
if (get_matching_microcode(&uci->cpu_sig, mc, new_rev)) {
if (new_mc)
vfree(new_mc);
new_rev = mc_header.rev;
new_mc = mc;
} else
vfree(mc);
ucode_ptr += mc_size;
leftover -= mc_size;
}
if (new_mc) {
if (!leftover) {
if (uci->mc)
vfree(uci->mc);
uci->mc = (struct microcode_intel *)new_mc;
pr_debug("microcode: CPU%d found a matching microcode update with"
" version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
} else
vfree(new_mc);
}
return (int)leftover;
}
static int get_ucode_fw(void *to, const void *from, size_t n)
{
memcpy(to, from, n);
return 0;
}
static int request_microcode_fw(int cpu, struct device *device)
{
char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
int ret;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
ret = request_firmware(&firmware, name, device);
if (ret) {
pr_debug("microcode: data file %s load failed\n", name);
return ret;
}
ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size,
&get_ucode_fw);
release_firmware(firmware);
return ret;
}
static int get_ucode_user(void *to, const void *from, size_t n)
{
return copy_from_user(to, from, n);
}
static int request_microcode_user(int cpu, const void __user *buf, size_t size)
{
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user);
}
static void microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
vfree(uci->mc);
uci->mc = NULL;
}
struct microcode_ops microcode_intel_ops = {
.request_microcode_user = request_microcode_user,
.request_microcode_fw = request_microcode_fw,
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode,
.microcode_fini_cpu = microcode_fini_cpu,
};
struct microcode_ops * __init init_intel_microcode(void)
{
return &microcode_intel_ops;
}

View file

@@ -0,0 +1,37 @@
/*
* Split spinlock implementation out into its own file, so it can be
* compiled in a FTRACE-compatible way.
*/
#include <linux/spinlock.h>
#include <linux/module.h>
#include <asm/paravirt.h>
static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
{
__raw_spin_lock(lock);
}
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.spin_is_locked = __ticket_spin_is_locked,
.spin_is_contended = __ticket_spin_is_contended,
.spin_lock = __ticket_spin_lock,
.spin_lock_flags = default_spin_lock_flags,
.spin_trylock = __ticket_spin_trylock,
.spin_unlock = __ticket_spin_unlock,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
pv_lock_ops.spin_lock = __byte_spin_lock;
pv_lock_ops.spin_trylock = __byte_spin_trylock;
pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}
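For context, a minimal sketch of how these hooks are consumed; the real wrappers in asm/spinlock.h go through the PVOP_* patching macros, so the direct structure dereferences below are a simplification:

/* Simplified consumers; real code uses the PVOP_VCALL1/PVOP_CALL1 wrappers. */
static inline void raw_lock_sketch(struct raw_spinlock *lock)
{
	pv_lock_ops.spin_lock(lock);	/* ticket locks by default */
}

static inline int raw_trylock_sketch(struct raw_spinlock *lock)
{
	/* byte locks instead, once paravirt_use_bytelocks() has run */
	return pv_lock_ops.spin_trylock(lock);
}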

View file

@ -268,17 +268,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return __get_cpu_var(paravirt_lazy_mode);
}
void __init paravirt_use_bytelocks(void)
{
#ifdef CONFIG_SMP
pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
pv_lock_ops.spin_lock = __byte_spin_lock;
pv_lock_ops.spin_trylock = __byte_spin_trylock;
pv_lock_ops.spin_unlock = __byte_spin_unlock;
#endif
}
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@ -349,6 +338,10 @@ struct pv_cpu_ops pv_cpu_ops = {
.write_ldt_entry = native_write_ldt_entry,
.write_gdt_entry = native_write_gdt_entry,
.write_idt_entry = native_write_idt_entry,
.alloc_ldt = paravirt_nop,
.free_ldt = paravirt_nop,
.load_sp0 = native_load_sp0,
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
@ -460,18 +453,6 @@ struct pv_mmu_ops pv_mmu_ops = {
.set_fixmap = native_set_fixmap,
};
struct pv_lock_ops pv_lock_ops = {
#ifdef CONFIG_SMP
.spin_is_locked = __ticket_spin_is_locked,
.spin_is_contended = __ticket_spin_is_contended,
.spin_lock = __ticket_spin_lock,
.spin_trylock = __ticket_spin_trylock,
.spin_unlock = __ticket_spin_unlock,
#endif
};
EXPORT_SYMBOL(pv_lock_ops);
EXPORT_SYMBOL_GPL(pv_time_ops);
EXPORT_SYMBOL (pv_cpu_ops);
EXPORT_SYMBOL (pv_mmu_ops);

View file

@ -76,47 +76,12 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return ((unsigned long *)tsk->thread.sp)[3];
}
#ifdef CONFIG_HOTPLUG_CPU
#ifndef CONFIG_SMP
#include <asm/nmi.h>
static void cpu_exit_clear(void)
{
int cpu = raw_smp_processor_id();
idle_task_exit();
cpu_uninit();
irq_ctx_exit(cpu);
cpu_clear(cpu, cpu_callout_map);
cpu_clear(cpu, cpu_callin_map);
numa_remove_cpu(cpu);
c1e_remove_cpu(cpu);
}
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
/* This must be done before dead CPU ack */
cpu_exit_clear();
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
/* mask all interrupts, flush any and all caches, and halt */
wbinvd_halt();
}
#else
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif
/*
* The idle thread. There's no useful work to be

View file

@ -86,30 +86,12 @@ void exit_idle(void)
__exit_idle();
}
#ifdef CONFIG_HOTPLUG_CPU
#ifndef CONFIG_SMP
DECLARE_PER_CPU(int, cpu_state);
#include <linux/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
idle_task_exit();
c1e_remove_cpu(raw_smp_processor_id());
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
local_irq_disable();
/* mask all interrupts, flush any and all caches, and halt */
wbinvd_halt();
}
#else
static inline void play_dead(void)
{
BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
#endif
/*
* The idle thread. There's no useful work to be

View file

@ -40,7 +40,9 @@ enum x86_regset {
REGSET_GENERAL,
REGSET_FP,
REGSET_XFP,
REGSET_IOPERM64 = REGSET_XFP,
REGSET_TLS,
REGSET_IOPERM32,
};
/*
@ -555,6 +557,29 @@ static int ptrace_set_debugreg(struct task_struct *child,
return 0;
}
/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
*/
static int ioperm_active(struct task_struct *target,
const struct user_regset *regset)
{
return target->thread.io_bitmap_max / regset->size;
}
static int ioperm_get(struct task_struct *target,
const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
if (!target->thread.io_bitmap_ptr)
return -ENXIO;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
target->thread.io_bitmap_ptr,
0, IO_BITMAP_BYTES);
}
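A quick size check on the regset entries added below, assuming the usual x86 value IO_BITMAP_BITS == 65536:

/*
 * IO_BITMAP_BYTES == 65536 / 8 == 8192, so the 32-bit regset exports
 * 8192 / sizeof(u32) == 2048 slots and the 64-bit one exports
 * IO_BITMAP_LONGS == 8192 / sizeof(long) == 1024 slots.
 * ioperm_active() reports io_bitmap_max / regset->size of them as in use.
 */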
#ifdef CONFIG_X86_PTRACE_BTS
/*
* The configuration for a particular BTS hardware implementation.
@ -1385,6 +1410,12 @@ static const struct user_regset x86_64_regsets[] = {
.size = sizeof(long), .align = sizeof(long),
.active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set
},
[REGSET_IOPERM64] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_LONGS,
.size = sizeof(long), .align = sizeof(long),
.active = ioperm_active, .get = ioperm_get
},
};
static const struct user_regset_view user_x86_64_view = {
@ -1431,6 +1462,12 @@ static const struct user_regset x86_32_regsets[] = {
.active = regset_tls_active,
.get = regset_tls_get, .set = regset_tls_set
},
[REGSET_IOPERM32] = {
.core_note_type = NT_386_IOPERM,
.n = IO_BITMAP_BYTES / sizeof(u32),
.size = sizeof(u32), .align = sizeof(u32),
.active = ioperm_active, .get = ioperm_get
},
};
static const struct user_regset_view user_x86_32_view = {
@ -1452,7 +1489,8 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
#endif
}
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
int error_code, int si_code)
{
struct siginfo info;
@ -1461,7 +1499,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code)
memset(&info, 0, sizeof(info));
info.si_signo = SIGTRAP;
info.si_code = TRAP_BRKPT;
info.si_code = si_code;
/* User-mode ip? */
info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL;
@ -1548,5 +1586,5 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
*/
if (test_thread_flag(TIF_SINGLESTEP) &&
tracehook_consider_fatal_signal(current, SIGTRAP, SIG_DFL))
send_sigtrap(current, regs, 0);
send_sigtrap(current, regs, 0, TRAP_BRKPT);
}

View file

@ -581,6 +581,190 @@ static struct x86_quirks default_x86_quirks __initdata;
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
/*
* Some BIOSes seem to corrupt the low 64k of memory during events
* like suspend/resume and unplugging an HDMI cable. Reserve all
* remaining free memory in that area and fill it with a distinct
* pattern.
*/
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
#define MAX_SCAN_AREAS 8
static int __read_mostly memory_corruption_check = -1;
static unsigned __read_mostly corruption_check_size = 64*1024;
static unsigned __read_mostly corruption_check_period = 60; /* seconds */
static struct e820entry scan_areas[MAX_SCAN_AREAS];
static int num_scan_areas;
static int set_corruption_check(char *arg)
{
char *end;
memory_corruption_check = simple_strtol(arg, &end, 10);
return (*end == 0) ? 0 : -EINVAL;
}
early_param("memory_corruption_check", set_corruption_check);
static int set_corruption_check_period(char *arg)
{
char *end;
corruption_check_period = simple_strtoul(arg, &end, 10);
return (*end == 0) ? 0 : -EINVAL;
}
early_param("memory_corruption_check_period", set_corruption_check_period);
static int set_corruption_check_size(char *arg)
{
char *end;
unsigned size;
size = memparse(arg, &end);
if (*end == '\0')
corruption_check_size = size;
return (size == corruption_check_size) ? 0 : -EINVAL;
}
early_param("memory_corruption_check_size", set_corruption_check_size);
static void __init setup_bios_corruption_check(void)
{
u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
if (memory_corruption_check == -1) {
memory_corruption_check =
#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
1
#else
0
#endif
;
}
if (corruption_check_size == 0)
memory_corruption_check = 0;
if (!memory_corruption_check)
return;
corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
u64 size;
addr = find_e820_area_size(addr, &size, PAGE_SIZE);
if (addr == 0)
break;
if ((addr + size) > corruption_check_size)
size = corruption_check_size - addr;
if (size == 0)
break;
e820_update_range(addr, size, E820_RAM, E820_RESERVED);
scan_areas[num_scan_areas].addr = addr;
scan_areas[num_scan_areas].size = size;
num_scan_areas++;
/* Assume we've already mapped this early memory */
memset(__va(addr), 0, size);
addr += size;
}
printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
num_scan_areas);
update_e820();
}
static struct timer_list periodic_check_timer;
void check_for_bios_corruption(void)
{
int i;
int corruption = 0;
if (!memory_corruption_check)
return;
for(i = 0; i < num_scan_areas; i++) {
unsigned long *addr = __va(scan_areas[i].addr);
unsigned long size = scan_areas[i].size;
for(; size; addr++, size -= sizeof(unsigned long)) {
if (!*addr)
continue;
printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
addr, __pa(addr), *addr);
corruption = 1;
*addr = 0;
}
}
WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
}
static void periodic_check_for_corruption(unsigned long data)
{
check_for_bios_corruption();
mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
}
void start_periodic_check_for_corruption(void)
{
if (!memory_corruption_check || corruption_check_period == 0)
return;
printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
corruption_check_period);
init_timer(&periodic_check_timer);
periodic_check_timer.function = &periodic_check_for_corruption;
periodic_check_for_corruption(0);
}
#endif
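For illustration, the three parameters above combine on the kernel command line like this (values arbitrary; memparse() accepts K/M/G suffixes for the size):

memory_corruption_check=1 memory_corruption_check_size=128K memory_corruption_check_period=30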
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
printk(KERN_NOTICE
"%s detected: BIOS may corrupt low RAM, working it around.\n",
d->ident);
e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
return 0;
}
/* List of systems that have known low memory corruption BIOS problems */
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
#ifdef CONFIG_X86_RESERVE_LOW_64K
{
.callback = dmi_low_memory_corruption,
.ident = "AMI BIOS",
.matches = {
DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
},
},
{
.callback = dmi_low_memory_corruption,
.ident = "Phoenix BIOS",
.matches = {
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
},
},
#endif
{}
};
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@ -715,6 +899,10 @@ void __init setup_arch(char **cmdline_p)
finish_e820_parsing();
dmi_scan_machine();
dmi_check_system(bad_bios_dmi_table);
#ifdef CONFIG_X86_32
probe_roms();
#endif
@ -771,6 +959,10 @@ void __init setup_arch(char **cmdline_p)
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
#endif
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
setup_bios_corruption_check();
#endif
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
@ -799,8 +991,6 @@ void __init setup_arch(char **cmdline_p)
vsmp_init();
#endif
dmi_scan_machine();
io_delay_init();
/*
@ -903,3 +1093,5 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
}

View file

@ -27,6 +27,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/vdso.h>
#include <asm/syscall.h>
#include <asm/syscalls.h>
#include "sigframe.h"
@ -112,6 +113,27 @@ asmlinkage int sys_sigaltstack(unsigned long bx)
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp; \
}
#define COPY_SEG_STRICT(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
#define GET_SEG(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
loadsegment(seg, tmp); \
}
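As a concrete expansion of the helpers above (illustrative; this is just the preprocessor output), COPY(ip) and COPY_SEG(fs) unfold to:

/* COPY(ip): */
{
	err |= __get_user(regs->ip, &sc->ip);
}

/* COPY_SEG(fs): */
{
	unsigned short tmp;
	err |= __get_user(tmp, &sc->fs);
	regs->fs = tmp;
}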
/*
* Do a signal return; undo the signal stack.
@ -120,28 +142,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#define COPY(x) err |= __get_user(regs->x, &sc->x)
#define COPY_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp; }
#define COPY_SEG_STRICT(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp|3; }
#define GET_SEG(seg) \
{ unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
loadsegment(seg, tmp); }
GET_SEG(gs);
COPY_SEG(fs);
COPY_SEG(es);
@ -151,21 +158,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY_SEG_STRICT(cs);
COPY_SEG_STRICT(ss);
{
unsigned int tmpflags;
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) |
(tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
{
void __user *buf;
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
}
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
@ -214,9 +212,8 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
return 0;
}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
struct rt_sigframe __user *frame;
unsigned long ax;
sigset_t set;
@ -242,10 +239,17 @@ asmlinkage int sys_rt_sigreturn(unsigned long __unused)
return ax;
badframe:
force_sig(SIGSEGV, current);
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage int sys_rt_sigreturn(unsigned long __unused)
{
struct pt_regs *regs = (struct pt_regs *)&__unused;
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
@ -337,39 +341,29 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
}
static int
setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
__setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
struct pt_regs *regs)
{
struct sigframe __user *frame;
void __user *restorer;
int err = 0;
int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
usig = current_thread_info()->exec_domain
&& current_thread_info()->exec_domain->signal_invmap
&& sig < 32
? current_thread_info()->exec_domain->signal_invmap[sig]
: sig;
err = __put_user(usig, &frame->sig);
if (err)
goto give_sigsegv;
err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err)
goto give_sigsegv;
return -EFAULT;
if (__put_user(sig, &frame->sig))
return -EFAULT;
if (setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]))
return -EFAULT;
if (_NSIG_WORDS > 1) {
err = __copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask));
if (err)
goto give_sigsegv;
if (__copy_to_user(&frame->extramask, &set->sig[1],
sizeof(frame->extramask)))
return -EFAULT;
}
if (current->mm->context.vdso)
@ -394,7 +388,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
@ -409,38 +403,27 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *restorer;
int err = 0;
int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
usig = current_thread_info()->exec_domain
&& current_thread_info()->exec_domain->signal_invmap
&& sig < 32
? current_thread_info()->exec_domain->signal_invmap[sig]
: sig;
err |= __put_user(usig, &frame->sig);
return -EFAULT;
err |= __put_user(sig, &frame->sig);
err |= __put_user(&frame->info, &frame->pinfo);
err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
return -EFAULT;
/* Create the ucontext. */
if (cpu_has_xsave)
@ -456,7 +439,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up to return from userspace. */
restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
@ -476,12 +459,12 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->sp = (unsigned long)frame;
regs->ip = (unsigned long)ka->sa.sa_handler;
regs->ax = (unsigned long)usig;
regs->ax = (unsigned long)sig;
regs->dx = (unsigned long)&frame->info;
regs->cx = (unsigned long)&frame->uc;
@ -491,15 +474,48 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
}
/*
* OK, we're invoking a handler:
*/
static int signr_convert(int sig)
{
struct thread_info *info = current_thread_info();
if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32)
return info->exec_domain->signal_invmap[sig];
return sig;
}
#define is_ia32 1
#define ia32_setup_frame __setup_frame
#define ia32_setup_rt_frame __setup_rt_frame
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
int usig = signr_convert(sig);
int ret;
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
else
ret = ia32_setup_frame(usig, ka, set, regs);
} else
ret = __setup_rt_frame(sig, ka, info, set, regs);
if (ret) {
force_sigsegv(sig, current);
return -EFAULT;
}
return ret;
}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs *regs)
@ -507,9 +523,9 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
int ret;
/* Are we from a system call? */
if ((long)regs->orig_ax >= 0) {
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
switch (regs->ax) {
switch (syscall_get_error(current, regs)) {
case -ERESTART_RESTARTBLOCK:
case -ERESTARTNOHAND:
regs->ax = -EINTR;
@ -536,15 +552,20 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
/* Set up the stack frame */
if (ka->sa.sa_flags & SA_SIGINFO)
ret = setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = setup_frame(sig, ka, oldset, regs);
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret)
return ret;
#ifdef CONFIG_X86_64
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
#endif
/*
* Clear the direction flag as per the ABI for function entry.
*/
@ -571,6 +592,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
return 0;
}
#define NR_restart_syscall __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@ -623,9 +645,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
if ((long)regs->orig_ax >= 0) {
if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
switch (regs->ax) {
switch (syscall_get_error(current, regs)) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
@ -634,7 +656,7 @@ static void do_signal(struct pt_regs *regs)
break;
case -ERESTART_RESTARTBLOCK:
regs->ax = __NR_restart_syscall;
regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@ -657,6 +679,12 @@ static void do_signal(struct pt_regs *regs)
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
do_signal(regs);
@ -666,5 +694,23 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
tracehook_notify_resume(regs);
}
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk(KERN_INFO
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);
}

View file

@ -52,6 +52,16 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
return do_sigaltstack(uss, uoss, regs->sp);
}
#define COPY(x) { \
err |= __get_user(regs->x, &sc->x); \
}
#define COPY_SEG_STRICT(seg) { \
unsigned short tmp; \
err |= __get_user(tmp, &sc->seg); \
regs->seg = tmp | 3; \
}
/*
* Do a signal return; undo the signal stack.
*/
@ -59,13 +69,13 @@ static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
unsigned long *pax)
{
void __user *buf;
unsigned int tmpflags;
unsigned int err = 0;
/* Always make any pending restarted system calls return -EINTR */
current_thread_info()->restart_block.fn = do_no_restart_syscall;
#define COPY(x) (err |= __get_user(regs->x, &sc->x))
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip);
COPY(r8);
@ -80,34 +90,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
/* Kernel saves and restores only the CS segment register on signals,
* which is the bare minimum needed to allow mixed 32/64-bit code.
* App's signal handler can save/restore other segments if needed. */
{
unsigned cs;
err |= __get_user(cs, &sc->cs);
regs->cs = cs | 3; /* Force into user mode */
}
{
unsigned int tmpflags;
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
}
{
struct _fpstate __user *buf;
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
}
COPY_SEG_STRICT(cs);
err |= __get_user(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
err |= __get_user(*pax, &sc->ax);
return err;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
static long do_rt_sigreturn(struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
sigset_t set;
unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@ -130,10 +130,15 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
return ax;
badframe:
signal_fault(regs, frame, "sigreturn");
signal_fault(regs, frame, "rt_sigreturn");
return 0;
}
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
{
return do_rt_sigreturn(regs);
}
/*
* Set up a signal frame.
*/
@ -195,8 +200,8 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
return (void __user *)round_down(sp - size, 64);
}
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
@ -209,17 +214,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (save_i387_xstate(fp) < 0)
err |= -1;
return -EFAULT;
} else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv;
return -EFAULT;
if (ka->sa.sa_flags & SA_SIGINFO) {
err |= copy_siginfo_to_user(&frame->info, info);
if (err)
goto give_sigsegv;
if (copy_siginfo_to_user(&frame->info, info))
return -EFAULT;
}
/* Create the ucontext. */
@ -247,11 +251,11 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
err |= __put_user(ka->sa.sa_restorer, &frame->pretcode);
} else {
/* could use a vstub here */
goto give_sigsegv;
return -EFAULT;
}
if (err)
goto give_sigsegv;
return -EFAULT;
/* Set up registers for signal handler */
regs->di = sig;
@ -271,15 +275,45 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
regs->cs = __USER_CS;
return 0;
give_sigsegv:
force_sigsegv(sig, current);
return -EFAULT;
} }
/*
* OK, we're invoking a handler
*/
static int signr_convert(int sig)
{
return sig;
}
#ifdef CONFIG_IA32_EMULATION
#define is_ia32 test_thread_flag(TIF_IA32)
#else
#define is_ia32 0
#endif
static int
setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs *regs)
{
int usig = signr_convert(sig);
int ret;
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
else
ret = ia32_setup_frame(usig, ka, set, regs);
} else
ret = __setup_rt_frame(sig, ka, info, set, regs);
if (ret) {
force_sigsegv(sig, current);
return -EFAULT;
}
return ret;
}
static int
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
@ -317,51 +351,48 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
#ifdef CONFIG_IA32_EMULATION
if (test_thread_flag(TIF_IA32)) {
if (ka->sa.sa_flags & SA_SIGINFO)
ret = ia32_setup_rt_frame(sig, ka, info, oldset, regs);
else
ret = ia32_setup_frame(sig, ka, oldset, regs);
} else
#endif
ret = setup_rt_frame(sig, ka, info, oldset, regs);
if (ret == 0) {
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
/*
* Clear the direction flag as per the ABI for function entry.
*/
regs->flags &= ~X86_EFLAGS_DF;
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
}
return ret;
if (ret)
return ret;
#ifdef CONFIG_X86_64
/*
* This has nothing to do with segment registers,
* despite the name. This magic affects uaccess.h
* macros' behavior. Reset it to the normal setting.
*/
set_fs(USER_DS);
#endif
/*
* Clear the direction flag as per the ABI for function entry.
*/
regs->flags &= ~X86_EFLAGS_DF;
/*
* Clear TF when entering the signal handler, but
* notify any tracer that was single-stepping it.
* The tracer may want to single-step inside the
* handler too.
*/
regs->flags &= ~X86_EFLAGS_TF;
spin_lock_irq(&current->sighand->siglock);
sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
if (!(ka->sa.sa_flags & SA_NODEFER))
sigaddset(&current->blocked, sig);
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
tracehook_signal_handler(sig, info, ka, regs,
test_thread_flag(TIF_SINGLESTEP));
return 0;
}
#define NR_restart_syscall \
test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall
/*
* Note that 'init' is a special process: it doesn't get signals it doesn't
* want to handle. Thus you cannot kill init even with a SIGKILL even by
@ -391,7 +422,8 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Re-enable any watchpoints before delivering the
/*
* Re-enable any watchpoints before delivering the
* signal to user space. The processor register will
* have been cleared if the watchpoint triggered
* inside the kernel.
@ -399,7 +431,7 @@ static void do_signal(struct pt_regs *regs)
if (current->thread.debugreg7)
set_debugreg(current->thread.debugreg7, 7);
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
* A signal was successfully delivered; the saved
@ -422,10 +454,9 @@ static void do_signal(struct pt_regs *regs)
regs->ax = regs->orig_ax;
regs->ip -= 2;
break;
case -ERESTART_RESTARTBLOCK:
regs->ax = test_thread_flag(TIF_IA32) ?
__NR_ia32_restart_syscall :
__NR_restart_syscall;
regs->ax = NR_restart_syscall;
regs->ip -= 2;
break;
}
@ -441,14 +472,18 @@ static void do_signal(struct pt_regs *regs)
}
}
void do_notify_resume(struct pt_regs *regs, void *unused,
__u32 thread_info_flags)
/*
* notification of userspace execution resumption
* - triggered by the TIF_WORK_MASK flags
*/
void
do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
{
#ifdef CONFIG_X86_MCE
#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
/* notify userspace of pending MCEs */
if (thread_info_flags & _TIF_MCE_NOTIFY)
mce_notify_user();
#endif /* CONFIG_X86_MCE */
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
@ -458,17 +493,23 @@ void do_notify_resume(struct pt_regs *regs, void *unused,
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
#ifdef CONFIG_X86_32
clear_thread_flag(TIF_IRET);
#endif /* CONFIG_X86_32 */
}
void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
{
struct task_struct *me = current;
if (show_unhandled_signals && printk_ratelimit()) {
printk("%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame, regs->ip,
regs->sp, regs->orig_ax);
printk(KERN_INFO
"%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx",
me->comm, me->pid, where, frame,
regs->ip, regs->sp, regs->orig_ax);
print_vma_addr(" in ", regs->ip);
printk("\n");
printk(KERN_CONT "\n");
}
force_sig(SIGSEGV, me);

View file

@ -214,12 +214,16 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
struct smp_ops smp_ops = {
.smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
.smp_prepare_cpus = native_smp_prepare_cpus,
.cpu_up = native_cpu_up,
.smp_cpus_done = native_smp_cpus_done,
.smp_send_stop = native_smp_send_stop,
.smp_send_reschedule = native_smp_send_reschedule,
.cpu_up = native_cpu_up,
.cpu_die = native_cpu_die,
.cpu_disable = native_cpu_disable,
.play_dead = native_play_dead,
.send_call_func_ipi = native_send_call_func_ipi,
.send_call_func_single_ipi = native_send_call_func_single_ipi,
};
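For orientation, roughly how the generic hotplug path reaches the new entries; the real static inlines live in asm/smp.h and are shown here simplified:

static inline int __cpu_disable(void)
{
	return smp_ops.cpu_disable();
}

static inline void __cpu_die(unsigned int cpu)
{
	smp_ops.cpu_die(cpu);
}

static inline void play_dead(void)
{
	smp_ops.play_dead();
}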

View file

@ -52,6 +52,7 @@
#include <asm/desc.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/idle.h>
#include <asm/smp.h>
#include <asm/trampoline.h>
#include <asm/cpu.h>
@ -1344,7 +1345,29 @@ static void __ref remove_cpu_from_maps(int cpu)
numa_remove_cpu(cpu);
}
int __cpu_disable(void)
void cpu_disable_common(void)
{
int cpu = smp_processor_id();
/*
* HACK:
* Allow any queued timer interrupts to get serviced
* This is only a temporary solution until we cleanup
* fixup_irqs as we do for IA64.
*/
local_irq_enable();
mdelay(1);
local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
}
int native_cpu_disable(void)
{
int cpu = smp_processor_id();
@ -1363,27 +1386,11 @@ int __cpu_disable(void)
stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
/*
cpu_disable_common();
* HACK:
* Allow any queued timer interrupts to get serviced
* This is only a temporary solution until we cleanup
* fixup_irqs as we do for IA64.
*/
local_irq_enable();
mdelay(1);
local_irq_disable();
remove_siblinginfo(cpu);
/* It's now safe to remove this processor from the online map */
lock_vector_lock();
remove_cpu_from_maps(cpu);
unlock_vector_lock();
fixup_irqs(cpu_online_map);
return 0;
}
void __cpu_die(unsigned int cpu)
void native_cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
unsigned int i;
@ -1400,15 +1407,45 @@ void __cpu_die(unsigned int cpu)
}
printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
void play_dead_common(void)
{
idle_task_exit();
reset_lazy_tlbstate();
irq_ctx_exit(raw_smp_processor_id());
c1e_remove_cpu(raw_smp_processor_id());
mb();
/* Ack it */
__get_cpu_var(cpu_state) = CPU_DEAD;
/*
* With physical CPU hotplug, we should halt the cpu
*/
local_irq_disable();
}
void native_play_dead(void)
{
play_dead_common();
wbinvd_halt();
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
int native_cpu_disable(void)
{
return -ENOSYS;
}
void __cpu_die(unsigned int cpu)
void native_cpu_die(unsigned int cpu)
{
/* We said "no" in __cpu_disable */
BUG();
}
void native_play_dead(void)
{
BUG();
}
#endif

View file

@ -241,3 +241,11 @@ void flush_tlb_all(void)
on_each_cpu(do_flush_tlb_all, NULL, 1);
}
void reset_lazy_tlbstate(void)
{
int cpu = raw_smp_processor_id();
per_cpu(cpu_tlbstate, cpu).state = 0;
per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
}

View file

@ -891,6 +891,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
unsigned int condition;
int si_code;
trace_hardirqs_fixup();
@ -935,8 +936,9 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
goto clear_TF_reenable;
}
si_code = get_si_code((unsigned long)condition);
/* Ok, finally something we can handle */
send_sigtrap(tsk, regs, error_code);
send_sigtrap(tsk, regs, error_code, si_code);
/*
* Disable additional traps. They'll be re-enabled when

View file

@ -940,7 +940,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs *regs,
tsk->thread.error_code = error_code;
info.si_signo = SIGTRAP;
info.si_errno = 0;
info.si_code = TRAP_BRKPT;
info.si_code = get_si_code(condition);
info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
force_sig_info(SIGTRAP, &info, tsk);

View file

@ -95,7 +95,9 @@ int save_i387_xstate(void __user *buf)
* Start with clearing the user buffer. This will present a
* clean context for the bytes not touched by the fxsave/xsave.
*/
__clear_user(buf, sig_xstate_size);
err = __clear_user(buf, sig_xstate_size);
if (err)
return err;
if (task_thread_info(tsk)->status & TS_XSAVE)
err = xsave_user(buf);
@ -114,6 +116,8 @@ int save_i387_xstate(void __user *buf)
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
struct _xstate __user *x = buf;
u64 xstate_bv;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
@ -121,6 +125,31 @@ int save_i387_xstate(void __user *buf)
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));
/*
* Read the xstate_bv which we copied (directly from the cpu or
* from the state in task struct) to the user buffers and
* set the FP/SSE bits.
*/
err |= __get_user(xstate_bv, &x->xstate_hdr.xstate_bv);
/*
* For legacy compatibility, we always set the FP/SSE bits in the
* bit vector while saving the state to the user context. This
* lets us capture any changes (during sigreturn) to the FP/SSE
* bits made by legacy applications which don't touch xstate_bv
* in the xsave header.
*
* xsave aware apps can change the xstate_bv in the xsave
* header as well as change any contents in the memory layout.
* xrestore as part of sigreturn will capture all the changes.
*/
xstate_bv |= XSTATE_FPSSE;
err |= __put_user(xstate_bv, &x->xstate_hdr.xstate_bv);
if (err)
return err;
}
return 1;
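For reference, the bit assignments assumed by the code above (per the xsave layout; XSTATE_FP and XSTATE_SSE are bits 0 and 1):

/*
 * xstate_bv is the feature bitmap in the xsave header:
 * bit 0 = x87 FP state, bit 1 = SSE/XMM state, so XSTATE_FPSSE == 0x3.
 * Setting both bits marks those areas as valid for the xrstor done
 * on sigreturn.
 */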
@ -272,7 +301,7 @@ void __cpuinit xsave_init(void)
/*
* setup the xstate image representing the init state
*/
void setup_xstate_init(void)
static void __init setup_xstate_init(void)
{
init_xstate_buf = alloc_bootmem(xstate_size);
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;

View file

@ -914,15 +914,15 @@ LIST_HEAD(pgd_list);
void vmalloc_sync_all(void)
{
#ifdef CONFIG_X86_32
unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;
#ifdef CONFIG_X86_32
if (SHARED_KERNEL_PMD)
return;
BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) {
for (address = VMALLOC_START & PMD_MASK;
address >= TASK_SIZE && address < FIXADDR_TOP;
address += PMD_SIZE) {
unsigned long flags;
struct page *page;
@ -935,10 +935,8 @@ void vmalloc_sync_all(void)
spin_unlock_irqrestore(&pgd_lock, flags);
}
#else /* CONFIG_X86_64 */
unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;
for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
address += PGDIR_SIZE) {
const pgd_t *pgd_ref = pgd_offset_k(address);
unsigned long flags;
struct page *page;

View file

@ -31,6 +31,7 @@
#include <linux/cpumask.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@ -969,6 +970,8 @@ void __init mem_init(void)
int codesize, reservedpages, datasize, initsize;
int tmp;
start_periodic_check_for_corruption();
#ifdef CONFIG_FLATMEM
BUG_ON(!mem_map);
#endif

View file

@ -31,6 +31,7 @@
#include <linux/nmi.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@ -881,6 +882,8 @@ void __init mem_init(void)
{
long codesize, reservedpages, datasize, initsize;
start_periodic_check_for_corruption();
pci_iommu_alloc();
/* clear_bss() already clear the empty_zero_page */

View file

@ -24,19 +24,27 @@
#ifdef CONFIG_X86_64
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map)
return x - __START_KERNEL_map + phys_base;
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
static inline int phys_addr_valid(unsigned long addr)
{
return addr < (1UL << boot_cpu_data.x86_phys_bits);
}
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map) {
x -= __START_KERNEL_map;
VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
x += phys_base;
} else {
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
x -= PAGE_OFFSET;
VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
!phys_addr_valid(x));
}
return x;
}
EXPORT_SYMBOL(__phys_addr);
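A hedged usage sketch of the two cases handled above (_text lives in the kernel-image mapping; __va() produces a direct-map alias):

/* Illustrative only: both mappings must resolve to the same physical address. */
unsigned long p_img = __phys_addr((unsigned long)_text);	/* >= __START_KERNEL_map */
unsigned long p_dir = __phys_addr((unsigned long)__va(p_img));	/* direct map */
BUG_ON(p_img != p_dir);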
#else
static inline int phys_addr_valid(unsigned long addr)
@ -44,6 +52,17 @@ static inline int phys_addr_valid(unsigned long addr)
return 1;
}
#ifdef CONFIG_DEBUG_VIRTUAL
unsigned long __phys_addr(unsigned long x)
{
/* VMALLOC_* aren't constants; not available at boot time */
VIRTUAL_BUG_ON(x < PAGE_OFFSET || (system_state != SYSTEM_BOOTING &&
is_vmalloc_addr((void *)x)));
return x - PAGE_OFFSET;
}
EXPORT_SYMBOL(__phys_addr);
#endif
#endif
int page_is_ram(unsigned long pagenr)

View file

@ -26,5 +26,13 @@ config XEN_MAX_DOMAIN_MEMORY
config XEN_SAVE_RESTORE
bool
depends on PM
depends on XEN && PM
default y
config XEN_DEBUG_FS
bool "Enable Xen debug and tuning parameters in debugfs"
depends on XEN && DEBUG_FS
default n
help
Enable statistics output and various tuning options in debugfs.
Enabling this option may incur a significant performance overhead.

View file

@ -1,4 +1,12 @@
obj-y := enlighten.o setup.o multicalls.o mmu.o \
ifdef CONFIG_FTRACE
# Do not profile debug and lowlevel utilities
CFLAGS_REMOVE_spinlock.o = -pg
CFLAGS_REMOVE_time.o = -pg
CFLAGS_REMOVE_irq.o = -pg
endif
obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
time.o xen-asm_$(BITS).o grant-table.o suspend.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SMP) += smp.o spinlock.o
obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o

123
arch/x86/xen/debugfs.c Normal file
View file

@ -0,0 +1,123 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include "debugfs.h"
static struct dentry *d_xen_debug;
struct dentry * __init xen_init_debugfs(void)
{
if (!d_xen_debug) {
d_xen_debug = debugfs_create_dir("xen", NULL);
if (!d_xen_debug)
pr_warning("Could not create 'xen' debugfs directory\n");
}
return d_xen_debug;
}
struct array_data
{
void *array;
unsigned elements;
};
static int u32_array_open(struct inode *inode, struct file *file)
{
file->private_data = NULL;
return nonseekable_open(inode, file);
}
static size_t format_array(char *buf, size_t bufsize, const char *fmt,
u32 *array, unsigned array_size)
{
size_t ret = 0;
unsigned i;
for(i = 0; i < array_size; i++) {
size_t len;
len = snprintf(buf, bufsize, fmt, array[i]);
len++; /* ' ' or '\n' */
ret += len;
if (buf) {
buf += len;
bufsize -= len;
buf[-1] = (i == array_size-1) ? '\n' : ' ';
}
}
ret++; /* \0 */
if (buf)
*buf = '\0';
return ret;
}
static char *format_array_alloc(const char *fmt, u32 *array, unsigned array_size)
{
size_t len = format_array(NULL, 0, fmt, array, array_size);
char *ret;
ret = kmalloc(len, GFP_KERNEL);
if (ret == NULL)
return NULL;
format_array(ret, len, fmt, array, array_size);
return ret;
}
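A worked example of the two-pass formatting (values hypothetical): the first format_array() call with buf == NULL only computes the size ("1 2 3\n" plus the terminating NUL needs 7 bytes); the second pass fills the freshly allocated buffer.

u32 vals[] = { 1, 2, 3 };
char *s = format_array_alloc("%u", vals, 3);	/* s == "1 2 3\n" */
kfree(s);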
static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len,
loff_t *ppos)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct array_data *data = inode->i_private;
size_t size;
if (*ppos == 0) {
if (file->private_data) {
kfree(file->private_data);
file->private_data = NULL;
}
file->private_data = format_array_alloc("%u", data->array, data->elements);
}
size = 0;
if (file->private_data)
size = strlen(file->private_data);
return simple_read_from_buffer(buf, len, ppos, file->private_data, size);
}
static int xen_array_release(struct inode *inode, struct file *file)
{
kfree(file->private_data);
return 0;
}
static struct file_operations u32_array_fops = {
.owner = THIS_MODULE,
.open = u32_array_open,
.release= xen_array_release,
.read = u32_array_read,
};
struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
struct dentry *parent,
u32 *array, unsigned elements)
{
struct array_data *data = kmalloc(sizeof(*data), GFP_KERNEL);
if (data == NULL)
return NULL;
data->array = array;
data->elements = elements;
return debugfs_create_file(name, mode, parent, data, &u32_array_fops);
}
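Typical registration, roughly as the spinlock statistics added elsewhere in this series would do it (names here are hypothetical):

/* Expose a fixed-size u32 histogram under the shared "xen" debugfs dir. */
static u32 histo[64];

static int __init histo_debugfs_setup(void)
{
	struct dentry *d = xen_init_debugfs();

	if (d)
		xen_debugfs_create_u32_array("histo", 0444, d,
					     histo, ARRAY_SIZE(histo));
	return 0;
}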

10
arch/x86/xen/debugfs.h Normal file
View file

@ -0,0 +1,10 @@
#ifndef _XEN_DEBUGFS_H
#define _XEN_DEBUGFS_H
struct dentry * __init xen_init_debugfs(void);
struct dentry *xen_debugfs_create_u32_array(const char *name, mode_t mode,
struct dentry *parent,
u32 *array, unsigned elements);
#endif /* _XEN_DEBUGFS_H */

View file

@ -30,7 +30,6 @@
#include <xen/interface/xen.h>
#include <xen/interface/physdev.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/sched.h>
#include <xen/features.h>
#include <xen/page.h>
#include <xen/hvc-console.h>
@ -58,6 +57,9 @@ EXPORT_SYMBOL_GPL(hypercall_page);
DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info);
enum xen_domain_type xen_domain_type = XEN_NATIVE;
EXPORT_SYMBOL_GPL(xen_domain_type);
/*
* Identity map, in addition to plain kernel map. This needs to be
* large enough to allocate page table pages to allocate the rest.
@ -111,7 +113,14 @@ struct shared_info *HYPERVISOR_shared_info = (void *)&xen_dummy_shared_info;
*
* 0: not available, 1: available
*/
static int have_vcpu_info_placement = 1;
static int have_vcpu_info_placement =
#ifdef CONFIG_X86_32
1
#else
0
#endif
;
static void xen_vcpu_setup(int cpu)
{
@ -227,94 +236,6 @@ static unsigned long xen_get_debugreg(int reg)
return HYPERVISOR_get_debugreg(reg);
}
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
static void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
if (flags == 0) {
preempt_check_resched();
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
}
static void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/* We don't need to worry about being preempted here, since
either a) interrupts are disabled, so no preemption, or b)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
force_evtchn_callback();
}
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
BUG();
}
static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
else
xen_safe_halt();
}
static void xen_leave_lazy(void)
{
paravirt_leave_lazy(paravirt_get_lazy_mode());
@@ -326,6 +247,59 @@ static unsigned long xen_store_tr(void)
return 0;
}
/*
* Set the page permissions for a particular virtual address. If the
* address is a vmalloc mapping (or other non-linear mapping), then
* find the linear mapping of the page and also set its protections to
* match.
*/
static void set_aliased_prot(void *v, pgprot_t prot)
{
int level;
pte_t *ptep;
pte_t pte;
unsigned long pfn;
struct page *page;
ptep = lookup_address((unsigned long)v, &level);
BUG_ON(ptep == NULL);
pfn = pte_pfn(*ptep);
page = pfn_to_page(pfn);
pte = pfn_pte(pfn, prot);
if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0))
BUG();
if (!PageHighMem(page)) {
void *av = __va(PFN_PHYS(pfn));
if (av != v)
if (HYPERVISOR_update_va_mapping((unsigned long)av, pte, 0))
BUG();
} else
kmap_flush_unused();
}
static void xen_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
int i;
for(i = 0; i < entries; i += entries_per_page)
set_aliased_prot(ldt + i, PAGE_KERNEL_RO);
}
static void xen_free_ldt(struct desc_struct *ldt, unsigned entries)
{
const unsigned entries_per_page = PAGE_SIZE / LDT_ENTRY_SIZE;
int i;
for(i = 0; i < entries; i += entries_per_page)
set_aliased_prot(ldt + i, PAGE_KERNEL);
}
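/*
 * Aside (not part of this change): Xen validates and shadows guest
 * descriptor tables, so the guest must drop every writable mapping
 * of an LDT page before handing it to the hypervisor. That is why
 * xen_alloc_ldt() above marks both the vmalloc mapping and, via
 * set_aliased_prot(), its linear-map alias read-only, and
 * xen_free_ldt() restores PAGE_KERNEL once the LDT is torn down.
 */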
static void xen_set_ldt(const void *addr, unsigned entries)
{
struct mmuext_op *op;
@@ -426,8 +400,7 @@ static void xen_load_gs_index(unsigned int idx)
static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
const void *ptr)
{
-unsigned long lp = (unsigned long)&dt[entrynum];
-xmaddr_t mach_lp = virt_to_machine(lp);
+xmaddr_t mach_lp = arbitrary_virt_to_machine(&dt[entrynum]);
u64 entry = *(u64 *)ptr;
preempt_disable();
@@ -560,7 +533,7 @@ static void xen_write_gdt_entry(struct desc_struct *dt, int entry,
}
static void xen_load_sp0(struct tss_struct *tss,
struct thread_struct *thread)
{
struct multicall_space mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
@@ -835,6 +808,19 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
ret = -EFAULT;
break;
#endif
+case MSR_STAR:
+case MSR_CSTAR:
+case MSR_LSTAR:
+case MSR_SYSCALL_MASK:
+case MSR_IA32_SYSENTER_CS:
+case MSR_IA32_SYSENTER_ESP:
+case MSR_IA32_SYSENTER_EIP:
+/* Fast syscall setup is all done in hypercalls, so
+these are all ignored. Stub them out here to stop
+Xen console noise. */
+break;
default:
ret = native_write_msr_safe(msr, low, high);
}
@@ -878,8 +864,8 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l
SetPagePinned(page);
if (!PageHighMem(page)) {
-make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
-if (level == PT_PTE)
+make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn)));
+if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
} else
/* make sure there are no stray mappings of
@@ -947,7 +933,7 @@ static void xen_release_ptpage(unsigned long pfn, unsigned level)
if (PagePinned(page)) {
if (!PageHighMem(page)) {
-if (level == PT_PTE)
+if (level == PT_PTE && USE_SPLIT_PTLOCKS)
pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
}
@@ -994,6 +980,7 @@ static void *xen_kmap_atomic_pte(struct page *page, enum km_type type)
}
#endif
+#ifdef CONFIG_X86_32
static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte)
{
/* If there's an existing pte, then don't allow _PAGE_RW to be set */
@@ -1012,6 +999,7 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
xen_set_pte(ptep, pte);
}
+#endif
static __init void xen_pagetable_setup_start(pgd_t *base)
{
@@ -1078,7 +1066,6 @@ void xen_setup_vcpu_info_placement(void)
/* xen_vcpu_setup managed to place the vcpu_info within the
percpu area for all cpus, so make use of it */
-#ifdef CONFIG_X86_32
if (have_vcpu_info_placement) {
printk(KERN_INFO "Xen: using vcpu_info placement\n");
@@ -1088,7 +1075,6 @@ void xen_setup_vcpu_info_placement(void)
pv_irq_ops.irq_enable = xen_irq_enable_direct;
pv_mmu_ops.read_cr2 = xen_read_cr2_direct;
}
-#endif
}
static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
@@ -1109,12 +1095,10 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
goto patch_site
switch (type) {
-#ifdef CONFIG_X86_32
SITE(pv_irq_ops, irq_enable);
SITE(pv_irq_ops, irq_disable);
SITE(pv_irq_ops, save_fl);
SITE(pv_irq_ops, restore_fl);
-#endif /* CONFIG_X86_32 */
#undef SITE
patch_site:
@@ -1252,6 +1236,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.load_gs_index = xen_load_gs_index,
#endif
+.alloc_ldt = xen_alloc_ldt,
+.free_ldt = xen_free_ldt,
.store_gdt = native_store_gdt,
.store_idt = native_store_idt,
.store_tr = xen_store_tr,
@@ -1273,36 +1260,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
},
};
(removed here: __xen_init_IRQ() and the xen_irq_ops pv_irq_ops table move to the new arch/x86/xen/irq.c below)
static const struct pv_apic_ops xen_apic_ops __initdata = {
#ifdef CONFIG_X86_LOCAL_APIC
.setup_boot_clock = paravirt_nop,
@@ -1443,7 +1400,7 @@ static void __init xen_reserve_top(void)
if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
top = pp.virt_start;
-reserve_top_address(-top + 2 * PAGE_SIZE);
+reserve_top_address(-top);
#endif /* CONFIG_X86_32 */
}
@@ -1477,48 +1434,11 @@ static void *m2v(phys_addr_t maddr)
return __ka(m2p(maddr));
}
-#ifdef CONFIG_X86_64
-static void walk(pgd_t *pgd, unsigned long addr)
-{
-unsigned l4idx = pgd_index(addr);
-unsigned l3idx = pud_index(addr);
-unsigned l2idx = pmd_index(addr);
-unsigned l1idx = pte_index(addr);
-pgd_t l4;
-pud_t l3;
-pmd_t l2;
-pte_t l1;
-xen_raw_printk("walk %p, %lx -> %d %d %d %d\n",
-pgd, addr, l4idx, l3idx, l2idx, l1idx);
-l4 = pgd[l4idx];
-xen_raw_printk(" l4: %016lx\n", l4.pgd);
-xen_raw_printk(" %016lx\n", pgd_val(l4));
-l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx];
-xen_raw_printk(" l3: %016lx\n", l3.pud);
-xen_raw_printk(" %016lx\n", pud_val(l3));
-l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx];
-xen_raw_printk(" l2: %016lx\n", l2.pmd);
-xen_raw_printk(" %016lx\n", pmd_val(l2));
-l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx];
-xen_raw_printk(" l1: %016lx\n", l1.pte);
-xen_raw_printk(" %016lx\n", pte_val(l1));
-}
-#endif
static void set_page_prot(void *addr, pgprot_t prot)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
-xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n",
-addr, pfn, get_phys_to_machine(pfn),
-pgprot_val(prot), pte.pte);
if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
BUG();
}
@@ -1694,6 +1614,8 @@ asmlinkage void __init xen_start_kernel(void)
if (!xen_start_info)
return;
+xen_domain_type = XEN_PV_DOMAIN;
BUG_ON(memcmp(xen_start_info->magic, "xen-3", 5) != 0);
xen_setup_features();
@@ -1703,10 +1625,11 @@ asmlinkage void __init xen_start_kernel(void)
pv_init_ops = xen_init_ops;
pv_time_ops = xen_time_ops;
pv_cpu_ops = xen_cpu_ops;
-pv_irq_ops = xen_irq_ops;
pv_apic_ops = xen_apic_ops;
pv_mmu_ops = xen_mmu_ops;
+xen_init_irq_ops();
#ifdef CONFIG_X86_LOCAL_APIC
/*
* set up the basic apic ops.
@@ -1737,7 +1660,7 @@ asmlinkage void __init xen_start_kernel(void)
/* Prevent unwanted bits from being set in PTEs. */
__supported_pte_mask &= ~_PAGE_GLOBAL;
-if (!is_initial_xendomain())
+if (!xen_initial_domain())
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
/* Don't do the full vcpu_info placement stuff until we have a
@@ -1772,7 +1695,7 @@ asmlinkage void __init xen_start_kernel(void)
boot_params.hdr.ramdisk_size = xen_start_info->mod_len;
boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line);
-if (!is_initial_xendomain()) {
+if (!xen_initial_domain()) {
add_preferred_console("xenboot", 0, NULL);
add_preferred_console("tty", 0, NULL);
add_preferred_console("hvc", 0, NULL);
@@ -1780,15 +1703,6 @@ asmlinkage void __init xen_start_kernel(void)
xen_raw_console_write("about to get started...\n");
-#if 0
-xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n",
-&boot_params, __pa_symbol(&boot_params),
-__va(__pa_symbol(&boot_params)));
-walk(pgd, &boot_params);
-walk(pgd, __va(__pa(&boot_params)));
-#endif
/* Start the world */
#ifdef CONFIG_X86_32
i386_start_kernel();

arch/x86/xen/irq.c (new file, 143 lines)
@@ -0,0 +1,143 @@
#include <linux/hardirq.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
#include <xen/interface/vcpu.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>
#include "xen-ops.h"
/*
* Force a proper event-channel callback from Xen after clearing the
* callback mask. We do this in a very simple manner, by making a call
* down into Xen. The pending flag will be checked by Xen on return.
*/
void xen_force_evtchn_callback(void)
{
(void)HYPERVISOR_xen_version(0, NULL);
}
static void __init __xen_init_IRQ(void)
{
#ifdef CONFIG_X86_64
int i;
/* Create identity vector->irq map */
for(i = 0; i < NR_VECTORS; i++) {
int cpu;
for_each_possible_cpu(cpu)
per_cpu(vector_irq, cpu)[i] = i;
}
#endif /* CONFIG_X86_64 */
xen_init_IRQ();
}
static unsigned long xen_save_fl(void)
{
struct vcpu_info *vcpu;
unsigned long flags;
vcpu = x86_read_percpu(xen_vcpu);
/* flag has opposite sense of mask */
flags = !vcpu->evtchn_upcall_mask;
/* convert to IF type flag
-0 -> 0x00000000
-1 -> 0xffffffff
*/
return (-flags) & X86_EFLAGS_IF;
}
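/*
 * Aside (illustrative, not part of this change): the (-flags) trick
 * above maps the 0/1 boolean to 0x00000000/0xffffffff and masks it
 * down to the IF bit, which is bit 9 (0x200) of EFLAGS:
 *
 *	flags = !mask;			// 0 or 1
 *	(-0) & X86_EFLAGS_IF == 0	// event delivery masked
 *	(-1) & X86_EFLAGS_IF == 0x200	// event delivery open
 */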
static void xen_restore_fl(unsigned long flags)
{
struct vcpu_info *vcpu;
/* convert from IF type flag */
flags = !(flags & X86_EFLAGS_IF);
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = flags;
preempt_enable_no_resched();
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
if (flags == 0) {
preempt_check_resched();
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
}
}
static void xen_irq_disable(void)
{
/* There's a one instruction preempt window here. We need to
make sure we don't switch CPUs between getting the vcpu
pointer and updating the mask. */
preempt_disable();
x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1;
preempt_enable_no_resched();
}
static void xen_irq_enable(void)
{
struct vcpu_info *vcpu;
/* We don't need to worry about being preempted here, since
either a) interrupts are disabled, so no preemption, or b)
the caller is confused and is trying to re-enable interrupts
on an indeterminate processor. */
vcpu = x86_read_percpu(xen_vcpu);
vcpu->evtchn_upcall_mask = 0;
/* Doesn't matter if we get preempted here, because any
pending event will get dealt with anyway. */
barrier(); /* unmask then check (avoid races) */
if (unlikely(vcpu->evtchn_upcall_pending))
xen_force_evtchn_callback();
}
static void xen_safe_halt(void)
{
/* Blocking includes an implicit local_irq_enable(). */
if (HYPERVISOR_sched_op(SCHEDOP_block, NULL) != 0)
BUG();
}
static void xen_halt(void)
{
if (irqs_disabled())
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
else
xen_safe_halt();
}
static const struct pv_irq_ops xen_irq_ops __initdata = {
.init_IRQ = __xen_init_IRQ,
.save_fl = xen_save_fl,
.restore_fl = xen_restore_fl,
.irq_disable = xen_irq_disable,
.irq_enable = xen_irq_enable,
.safe_halt = xen_safe_halt,
.halt = xen_halt,
#ifdef CONFIG_X86_64
.adjust_exception_frame = xen_adjust_exception_frame,
#endif
};
void __init xen_init_irq_ops(void)
{
pv_irq_ops = xen_irq_ops;
}

arch/x86/xen/mmu.c
@@ -40,6 +40,7 @@
*/
#include <linux/sched.h>
#include <linux/highmem.h>
+#include <linux/debugfs.h>
#include <linux/bug.h>
#include <asm/pgtable.h>
@@ -57,6 +58,61 @@
#include "multicalls.h"
#include "mmu.h"
+#include "debugfs.h"
#define MMU_UPDATE_HISTO 30
#ifdef CONFIG_XEN_DEBUG_FS
static struct {
u32 pgd_update;
u32 pgd_update_pinned;
u32 pgd_update_batched;
u32 pud_update;
u32 pud_update_pinned;
u32 pud_update_batched;
u32 pmd_update;
u32 pmd_update_pinned;
u32 pmd_update_batched;
u32 pte_update;
u32 pte_update_pinned;
u32 pte_update_batched;
u32 mmu_update;
u32 mmu_update_extended;
u32 mmu_update_histo[MMU_UPDATE_HISTO];
u32 prot_commit;
u32 prot_commit_batched;
u32 set_pte_at;
u32 set_pte_at_batched;
u32 set_pte_at_pinned;
u32 set_pte_at_current;
u32 set_pte_at_kernel;
} mmu_stats;
static u8 zero_stats;
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&mmu_stats, 0, sizeof(mmu_stats));
zero_stats = 0;
}
}
#define ADD_STATS(elem, val) \
do { check_zero(); mmu_stats.elem += (val); } while(0)
#else /* !CONFIG_XEN_DEBUG_FS */
#define ADD_STATS(elem, val) do { (void)(val); } while(0)
#endif /* CONFIG_XEN_DEBUG_FS */
/*
* Just beyond the highest usermode address. STACK_TOP_MAX has a
@@ -229,25 +285,35 @@ void make_lowmem_page_readwrite(void *vaddr)
}
-static bool page_pinned(void *ptr)
+static bool xen_page_pinned(void *ptr)
{
struct page *page = virt_to_page(ptr);
return PagePinned(page);
}
-static void extend_mmu_update(const struct mmu_update *update)
+static void xen_extend_mmu_update(const struct mmu_update *update)
{
struct multicall_space mcs;
struct mmu_update *u;
mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));
-if (mcs.mc != NULL)
+if (mcs.mc != NULL) {
+ADD_STATS(mmu_update_extended, 1);
+ADD_STATS(mmu_update_histo[mcs.mc->args[1]], -1);
mcs.mc->args[1]++;
-else {
+if (mcs.mc->args[1] < MMU_UPDATE_HISTO)
+ADD_STATS(mmu_update_histo[mcs.mc->args[1]], 1);
+else
+ADD_STATS(mmu_update_histo[0], 1);
+} else {
+ADD_STATS(mmu_update, 1);
mcs = __xen_mc_entry(sizeof(*u));
MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
+ADD_STATS(mmu_update_histo[1], 1);
}
u = mcs.args;
@@ -265,7 +331,9 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
/* ptr may be ioremapped for 64-bit pagetable setup */
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
u.val = pmd_val_ma(val);
-extend_mmu_update(&u);
+xen_extend_mmu_update(&u);
+ADD_STATS(pmd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
@@ -274,13 +342,17 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
+ADD_STATS(pmd_update, 1);
/* If page is not pinned, we can just update the entry
directly */
-if (!page_pinned(ptr)) {
+if (!xen_page_pinned(ptr)) {
*ptr = val;
return;
}
+ADD_STATS(pmd_update_pinned, 1);
xen_set_pmd_hyper(ptr, val);
}
@@ -300,12 +372,18 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
if (mm == &init_mm)
preempt_disable();
+ADD_STATS(set_pte_at, 1);
+//  ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
+ADD_STATS(set_pte_at_current, mm == current->mm);
+ADD_STATS(set_pte_at_kernel, mm == &init_mm);
if (mm == current->mm || mm == &init_mm) {
if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
struct multicall_space mcs;
mcs = xen_mc_entry(0);
MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
+ADD_STATS(set_pte_at_batched, 1);
xen_mc_issue(PARAVIRT_LAZY_MMU);
goto out;
} else
@@ -334,7 +412,10 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
u.val = pte_val_ma(pte);
-extend_mmu_update(&u);
+xen_extend_mmu_update(&u);
+ADD_STATS(prot_commit, 1);
+ADD_STATS(prot_commit_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
}
@@ -400,7 +481,9 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
/* ptr may be ioremapped for 64-bit pagetable setup */
u.ptr = arbitrary_virt_to_machine(ptr).maddr;
u.val = pud_val_ma(val);
-extend_mmu_update(&u);
+xen_extend_mmu_update(&u);
+ADD_STATS(pud_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
xen_mc_issue(PARAVIRT_LAZY_MMU);
@@ -409,18 +492,26 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val)
void xen_set_pud(pud_t *ptr, pud_t val)
{
+ADD_STATS(pud_update, 1);
/* If page is not pinned, we can just update the entry
directly */
-if (!page_pinned(ptr)) {
+if (!xen_page_pinned(ptr)) {
*ptr = val;
return;
}
+ADD_STATS(pud_update_pinned, 1);
xen_set_pud_hyper(ptr, val);
}
void xen_set_pte(pte_t *ptep, pte_t pte)
{
+ADD_STATS(pte_update, 1);
+//  ADD_STATS(pte_update_pinned, xen_page_pinned(ptep));
+ADD_STATS(pte_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
#ifdef CONFIG_X86_PAE
ptep->pte_high = pte.pte_high;
smp_wmb();
@@ -490,7 +581,7 @@ static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val)
u.ptr = virt_to_machine(ptr).maddr;
u.val = pgd_val_ma(val);
-extend_mmu_update(&u);
+xen_extend_mmu_update(&u);
}
/*
@@ -517,17 +608,22 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
{
pgd_t *user_ptr = xen_get_user_pgd(ptr);
+ADD_STATS(pgd_update, 1);
/* If page is not pinned, we can just update the entry
directly */
-if (!page_pinned(ptr)) {
+if (!xen_page_pinned(ptr)) {
*ptr = val;
if (user_ptr) {
-WARN_ON(page_pinned(user_ptr));
+WARN_ON(xen_page_pinned(user_ptr));
*user_ptr = val;
}
return;
}
+ADD_STATS(pgd_update_pinned, 1);
+ADD_STATS(pgd_update_batched, paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU);
/* If it's pinned, then we can at least batch the kernel and
user updates together. */
xen_mc_batch();
@@ -555,9 +651,12 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
* For 64-bit, we must skip the Xen hole in the middle of the address
* space, just after the big x86-64 virtual hole.
*/
-static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level),
-unsigned long limit)
+static int xen_pgd_walk(struct mm_struct *mm,
+int (*func)(struct mm_struct *mm, struct page *,
+enum pt_level),
+unsigned long limit)
{
+pgd_t *pgd = mm->pgd;
int flush = 0;
unsigned hole_low, hole_high;
unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -590,8 +689,6 @@ static int xen_pgd_walk(struct mm_struct *mm,
pmdidx_limit = 0;
#endif
-flush |= (*func)(virt_to_page(pgd), PT_PGD);
for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) {
pud_t *pud;
@@ -604,7 +701,7 @@ static int xen_pgd_walk(struct mm_struct *mm,
pud = pud_offset(&pgd[pgdidx], 0);
if (PTRS_PER_PUD > 1) /* not folded */
-flush |= (*func)(virt_to_page(pud), PT_PUD);
+flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) {
pmd_t *pmd;
@@ -619,7 +716,7 @@ static int xen_pgd_walk(struct mm_struct *mm,
pmd = pmd_offset(&pud[pudidx], 0);
if (PTRS_PER_PMD > 1) /* not folded */
-flush |= (*func)(virt_to_page(pmd), PT_PMD);
+flush |= (*func)(mm, virt_to_page(pmd), PT_PMD);
for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) {
struct page *pte;
@@ -633,28 +730,34 @@ static int xen_pgd_walk(struct mm_struct *mm,
continue;
pte = pmd_page(pmd[pmdidx]);
-flush |= (*func)(pte, PT_PTE);
+flush |= (*func)(mm, pte, PT_PTE);
}
}
}
out:
+/* Do the top level last, so that the callbacks can use it as
+a cue to do final things like tlb flushes. */
+flush |= (*func)(mm, virt_to_page(pgd), PT_PGD);
return flush;
}
-static spinlock_t *lock_pte(struct page *page)
+/* If we're using split pte locks, then take the page's lock and
+return a pointer to it. Otherwise return NULL. */
+static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
{
spinlock_t *ptl = NULL;
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
ptl = __pte_lockptr(page);
-spin_lock(ptl);
+spin_lock_nest_lock(ptl, &mm->page_table_lock);
#endif
return ptl;
}
-static void do_unlock(void *v)
+static void xen_pte_unlock(void *v)
{
spinlock_t *ptl = v;
spin_unlock(ptl);
@@ -672,7 +775,8 @@ static void xen_do_pin(unsigned level, unsigned long pfn)
MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
}
-static int pin_page(struct page *page, enum pt_level level)
+static int xen_pin_page(struct mm_struct *mm, struct page *page,
+enum pt_level level)
{
unsigned pgfl = TestSetPagePinned(page);
int flush;
@@ -691,21 +795,40 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
flush = 0;
+/*
+* We need to hold the pagetable lock between the time
+* we make the pagetable RO and when we actually pin
+* it. If we don't, then other users may come in and
+* attempt to update the pagetable by writing it,
+* which will fail because the memory is RO but not
+* pinned, so Xen won't do the trap'n'emulate.
+*
+* If we're using split pte locks, we can't hold the
+* entire pagetable's worth of locks during the
+* traverse, because we may wrap the preempt count (8
+* bits). The solution is to mark RO and pin each PTE
+* page while holding the lock. This means the number
+* of locks we end up holding is never more than a
+* batch size (~32 entries, at present).
+*
+* If we're not using split pte locks, we needn't pin
+* the PTE pages independently, because we're
+* protected by the overall pagetable lock.
+*/
ptl = NULL;
if (level == PT_PTE)
-ptl = lock_pte(page);
+ptl = xen_pte_lock(page, mm);
MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
pfn_pte(pfn, PAGE_KERNEL_RO),
level == PT_PGD ? UVMF_TLB_FLUSH : 0);
-if (level == PT_PTE)
-xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
-if (ptl) {
+if (ptl) {
+xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
/* Queue a deferred unlock for when this batch
is completed. */
-xen_mc_callback(do_unlock, ptl);
+xen_mc_callback(xen_pte_unlock, ptl);
}
}
@@ -715,11 +838,11 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page,
/* This is called just after a mm has been created, but it has not
been used yet. We need to make sure that its pagetable is all
read-only, and can be pinned. */
-void xen_pgd_pin(pgd_t *pgd)
+static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
{
xen_mc_batch();
-if (pgd_walk(pgd, pin_page, USER_LIMIT)) {
+if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
/* re-enable interrupts for kmap_flush_unused */
xen_mc_issue(0);
kmap_flush_unused();
@@ -733,25 +856,35 @@ void xen_pgd_pin(pgd_t *pgd)
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
if (user_pgd) {
-pin_page(virt_to_page(user_pgd), PT_PGD);
+xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd)));
}
}
#else /* CONFIG_X86_32 */
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is pinnable */
-pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+PT_PMD);
#endif
xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
#endif /* CONFIG_X86_64 */
xen_mc_issue(0);
}
+static void xen_pgd_pin(struct mm_struct *mm)
+{
+__xen_pgd_pin(mm, mm->pgd);
+}
/*
* On save, we need to pin all pagetables to make sure they get their
* mfns turned into pfns. Search the list for any unpinned pgds and pin
* them (unpinned pgds are not currently in use, probably because the
* process is under construction or destruction).
+*
+* Expected to be called in stop_machine() ("equivalent to taking
+* every spinlock in the system"), so the locking doesn't really
+* matter all that much.
*/
void xen_mm_pin_all(void)
{
@@ -762,7 +895,7 @@ void xen_mm_pin_all(void)
list_for_each_entry(page, &pgd_list, lru) {
if (!PagePinned(page)) {
-xen_pgd_pin((pgd_t *)page_address(page));
+__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
SetPageSavePinned(page);
}
}
@@ -775,7 +908,8 @@ void xen_mm_pin_all(void)
* that's before we have page structures to store the bits. So do all
* the book-keeping now.
*/
-static __init int mark_pinned(struct page *page, enum pt_level level)
+static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
+enum pt_level level)
{
SetPagePinned(page);
return 0;
@@ -783,10 +917,11 @@ static __init int xen_mark_pinned(struct mm_struct *mm, struct page *page,
void __init xen_mark_init_mm_pinned(void)
{
-pgd_walk(init_mm.pgd, mark_pinned, FIXADDR_TOP);
+xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
}
-static int unpin_page(struct page *page, enum pt_level level)
+static int xen_unpin_page(struct mm_struct *mm, struct page *page,
+enum pt_level level)
{
unsigned pgfl = TestClearPagePinned(page);
@@ -796,10 +931,18 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
spinlock_t *ptl = NULL;
struct multicall_space mcs;
+/*
+* Do the converse to pin_page. If we're using split
+* pte locks, we must be holding the lock while
+* the pte page is unpinned but still RO to prevent
+* concurrent updates from seeing it in this
+* partially-pinned state.
+*/
if (level == PT_PTE) {
-ptl = lock_pte(page);
-xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
+ptl = xen_pte_lock(page, mm);
+if (ptl)
+xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
}
mcs = __xen_mc_entry(0);
@@ -810,7 +953,7 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
if (ptl) {
/* unlock when batch completed */
-xen_mc_callback(do_unlock, ptl);
+xen_mc_callback(xen_pte_unlock, ptl);
}
}
@@ -818,7 +961,7 @@ static int xen_unpin_page(struct mm_struct *mm, struct page *page,
}
/* Release a pagetable's pages back as normal RW */
-static void xen_pgd_unpin(pgd_t *pgd)
+static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
{
xen_mc_batch();
@@ -830,21 +973,27 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
if (user_pgd) {
xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd)));
-unpin_page(virt_to_page(user_pgd), PT_PGD);
+xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
}
}
#endif
#ifdef CONFIG_X86_PAE
/* Need to make sure unshared kernel PMD is unpinned */
-pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD);
+xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])),
+PT_PMD);
#endif
-pgd_walk(pgd, unpin_page, USER_LIMIT);
+xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
xen_mc_issue(0);
}
+static void xen_pgd_unpin(struct mm_struct *mm)
+{
+__xen_pgd_unpin(mm, mm->pgd);
+}
/*
* On resume, undo any pinning done at save, so that the rest of the
* kernel doesn't see any unexpected pinned pagetables.
@@ -859,7 +1008,7 @@ void xen_mm_unpin_all(void)
list_for_each_entry(page, &pgd_list, lru) {
if (PageSavePinned(page)) {
BUG_ON(!PagePinned(page));
-xen_pgd_unpin((pgd_t *)page_address(page));
+__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
ClearPageSavePinned(page);
}
}
@@ -870,14 +1019,14 @@ void xen_mm_unpin_all(void)
void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
-xen_pgd_pin(next->pgd);
+xen_pgd_pin(next);
spin_unlock(&next->page_table_lock);
}
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
spin_lock(&mm->page_table_lock);
-xen_pgd_pin(mm->pgd);
+xen_pgd_pin(mm);
spin_unlock(&mm->page_table_lock);
}
@@ -907,7 +1056,7 @@ static void drop_other_mm_ref(void *info)
}
}
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
{
cpumask_t mask;
unsigned cpu;
@@ -937,7 +1086,7 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
smp_call_function_mask(mask, drop_other_mm_ref, mm, 1);
}
#else
-static void drop_mm_ref(struct mm_struct *mm)
+static void xen_drop_mm_ref(struct mm_struct *mm)
{
if (current->active_mm == mm)
load_cr3(swapper_pg_dir);
@@ -961,14 +1110,77 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
void xen_exit_mmap(struct mm_struct *mm)
{
get_cpu(); /* make sure we don't move around */
-drop_mm_ref(mm);
+xen_drop_mm_ref(mm);
put_cpu();
spin_lock(&mm->page_table_lock);
/* pgd may not be pinned in the error exit path of execve */
-if (page_pinned(mm->pgd))
-xen_pgd_unpin(mm->pgd);
+if (xen_page_pinned(mm->pgd))
+xen_pgd_unpin(mm);
spin_unlock(&mm->page_table_lock);
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mmu_debug;
static int __init xen_mmu_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mmu_debug = debugfs_create_dir("mmu", d_xen);
debugfs_create_u8("zero_stats", 0644, d_mmu_debug, &zero_stats);
debugfs_create_u32("pgd_update", 0444, d_mmu_debug, &mmu_stats.pgd_update);
debugfs_create_u32("pgd_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pgd_update_pinned);
debugfs_create_u32("pgd_update_batched", 0444, d_mmu_debug,
&mmu_stats.pgd_update_pinned);
debugfs_create_u32("pud_update", 0444, d_mmu_debug, &mmu_stats.pud_update);
debugfs_create_u32("pud_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pud_update_pinned);
debugfs_create_u32("pud_update_batched", 0444, d_mmu_debug,
&mmu_stats.pud_update_pinned);
debugfs_create_u32("pmd_update", 0444, d_mmu_debug, &mmu_stats.pmd_update);
debugfs_create_u32("pmd_update_pinned", 0444, d_mmu_debug,
&mmu_stats.pmd_update_pinned);
debugfs_create_u32("pmd_update_batched", 0444, d_mmu_debug,
&mmu_stats.pmd_update_pinned);
debugfs_create_u32("pte_update", 0444, d_mmu_debug, &mmu_stats.pte_update);
// debugfs_create_u32("pte_update_pinned", 0444, d_mmu_debug,
// &mmu_stats.pte_update_pinned);
debugfs_create_u32("pte_update_batched", 0444, d_mmu_debug,
&mmu_stats.pte_update_pinned);
debugfs_create_u32("mmu_update", 0444, d_mmu_debug, &mmu_stats.mmu_update);
debugfs_create_u32("mmu_update_extended", 0444, d_mmu_debug,
&mmu_stats.mmu_update_extended);
xen_debugfs_create_u32_array("mmu_update_histo", 0444, d_mmu_debug,
mmu_stats.mmu_update_histo, 20);
debugfs_create_u32("set_pte_at", 0444, d_mmu_debug, &mmu_stats.set_pte_at);
debugfs_create_u32("set_pte_at_batched", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_batched);
debugfs_create_u32("set_pte_at_current", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_current);
debugfs_create_u32("set_pte_at_kernel", 0444, d_mmu_debug,
&mmu_stats.set_pte_at_kernel);
debugfs_create_u32("prot_commit", 0444, d_mmu_debug, &mmu_stats.prot_commit);
debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug,
&mmu_stats.prot_commit_batched);
return 0;
}
fs_initcall(xen_mmu_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */

arch/x86/xen/mmu.h
@@ -18,9 +18,6 @@ void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next);
void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
void xen_exit_mmap(struct mm_struct *mm);
-void xen_pgd_pin(pgd_t *pgd);
-//void xen_pgd_unpin(pgd_t *pgd);
pteval_t xen_pte_val(pte_t);
pmdval_t xen_pmd_val(pmd_t);
pgdval_t xen_pgd_val(pgd_t);

arch/x86/xen/multicalls.c
@@ -21,16 +21,20 @@
*/
#include <linux/percpu.h>
#include <linux/hardirq.h>
+#include <linux/debugfs.h>
#include <asm/xen/hypercall.h>
#include "multicalls.h"
+#include "debugfs.h"
-#define MC_BATCH 32
#define MC_DEBUG 1
+#define MC_BATCH 32
#define MC_ARGS (MC_BATCH * 16)
struct mc_buffer {
struct multicall_entry entries[MC_BATCH];
#if MC_DEBUG
@@ -47,6 +51,76 @@ struct mc_buffer {
static DEFINE_PER_CPU(struct mc_buffer, mc_buffer);
DEFINE_PER_CPU(unsigned long, xen_mc_irq_flags);
/* flush reasons 0- slots, 1- args, 2- callbacks */
enum flush_reasons
{
FL_SLOTS,
FL_ARGS,
FL_CALLBACKS,
FL_N_REASONS
};
#ifdef CONFIG_XEN_DEBUG_FS
#define NHYPERCALLS 40 /* not really */
static struct {
unsigned histo[MC_BATCH+1];
unsigned issued;
unsigned arg_total;
unsigned hypercalls;
unsigned histo_hypercalls[NHYPERCALLS];
unsigned flush[FL_N_REASONS];
} mc_stats;
static u8 zero_stats;
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&mc_stats, 0, sizeof(mc_stats));
zero_stats = 0;
}
}
static void mc_add_stats(const struct mc_buffer *mc)
{
int i;
check_zero();
mc_stats.issued++;
mc_stats.hypercalls += mc->mcidx;
mc_stats.arg_total += mc->argidx;
mc_stats.histo[mc->mcidx]++;
for(i = 0; i < mc->mcidx; i++) {
unsigned op = mc->entries[i].op;
if (op < NHYPERCALLS)
mc_stats.histo_hypercalls[op]++;
}
}
static void mc_stats_flush(enum flush_reasons idx)
{
check_zero();
mc_stats.flush[idx]++;
}
#else /* !CONFIG_XEN_DEBUG_FS */
static inline void mc_add_stats(const struct mc_buffer *mc)
{
}
static inline void mc_stats_flush(enum flush_reasons idx)
{
}
#endif /* CONFIG_XEN_DEBUG_FS */
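/*
 * Aside (not part of this change): with MC_BATCH = 32 and
 * MC_ARGS = 32 * 16 bytes, a flush is forced either when all 32
 * multicall slots are in use (FL_SLOTS) or when a new entry's
 * arguments would overflow the 512-byte argument buffer (FL_ARGS);
 * xen_mc_callback() below flushes on a full callback table
 * (FL_CALLBACKS). The flush[] counters record which limit fired.
 */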
void xen_mc_flush(void)
{
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
@@ -60,6 +134,8 @@ void xen_mc_flush(void)
something in the middle */
local_irq_save(flags);
+mc_add_stats(b);
if (b->mcidx) {
#if MC_DEBUG
memcpy(b->debug, b->entries,
@@ -115,6 +191,7 @@ struct multicall_space __xen_mc_entry(size_t args)
if (b->mcidx == MC_BATCH ||
(argidx + args) > MC_ARGS) {
+mc_stats_flush(b->mcidx == MC_BATCH ? FL_SLOTS : FL_ARGS);
xen_mc_flush();
argidx = roundup(b->argidx, sizeof(u64));
}
@@ -158,10 +235,44 @@ void xen_mc_callback(void (*fn)(void *), void *data)
struct mc_buffer *b = &__get_cpu_var(mc_buffer);
struct callback *cb;
-if (b->cbidx == MC_BATCH)
+if (b->cbidx == MC_BATCH) {
+mc_stats_flush(FL_CALLBACKS);
xen_mc_flush();
+}
cb = &b->callbacks[b->cbidx++];
cb->fn = fn;
cb->data = data;
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_mc_debug;
static int __init xen_mc_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_mc_debug = debugfs_create_dir("multicalls", d_xen);
debugfs_create_u8("zero_stats", 0644, d_mc_debug, &zero_stats);
debugfs_create_u32("batches", 0444, d_mc_debug, &mc_stats.issued);
debugfs_create_u32("hypercalls", 0444, d_mc_debug, &mc_stats.hypercalls);
debugfs_create_u32("arg_total", 0444, d_mc_debug, &mc_stats.arg_total);
xen_debugfs_create_u32_array("batch_histo", 0444, d_mc_debug,
mc_stats.histo, MC_BATCH);
xen_debugfs_create_u32_array("hypercall_histo", 0444, d_mc_debug,
mc_stats.histo_hypercalls, NHYPERCALLS);
xen_debugfs_create_u32_array("flush_reasons", 0444, d_mc_debug,
mc_stats.flush, FL_N_REASONS);
return 0;
}
fs_initcall(xen_mc_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */

arch/x86/xen/smp.c
@@ -11,11 +11,8 @@
* useful topology information for the kernel to make use of. As a
* result, all CPUs are treated as if they're single-core and
* single-threaded.
-*
-* This does not handle HOTPLUG_CPU yet.
*/
#include <linux/sched.h>
-#include <linux/kernel_stat.h>
#include <linux/err.h>
#include <linux/smp.h>
@@ -36,8 +33,6 @@
#include "xen-ops.h"
#include "mmu.h"
-static void __cpuinit xen_init_lock_cpu(int cpu);
cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
@@ -64,11 +59,12 @@ static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static __cpuinit void cpu_bringup_and_idle(void)
+static __cpuinit void cpu_bringup(void)
{
int cpu = smp_processor_id();
cpu_init();
+touch_softlockup_watchdog();
preempt_disable();
xen_enable_sysenter();
@@ -89,6 +85,11 @@ static __cpuinit void cpu_bringup_and_idle(void)
local_irq_enable();
wmb(); /* make sure everything is out */
+}
+static __cpuinit void cpu_bringup_and_idle(void)
+{
+cpu_bringup();
cpu_idle();
}
@@ -212,8 +213,6 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
cpu_set(cpu, cpu_present_map);
}
-//init_xenbus_allowed_cpumask();
}
static __cpuinit int
@@ -281,12 +280,6 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
struct task_struct *idle = idle_task(cpu);
int rc;
-#if 0
-rc = cpu_up_check(cpu);
-if (rc)
-return rc;
-#endif
#ifdef CONFIG_X86_64
/* Allocate node local memory for AP pdas */
WARN_ON(cpu == 0);
@@ -339,6 +332,60 @@ static void xen_smp_cpus_done(unsigned int max_cpus)
{
}
#ifdef CONFIG_HOTPLUG_CPU
static int xen_cpu_disable(void)
{
unsigned int cpu = smp_processor_id();
if (cpu == 0)
return -EBUSY;
cpu_disable_common();
load_cr3(swapper_pg_dir);
return 0;
}
static void xen_cpu_die(unsigned int cpu)
{
while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
if (num_online_cpus() == 1)
alternatives_smp_switch(0);
}
static void xen_play_dead(void)
{
play_dead_common();
HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
cpu_bringup();
}
#else /* !CONFIG_HOTPLUG_CPU */
static int xen_cpu_disable(void)
{
return -ENOSYS;
}
static void xen_cpu_die(unsigned int cpu)
{
BUG();
}
static void xen_play_dead(void)
{
BUG();
}
#endif
static void stop_self(void *v)
{
int cpu = smp_processor_id();
@@ -419,176 +466,16 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
(removed here: the byte spinlock implementation, struct xen_spinlock with xen_spin_is_locked(), xen_spin_is_contended(), xen_spin_trylock(), spinning_lock()/unspinning_lock(), xen_spin_lock_slow(), xen_spin_lock(), xen_spin_unlock_slow(), xen_spin_unlock(), xen_init_lock_cpu() and xen_init_spinlocks(), moves to the new arch/x86/xen/spinlock.c below, gaining stats, nested-spinning recovery and an irq-enabled slow path)
static const struct smp_ops xen_smp_ops __initdata = {
.smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu,
.smp_prepare_cpus = xen_smp_prepare_cpus,
-.cpu_up = xen_cpu_up,
.smp_cpus_done = xen_smp_cpus_done,
+.cpu_up = xen_cpu_up,
+.cpu_die = xen_cpu_die,
+.cpu_disable = xen_cpu_disable,
+.play_dead = xen_play_dead,
.smp_send_stop = xen_smp_send_stop,
.smp_send_reschedule = xen_smp_send_reschedule,

arch/x86/xen/spinlock.c (new file, 428 lines)
@@ -0,0 +1,428 @@
/*
* Split spinlock implementation out into its own file, so it can be
* compiled in an FTRACE-compatible way.
*/
#include <linux/kernel_stat.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/log2.h>
#include <asm/paravirt.h>
#include <xen/interface/xen.h>
#include <xen/events.h>
#include "xen-ops.h"
#include "debugfs.h"
#ifdef CONFIG_XEN_DEBUG_FS
static struct xen_spinlock_stats
{
u64 taken;
u32 taken_slow;
u32 taken_slow_nested;
u32 taken_slow_pickup;
u32 taken_slow_spurious;
u32 taken_slow_irqenable;
u64 released;
u32 released_slow;
u32 released_slow_kicked;
#define HISTO_BUCKETS 30
u32 histo_spin_total[HISTO_BUCKETS+1];
u32 histo_spin_spinning[HISTO_BUCKETS+1];
u32 histo_spin_blocked[HISTO_BUCKETS+1];
u64 time_total;
u64 time_spinning;
u64 time_blocked;
} spinlock_stats;
static u8 zero_stats;
static unsigned lock_timeout = 1 << 10;
#define TIMEOUT lock_timeout
static inline void check_zero(void)
{
if (unlikely(zero_stats)) {
memset(&spinlock_stats, 0, sizeof(spinlock_stats));
zero_stats = 0;
}
}
#define ADD_STATS(elem, val) \
do { check_zero(); spinlock_stats.elem += (val); } while(0)
static inline u64 spin_time_start(void)
{
return xen_clocksource_read();
}
static void __spin_time_accum(u64 delta, u32 *array)
{
unsigned index = ilog2(delta);
check_zero();
if (index < HISTO_BUCKETS)
array[index]++;
else
array[HISTO_BUCKETS]++;
}
static inline void spin_time_accum_spinning(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
spinlock_stats.time_spinning += delta;
}
static inline void spin_time_accum_total(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_total);
spinlock_stats.time_total += delta;
}
static inline void spin_time_accum_blocked(u64 start)
{
u32 delta = xen_clocksource_read() - start;
__spin_time_accum(delta, spinlock_stats.histo_spin_blocked);
spinlock_stats.time_blocked += delta;
}
#else /* !CONFIG_XEN_DEBUG_FS */
#define TIMEOUT (1 << 10)
#define ADD_STATS(elem, val) do { (void)(val); } while(0)
static inline u64 spin_time_start(void)
{
return 0;
}
static inline void spin_time_accum_total(u64 start)
{
}
static inline void spin_time_accum_spinning(u64 start)
{
}
static inline void spin_time_accum_blocked(u64 start)
{
}
#endif /* CONFIG_XEN_DEBUG_FS */
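/*
 * Aside (illustrative, not part of this change): the histograms
 * above bucket a time delta by its power of two, since
 * ilog2(delta) == floor(log2(delta)). An equivalent open-coded
 * helper would be:
 *
 *	static unsigned histo_bucket(u64 delta)
 *	{
 *		unsigned index = 0;
 *
 *		while (delta >>= 1)	// floor(log2(delta))
 *			index++;
 *		return index < HISTO_BUCKETS ? index : HISTO_BUCKETS;
 *	}
 *
 * so a delta of 1000 lands in bucket 9 (2^9 = 512 <= 1000 < 1024).
 */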
struct xen_spinlock {
unsigned char lock; /* 0 -> free; 1 -> locked */
unsigned short spinners; /* count of waiting cpus */
};
static int xen_spin_is_locked(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
return xl->lock != 0;
}
static int xen_spin_is_contended(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
/* Not strictly true; this is only the count of contended
lock-takers entering the slow path. */
return xl->spinners != 0;
}
static int xen_spin_trylock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
u8 old = 1;
asm("xchgb %b0,%1"
: "+q" (old), "+m" (xl->lock) : : "memory");
return old == 0;
}
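/*
 * Aside (illustrative, not part of this change): the xchgb above is
 * a byte-wide atomic test-and-set; a portable sketch of the same
 * operation using the GCC builtin would be:
 *
 *	static int byte_trylock(unsigned char *lock_byte)
 *	{
 *		// Atomically store 1 and fetch the old value; old == 0
 *		// means the lock was free and is now ours.
 *		return __sync_lock_test_and_set(lock_byte, 1) == 0;
 *	}
 *
 * The kernel code keeps the inline asm so the lock byte's layout
 * stays exactly that of struct raw_spinlock.
 */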
static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
/*
* Mark a cpu as interested in a lock. Returns the CPU's previous
* lock of interest, in case we got preempted by an interrupt.
*/
static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
{
struct xen_spinlock *prev;
prev = __get_cpu_var(lock_spinners);
__get_cpu_var(lock_spinners) = xl;
wmb(); /* set lock of interest before count */
asm(LOCK_PREFIX " incw %0"
: "+m" (xl->spinners) : : "memory");
return prev;
}
/*
* Mark a cpu as no longer interested in a lock. Restores previous
* lock of interest (NULL for none).
*/
static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
{
asm(LOCK_PREFIX " decw %0"
: "+m" (xl->spinners) : : "memory");
wmb(); /* decrement count before restoring lock */
__get_cpu_var(lock_spinners) = prev;
}
static noinline int xen_spin_lock_slow(struct raw_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
struct xen_spinlock *prev;
int irq = __get_cpu_var(lock_kicker_irq);
int ret;
unsigned long flags;
u64 start;
/* If kicker interrupts not initialized yet, just spin */
if (irq == -1)
return 0;
start = spin_time_start();
/* announce we're spinning */
prev = spinning_lock(xl);
flags = __raw_local_save_flags();
if (irq_enable) {
ADD_STATS(taken_slow_irqenable, 1);
raw_local_irq_enable();
}
ADD_STATS(taken_slow, 1);
ADD_STATS(taken_slow_nested, prev != NULL);
do {
/* clear pending */
xen_clear_irq_pending(irq);
/* check again make sure it didn't become free while
we weren't looking */
ret = xen_spin_trylock(lock);
if (ret) {
ADD_STATS(taken_slow_pickup, 1);
/*
* If we interrupted another spinlock while it
* was blocking, make sure it doesn't block
* without rechecking the lock.
*/
if (prev != NULL)
xen_set_irq_pending(irq);
goto out;
}
/*
* Block until irq becomes pending. If we're
* interrupted at this point (after the trylock but
* before entering the block), then the nested lock
* handler guarantees that the irq will be left
* pending if there's any chance the lock became free;
* xen_poll_irq() returns immediately if the irq is
* pending.
*/
xen_poll_irq(irq);
ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
} while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
kstat_this_cpu.irqs[irq]++;
out:
raw_local_irq_restore(flags);
unspinning_lock(xl, prev);
spin_time_accum_blocked(start);
return ret;
}
static inline void __xen_spin_lock(struct raw_spinlock *lock, bool irq_enable)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
unsigned timeout;
u8 oldval;
u64 start_spin;
ADD_STATS(taken, 1);
start_spin = spin_time_start();
do {
u64 start_spin_fast = spin_time_start();
timeout = TIMEOUT;
asm("1: xchgb %1,%0\n"
" testb %1,%1\n"
" jz 3f\n"
"2: rep;nop\n"
" cmpb $0,%0\n"
" je 1b\n"
" dec %2\n"
" jnz 2b\n"
"3:\n"
: "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
: "1" (1)
: "memory");
spin_time_accum_spinning(start_spin_fast);
} while (unlikely(oldval != 0 &&
(TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
spin_time_accum_total(start_spin);
}
static void xen_spin_lock(struct raw_spinlock *lock)
{
__xen_spin_lock(lock, false);
}
static void xen_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
{
__xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
}
static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
{
int cpu;
ADD_STATS(released_slow, 1);
for_each_online_cpu(cpu) {
/* XXX should mix up next cpu selection */
if (per_cpu(lock_spinners, cpu) == xl) {
ADD_STATS(released_slow_kicked, 1);
xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
break;
}
}
}
static void xen_spin_unlock(struct raw_spinlock *lock)
{
struct xen_spinlock *xl = (struct xen_spinlock *)lock;
ADD_STATS(released, 1);
smp_wmb(); /* make sure no writes get moved after unlock */
xl->lock = 0; /* release lock */
/* make sure unlock happens before kick */
barrier();
if (unlikely(xl->spinners))
xen_spin_unlock_slow(xl);
}
static irqreturn_t dummy_handler(int irq, void *dev_id)
{
BUG();
return IRQ_HANDLED;
}
void __cpuinit xen_init_lock_cpu(int cpu)
{
int irq;
const char *name;
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
dummy_handler,
IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
name,
NULL);
if (irq >= 0) {
disable_irq(irq); /* make sure it's never delivered */
per_cpu(lock_kicker_irq, cpu) = irq;
}
printk("cpu %d spinlock event irq %d\n", cpu, irq);
}
void xen_uninit_lock_cpu(int cpu)
{
unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
}
void __init xen_init_spinlocks(void)
{
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
pv_lock_ops.spin_is_contended = xen_spin_is_contended;
pv_lock_ops.spin_lock = xen_spin_lock;
pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
pv_lock_ops.spin_trylock = xen_spin_trylock;
pv_lock_ops.spin_unlock = xen_spin_unlock;
}
#ifdef CONFIG_XEN_DEBUG_FS
static struct dentry *d_spin_debug;
static int __init xen_spinlock_debugfs(void)
{
struct dentry *d_xen = xen_init_debugfs();
if (d_xen == NULL)
return -ENOMEM;
d_spin_debug = debugfs_create_dir("spinlocks", d_xen);
debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
debugfs_create_u32("taken_slow", 0444, d_spin_debug,
&spinlock_stats.taken_slow);
debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
&spinlock_stats.taken_slow_nested);
debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
&spinlock_stats.taken_slow_pickup);
debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
&spinlock_stats.taken_slow_spurious);
debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
&spinlock_stats.taken_slow_irqenable);
debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
debugfs_create_u32("released_slow", 0444, d_spin_debug,
&spinlock_stats.released_slow);
debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
&spinlock_stats.released_slow_kicked);
debugfs_create_u64("time_spinning", 0444, d_spin_debug,
&spinlock_stats.time_spinning);
debugfs_create_u64("time_blocked", 0444, d_spin_debug,
&spinlock_stats.time_blocked);
debugfs_create_u64("time_total", 0444, d_spin_debug,
&spinlock_stats.time_total);
xen_debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
xen_debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
xen_debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
return 0;
}
fs_initcall(xen_spinlock_debugfs);
#endif /* CONFIG_XEN_DEBUG_FS */
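The protocol above is: spin on the byte lock for TIMEOUT (1 << 10) iterations, then publish interest in lock_spinners and sleep until the unlocker kicks us. A rough user-space analogue of that shape, with a pthread condition variable standing in for xen_poll_irq()/xen_send_IPI_one() — this is an illustrative sketch, not the kernel code, and it omits the nested-interrupt handling:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uchar byte_lock;		/* 0 -> free; 1 -> locked */
static atomic_int   spinners;		/* count of waiting threads */
static pthread_mutex_t kick_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;

static int try_lock(void)
{
	/* the xchgb in xen_spin_trylock() */
	return atomic_exchange(&byte_lock, 1) == 0;
}

static void lock_slow(void)
{
	atomic_fetch_add(&spinners, 1);		/* "spinning_lock()" */
	pthread_mutex_lock(&kick_mutex);
	while (!try_lock())			/* recheck, then block */
		pthread_cond_wait(&kick, &kick_mutex);	/* "xen_poll_irq()" */
	pthread_mutex_unlock(&kick_mutex);
	atomic_fetch_sub(&spinners, 1);		/* "unspinning_lock()" */
}

static void lock(void)
{
	int timeout = 1 << 10;			/* TIMEOUT above */

	while (!try_lock())
		if (--timeout == 0) {
			lock_slow();
			return;
		}
}

static void unlock(void)
{
	atomic_store(&byte_lock, 0);		/* release before kick */
	if (atomic_load(&spinners)) {		/* "xen_spin_unlock_slow()" */
		pthread_mutex_lock(&kick_mutex);
		pthread_cond_broadcast(&kick);
		pthread_mutex_unlock(&kick_mutex);
	}
}

static long counter;

static void *worker(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		lock();
		counter++;
		unlock();
	}
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("%ld\n", counter);		/* expect 400000 */
	return 0;
}

As in the kernel version, the unlock path is cheap when nobody is queued: one store plus a read of the spinner count.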


@@ -30,8 +30,6 @@
 #define TIMER_SLOP	100000
 #define NS_PER_TICK	(1000000000LL / HZ)
 
-static cycle_t xen_clocksource_read(void);
-
 /* runstate info updated by Xen */
 static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

@@ -213,7 +211,7 @@ unsigned long xen_tsc_khz(void)
 	return xen_khz;
 }
 
-static cycle_t xen_clocksource_read(void)
+cycle_t xen_clocksource_read(void)
 {
 	struct pvclock_vcpu_time_info *src;
 	cycle_t ret;

@@ -452,6 +450,14 @@ void xen_setup_timer(int cpu)
 	setup_runstate_info(cpu);
 }
 
+void xen_teardown_timer(int cpu)
+{
+	struct clock_event_device *evt;
+
+	BUG_ON(cpu == 0);
+	evt = &per_cpu(xen_clock_events, cpu);
+	unbind_from_irqhandler(evt->irq, NULL);
+}
+
 void xen_setup_cpu_clockevents(void)
 {
 	BUG_ON(preemptible());


@@ -298,7 +298,7 @@ check_events:
 	push %eax
 	push %ecx
 	push %edx
-	call force_evtchn_callback
+	call xen_force_evtchn_callback
 	pop %edx
 	pop %ecx
 	pop %eax


@@ -26,8 +26,15 @@
 /* Pseudo-flag used for virtual NMI, which we don't implement yet */
 #define XEN_EFLAGS_NMI	0x80000000
 
-#if 0
-#include <asm/percpu.h>
+#if 1
+/*
+	x86-64 does not yet support direct access to percpu variables
+	via a segment override, so we just need to make sure this code
+	never gets used
+ */
+#define BUG			ud2a
+#define PER_CPU_VAR(var, off)	0xdeadbeef
 #endif
 
 /*
 	Enable events.  This clears the event mask and tests the pending
@@ -35,6 +42,8 @@
 	events, then enter the hypervisor to get them handled.
  */
 ENTRY(xen_irq_enable_direct)
+	BUG
+
 	/* Unmask events */
 	movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)

@@ -58,6 +67,8 @@ ENDPATCH(xen_irq_enable_direct)
 	non-zero.
  */
 ENTRY(xen_irq_disable_direct)
+	BUG
+
 	movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 ENDPATCH(xen_irq_disable_direct)
 	ret

@@ -74,6 +85,8 @@ ENDPATCH(xen_irq_disable_direct)
 	Xen and x86 use opposite senses (mask vs enable).
  */
 ENTRY(xen_save_fl_direct)
+	BUG
+
 	testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 	setz %ah
 	addb %ah,%ah

@@ -91,6 +104,8 @@ ENDPATCH(xen_save_fl_direct)
 	if so.
  */
 ENTRY(xen_restore_fl_direct)
+	BUG
+
 	testb $X86_EFLAGS_IF>>8, %ah
 	setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask)
 	/* Preempt here doesn't matter because that will deal with

@@ -122,7 +137,7 @@ check_events:
 	push %r9
 	push %r10
 	push %r11
-	call force_evtchn_callback
+	call xen_force_evtchn_callback
 	pop %r11
 	pop %r10
 	pop %r9

@@ -133,7 +148,6 @@ check_events:
 	pop %rcx
 	pop %rax
 	ret
-#endif
 
 ENTRY(xen_adjust_exception_frame)
 	mov 8+0(%rsp),%rcx


@@ -2,6 +2,7 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/clocksource.h>
 #include <linux/irqreturn.h>
 #include <xen/xen-ops.h>

@@ -31,7 +32,10 @@ void xen_vcpu_restore(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
 
+void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
+void xen_teardown_timer(int cpu);
+cycle_t xen_clocksource_read(void);
 void xen_setup_cpu_clockevents(void);
 unsigned long xen_tsc_khz(void);
 void __init xen_time_init(void);

@@ -50,6 +54,10 @@ void __init xen_setup_vcpu_info_placement(void);
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
 
+void __init xen_init_spinlocks(void);
+__cpuinit void xen_init_lock_cpu(int cpu);
+void xen_uninit_lock_cpu(int cpu);
+
 extern cpumask_t xen_cpu_initialized_map;
 #else
 static inline void xen_smp_init(void) {}


@@ -1066,7 +1066,7 @@ static struct xenbus_driver blkfront = {
 
 static int __init xlblk_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	if (register_blkdev(XENVBD_MAJOR, DEV_NAME)) {


@@ -108,8 +108,8 @@ static int __init xen_init(void)
 {
 	struct hvc_struct *hp;
 
-	if (!is_running_on_xen() ||
-	    is_initial_xendomain() ||
+	if (!xen_pv_domain() ||
+	    xen_initial_domain() ||
 	    !xen_start_info->console.domU.evtchn)
 		return -ENODEV;

@@ -142,7 +142,7 @@ static void __exit xen_fini(void)
 
 static int xen_cons_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_pv_domain())
 		return 0;
 
 	hvc_instantiate(HVC_COOKIE, 0, &hvc_ops);


@@ -335,11 +335,11 @@ static struct xenbus_driver xenkbd = {
 
 static int __init xenkbd_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return -ENODEV;
 
 	return xenbus_register_frontend(&xenkbd);
View file

@ -1794,10 +1794,10 @@ static struct xenbus_driver netfront = {
static int __init netif_init(void) static int __init netif_init(void)
{ {
if (!is_running_on_xen()) if (!xen_domain())
return -ENODEV; return -ENODEV;
if (is_initial_xendomain()) if (xen_initial_domain())
return 0; return 0;
printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n"); printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
@ -1809,7 +1809,7 @@ module_init(netif_init);
static void __exit netif_exit(void) static void __exit netif_exit(void)
{ {
if (is_initial_xendomain()) if (xen_initial_domain())
return; return;
xenbus_unregister_driver(&netfront); xenbus_unregister_driver(&netfront);

View file

@ -210,143 +210,7 @@ timer_action (struct ehci_hcd *ehci, enum ehci_timer_action action)
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/
/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */ #include <linux/usb/ehci_def.h>
/* Section 2.2 Host Controller Capability Registers */
struct ehci_caps {
/* these fields are specified as 8 and 16 bit registers,
* but some hosts can't perform 8 or 16 bit PCI accesses.
*/
u32 hc_capbase;
#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */
#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */
u32 hcs_params; /* HCSPARAMS - offset 0x4 */
#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */
#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */
#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */
#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */
#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */
#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */
#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */
u32 hcc_params; /* HCCPARAMS - offset 0x8 */
#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */
#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */
#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */
#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute [8]; /* nibbles for routing - offset 0xC */
} __attribute__ ((packed));
/* Section 2.3 Host Controller Operational Registers */
struct ehci_regs {
/* USBCMD: offset 0x00 */
u32 command;
/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
#define CMD_PARK (1<<11) /* enable "park" on async qh */
#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */
#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */
#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */
#define CMD_ASE (1<<5) /* async schedule enable */
#define CMD_PSE (1<<4) /* periodic schedule enable */
/* 3:2 is periodic frame list size */
#define CMD_RESET (1<<1) /* reset HC not bus */
#define CMD_RUN (1<<0) /* start/stop HC */
/* USBSTS: offset 0x04 */
u32 status;
#define STS_ASS (1<<15) /* Async Schedule Status */
#define STS_PSS (1<<14) /* Periodic Schedule Status */
#define STS_RECL (1<<13) /* Reclamation */
#define STS_HALT (1<<12) /* Not running (any reason) */
/* some bits reserved */
/* these STS_* flags are also intr_enable bits (USBINTR) */
#define STS_IAA (1<<5) /* Interrupted on async advance */
#define STS_FATAL (1<<4) /* such as some PCI access errors */
#define STS_FLR (1<<3) /* frame list rolled over */
#define STS_PCD (1<<2) /* port change detect */
#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */
#define STS_INT (1<<0) /* "normal" completion (short, ...) */
/* USBINTR: offset 0x08 */
u32 intr_enable;
/* FRINDEX: offset 0x0C */
u32 frame_index; /* current microframe number */
/* CTRLDSSEGMENT: offset 0x10 */
u32 segment; /* address bits 63:32 if needed */
/* PERIODICLISTBASE: offset 0x14 */
u32 frame_list; /* points to periodic list */
/* ASYNCLISTADDR: offset 0x18 */
u32 async_next; /* address of next async queue head */
u32 reserved [9];
/* CONFIGFLAG: offset 0x40 */
u32 configured_flag;
#define FLAG_CF (1<<0) /* true: we'll support "high speed" */
/* PORTSC: offset 0x44 */
u32 port_status [0]; /* up to N_PORTS */
/* 31:23 reserved */
#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */
#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */
#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */
/* 19:16 for port testing */
#define PORT_LED_OFF (0<<14)
#define PORT_LED_AMBER (1<<14)
#define PORT_LED_GREEN (2<<14)
#define PORT_LED_MASK (3<<14)
#define PORT_OWNER (1<<13) /* true: companion hc owns this port */
#define PORT_POWER (1<<12) /* true: has power (see PPC) */
#define PORT_USB11(x) (((x)&(3<<10))==(1<<10)) /* USB 1.1 device */
/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
/* 9 reserved */
#define PORT_RESET (1<<8) /* reset port */
#define PORT_SUSPEND (1<<7) /* suspend port */
#define PORT_RESUME (1<<6) /* resume it */
#define PORT_OCC (1<<5) /* over current change */
#define PORT_OC (1<<4) /* over current active */
#define PORT_PEC (1<<3) /* port enable change */
#define PORT_PE (1<<2) /* port enable */
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
} __attribute__ ((packed));
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
#define USBMODE_BE (1<<2) /* BE/LE endianness select */
#define USBMODE_CM_HC (3<<0) /* host controller mode */
#define USBMODE_CM_IDLE (0<<0) /* idle state */
/* Appendix C, Debug port ... intended for use with special "debug devices"
* that can help if there's no serial console. (nonstandard enumeration.)
*/
struct ehci_dbg_port {
u32 control;
#define DBGP_OWNER (1<<30)
#define DBGP_ENABLED (1<<28)
#define DBGP_DONE (1<<16)
#define DBGP_INUSE (1<<10)
#define DBGP_ERRCODE(x) (((x)>>7)&0x07)
# define DBGP_ERR_BAD 1
# define DBGP_ERR_SIGNAL 2
#define DBGP_ERROR (1<<6)
#define DBGP_GO (1<<5)
#define DBGP_OUT (1<<4)
#define DBGP_LEN(x) (((x)>>0)&0x0f)
u32 pids;
#define DBGP_PID_GET(x) (((x)>>16)&0xff)
#define DBGP_PID_SET(data,tok) (((data)<<8)|(tok))
u32 data03;
u32 data47;
u32 address;
#define DBGP_EPADDR(dev,ep) (((dev)<<8)|(ep))
} __attribute__ ((packed));
/*-------------------------------------------------------------------------*/ /*-------------------------------------------------------------------------*/


@@ -673,7 +673,6 @@ config FB_VESA
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-	select VIDEO_SELECT
 	help
 	  This is the frame buffer device driver for generic VESA 2.0
 	  compliant graphic cards. The older VESA 1.2 cards are not supported.

@@ -1578,7 +1577,6 @@ config FB_CYBLA
 	tristate "Cyberblade/i1 support"
 	depends on FB && PCI && X86_32 && !64BIT
 	select FB_CFB_IMAGEBLIT
-	select VIDEO_SELECT
 	---help---
 	  This driver is supposed to support the Trident Cyberblade/i1
 	  graphics core integrated in the VIA VT8601A North Bridge,


@@ -43,22 +43,6 @@ config VGACON_SOFT_SCROLLBACK_SIZE
 	  buffer. Each 64KB will give you approximately 16 80x25
 	  screenfuls of scrollback buffer
 
-config VIDEO_SELECT
-	bool "Video mode selection support"
-	depends on X86 && VGA_CONSOLE
-	---help---
-	  This enables support for text mode selection on kernel startup. If
-	  you want to take advantage of some high-resolution text mode your
-	  card's BIOS offers, but the traditional Linux utilities like
-	  SVGATextMode don't, you can say Y here and set the mode using the
-	  "vga=" option from your boot loader (lilo or loadlin) or set
-	  "vga=ask" which brings up a video mode menu on kernel startup. (Try
-	  "man bootparam" or see the documentation of your boot loader about
-	  how to pass options to the kernel.)
-
-	  Read the file <file:Documentation/svga.txt> for more information
-	  about the Video mode selection support. If unsure, say N.
-
 config MDA_CONSOLE
 	depends on !M68K && !PARISC && ISA
 	tristate "MDA text console (dual-headed) (EXPERIMENTAL)"


@@ -680,11 +680,11 @@ static struct xenbus_driver xenfb = {
 
 static int __init xenfb_init(void)
 {
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	/* Nothing to do if running in dom0. */
-	if (is_initial_xendomain())
+	if (xen_initial_domain())
 		return -ENODEV;
 
 	return xenbus_register_frontend(&xenfb);


@@ -1,4 +1,5 @@
 obj-y	+= grant-table.o features.o events.o manage.o
 obj-y	+= xenbus/
+obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
 obj-$(CONFIG_XEN_BALLOON)	+= balloon.o


@@ -53,7 +53,6 @@
 #include <asm/tlb.h>
 
 #include <xen/interface/memory.h>
-#include <xen/balloon.h>
 #include <xen/xenbus.h>
 #include <xen/features.h>
 #include <xen/page.h>

@@ -226,9 +225,8 @@ static int increase_reservation(unsigned long nr_pages)
 	}
 
 	set_xen_guest_handle(reservation.extent_start, frame_list);
 	reservation.nr_extents = nr_pages;
-	rc = HYPERVISOR_memory_op(
-		XENMEM_populate_physmap, &reservation);
+	rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
 	if (rc < nr_pages) {
 		if (rc > 0) {
 			int ret;

@@ -236,7 +234,7 @@ static int increase_reservation(unsigned long nr_pages)
 			/* We hit the Xen hard limit: reprobe. */
 			reservation.nr_extents = rc;
 			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-					&reservation);
+						   &reservation);
 			BUG_ON(ret != rc);
 		}
 		if (rc >= 0)

@@ -420,7 +418,7 @@ static int __init balloon_init(void)
 	unsigned long pfn;
 	struct page *page;
 
-	if (!is_running_on_xen())
+	if (!xen_pv_domain())
 		return -ENODEV;
 
 	pr_info("xen_balloon: Initialising balloon driver.\n");

@@ -464,136 +462,13 @@ module_exit(balloon_exit);
 
-static void balloon_update_driver_allowance(long delta)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	balloon_stats.driver_pages += delta;
-	spin_unlock_irqrestore(&balloon_lock, flags);
-}
-
-static int dealloc_pte_fn(
-	pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
-{
-	unsigned long mfn = pte_mfn(*pte);
-	int ret;
-	struct xen_memory_reservation reservation = {
-		.nr_extents   = 1,
-		.extent_order = 0,
-		.domid        = DOMID_SELF
-	};
-	set_xen_guest_handle(reservation.extent_start, &mfn);
-	set_pte_at(&init_mm, addr, pte, __pte_ma(0ull));
-	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
-	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
-	BUG_ON(ret != 1);
-	return 0;
-}
-
-static struct page **alloc_empty_pages_and_pagevec(int nr_pages)
-{
-	unsigned long vaddr, flags;
-	struct page *page, **pagevec;
-	int i, ret;
-
-	pagevec = kmalloc(sizeof(page) * nr_pages, GFP_KERNEL);
-	if (pagevec == NULL)
-		return NULL;
-
-	for (i = 0; i < nr_pages; i++) {
-		page = pagevec[i] = alloc_page(GFP_KERNEL);
-		if (page == NULL)
-			goto err;
-
-		vaddr = (unsigned long)page_address(page);
-		scrub_page(page);
-
-		spin_lock_irqsave(&balloon_lock, flags);
-
-		if (xen_feature(XENFEAT_auto_translated_physmap)) {
-			unsigned long gmfn = page_to_pfn(page);
-			struct xen_memory_reservation reservation = {
-				.nr_extents   = 1,
-				.extent_order = 0,
-				.domid        = DOMID_SELF
-			};
-			set_xen_guest_handle(reservation.extent_start, &gmfn);
-			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
-						   &reservation);
-			if (ret == 1)
-				ret = 0; /* success */
-		} else {
-			ret = apply_to_page_range(&init_mm, vaddr, PAGE_SIZE,
-						  dealloc_pte_fn, NULL);
-		}
-
-		if (ret != 0) {
-			spin_unlock_irqrestore(&balloon_lock, flags);
-			__free_page(page);
-			goto err;
-		}
-
-		totalram_pages = --balloon_stats.current_pages;
-
-		spin_unlock_irqrestore(&balloon_lock, flags);
-	}
-
- out:
-	schedule_work(&balloon_worker);
-	flush_tlb_all();
-	return pagevec;
-
- err:
-	spin_lock_irqsave(&balloon_lock, flags);
-	while (--i >= 0)
-		balloon_append(pagevec[i]);
-	spin_unlock_irqrestore(&balloon_lock, flags);
-	kfree(pagevec);
-	pagevec = NULL;
-	goto out;
-}
-
-static void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages)
-{
-	unsigned long flags;
-	int i;
-
-	if (pagevec == NULL)
-		return;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	for (i = 0; i < nr_pages; i++) {
-		BUG_ON(page_count(pagevec[i]) != 1);
-		balloon_append(pagevec[i]);
-	}
-	spin_unlock_irqrestore(&balloon_lock, flags);
-
-	kfree(pagevec);
-
-	schedule_work(&balloon_worker);
-}
-
-static void balloon_release_driver_page(struct page *page)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&balloon_lock, flags);
-	balloon_append(page);
-	balloon_stats.driver_pages--;
-	spin_unlock_irqrestore(&balloon_lock, flags);
-
-	schedule_work(&balloon_worker);
-}
-
-#define BALLOON_SHOW(name, format, args...)			\
-	static ssize_t show_##name(struct sys_device *dev,	\
-				   char *buf)			\
-	{							\
-		return sprintf(buf, format, ##args);		\
-	}							\
+#define BALLOON_SHOW(name, format, args...)			\
+	static ssize_t show_##name(struct sys_device *dev,	\
+				   struct sysdev_attribute *attr, \
+				   char *buf)			\
+	{							\
+		return sprintf(buf, format, ##args);		\
+	}							\
 	static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
 
 BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));

@@ -604,7 +479,8 @@ BALLOON_SHOW(hard_limit_kb,
 	     (balloon_stats.hard_limit!=~0UL) ? PAGES2KB(balloon_stats.hard_limit) : 0);
 BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
 
-static ssize_t show_target_kb(struct sys_device *dev, char *buf)
+static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
+			      char *buf)
 {
 	return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
 }

@@ -614,19 +490,14 @@ static ssize_t store_target_kb(struct sys_device *dev,
 			       const char *buf,
 			       size_t count)
 {
-	char memstring[64], *endchar;
+	char *endchar;
 	unsigned long long target_bytes;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (count <= 1)
-		return -EBADMSG; /* runt */
-	if (count > sizeof(memstring))
-		return -EFBIG;   /* too long */
-	strcpy(memstring, buf);
-
-	target_bytes = memparse(memstring, &endchar);
+	target_bytes = memparse(buf, &endchar);
 	balloon_set_new_target(target_bytes >> PAGE_SHIFT);
 
 	return count;

@@ -694,20 +565,4 @@ static int register_balloon(struct sys_device *sysdev)
 	return error;
 }
 
-static void unregister_balloon(struct sys_device *sysdev)
-{
-	int i;
-
-	sysfs_remove_group(&sysdev->kobj, &balloon_info_group);
-	for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++)
-		sysdev_remove_file(sysdev, balloon_attrs[i]);
-	sysdev_unregister(sysdev);
-	sysdev_class_unregister(&balloon_sysdev_class);
-}
-
-static void balloon_sysfs_exit(void)
-{
-	unregister_balloon(&balloon_sysdev);
-}
-
 MODULE_LICENSE("GPL");
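store_target_kb() now leans on memparse() instead of hand-rolling length checks; memparse() parses a number with an optional K/M/G suffix and returns bytes. A user-space sketch of that parsing, under the assumption that the suffix semantics match the kernel helper (my_memparse is an illustrative stand-in, not the lib/ implementation):

#include <stdio.h>
#include <stdlib.h>

static unsigned long long my_memparse(const char *ptr, char **retptr)
{
	unsigned long long ret = strtoull(ptr, retptr, 0);	/* base 0: accepts 0x... too */

	switch (**retptr) {
	case 'G': case 'g':
		ret <<= 10;	/* fall through */
	case 'M': case 'm':
		ret <<= 10;	/* fall through */
	case 'K': case 'k':
		ret <<= 10;
		(*retptr)++;
	default:
		break;
	}
	return ret;
}

int main(void)
{
	char *end;

	printf("%llu\n", my_memparse("512M", &end));	/* 536870912 */
	printf("%llu\n", my_memparse("4k", &end));	/* 4096 */
	return 0;
}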

drivers/xen/cpu_hotplug.c (new file, 90 lines)

@@ -0,0 +1,90 @@
#include <linux/notifier.h>

#include <xen/xenbus.h>

#include <asm-x86/xen/hypervisor.h>
#include <asm/cpu.h>

static void enable_hotplug_cpu(int cpu)
{
	if (!cpu_present(cpu))
		arch_register_cpu(cpu);

	cpu_set(cpu, cpu_present_map);
}

static void disable_hotplug_cpu(int cpu)
{
	if (cpu_present(cpu))
		arch_unregister_cpu(cpu);

	cpu_clear(cpu, cpu_present_map);
}

static void vcpu_hotplug(unsigned int cpu)
{
	int err;
	char dir[32], state[32];

	if (!cpu_possible(cpu))
		return;

	sprintf(dir, "cpu/%u", cpu);
	err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state);
	if (err != 1) {
		printk(KERN_ERR "XENBUS: Unable to read cpu state\n");
		return;
	}

	if (strcmp(state, "online") == 0) {
		enable_hotplug_cpu(cpu);
	} else if (strcmp(state, "offline") == 0) {
		(void)cpu_down(cpu);
		disable_hotplug_cpu(cpu);
	} else {
		printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n",
		       state, cpu);
	}
}

static void handle_vcpu_hotplug_event(struct xenbus_watch *watch,
				      const char **vec, unsigned int len)
{
	unsigned int cpu;
	char *cpustr;
	const char *node = vec[XS_WATCH_PATH];

	cpustr = strstr(node, "cpu/");
	if (cpustr != NULL) {
		sscanf(cpustr, "cpu/%u", &cpu);
		vcpu_hotplug(cpu);
	}
}

static int setup_cpu_watcher(struct notifier_block *notifier,
			     unsigned long event, void *data)
{
	static struct xenbus_watch cpu_watch = {
		.node = "cpu",
		.callback = handle_vcpu_hotplug_event};

	(void)register_xenbus_watch(&cpu_watch);

	return NOTIFY_DONE;
}

static int __init setup_vcpu_hotplug_event(void)
{
	static struct notifier_block xsn_cpu = {
		.notifier_call = setup_cpu_watcher };

	if (!xen_pv_domain())
		return -ENODEV;

	register_xenstore_notifier(&xsn_cpu);

	return 0;
}

arch_initcall(setup_vcpu_hotplug_event);
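The watch callback only needs to recover which vcpu a xenstore event names, and everything hangs off the strstr()/sscanf() extraction above. A stand-alone illustration of that parsing step (the sample path string is hypothetical):

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* hypothetical watch path, as delivered in vec[XS_WATCH_PATH] */
	const char *node = "cpu/3/availability";
	const char *cpustr = strstr(node, "cpu/");
	unsigned int cpu;

	if (cpustr && sscanf(cpustr, "cpu/%u", &cpu) == 1)
		printf("vcpu_hotplug(%u)\n", cpu);	/* vcpu_hotplug(3) */
	return 0;
}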


@@ -84,17 +84,6 @@ static int irq_bindcount[NR_IRQS];
 /* Xen will never allocate port zero for any purpose. */
 #define VALID_EVTCHN(chn)	((chn) != 0)
 
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
-	(void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
 static struct irq_chip xen_dynamic_chip;
 
 /* Constructor for packed IRQ information. */

@@ -175,6 +164,12 @@ static inline void set_evtchn(int port)
 	sync_set_bit(port, &s->evtchn_pending[0]);
 }
 
+static inline int test_evtchn(int port)
+{
+	struct shared_info *s = HYPERVISOR_shared_info;
+	return sync_test_bit(port, &s->evtchn_pending[0]);
+}
+
 /**
  * notify_remote_via_irq - send event to remote end of event channel via irq

@@ -365,6 +360,10 @@ static void unbind_from_irq(unsigned int irq)
 		per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
 			[index_from_irq(irq)] = -1;
 		break;
+	case IRQT_IPI:
+		per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
+			[index_from_irq(irq)] = -1;
+		break;
 	default:
 		break;
 	}

@@ -743,6 +742,25 @@ void xen_clear_irq_pending(int irq)
 		clear_evtchn(evtchn);
 }
 
+void xen_set_irq_pending(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn))
+		set_evtchn(evtchn);
+}
+
+bool xen_test_irq_pending(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+	bool ret = false;
+
+	if (VALID_EVTCHN(evtchn))
+		ret = test_evtchn(evtchn);
+
+	return ret;
+}
+
 /* Poll waiting for an irq to become pending.  In the usual case, the
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq)
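The new xen_set/test_irq_pending() helpers are thin wrappers over atomic bit operations on the shared-info pending bitmap. A self-contained sketch of the same pattern using C11 atomics — the word array stands in for evtchn_pending[], and sync_set_bit()/sync_test_bit() are the kernel primitives being imitated:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_WORD 64
static _Atomic unsigned long long pending[4];	/* mock evtchn_pending[] */

static void mock_set_evtchn(int port)
{
	atomic_fetch_or(&pending[port / BITS_PER_WORD],
			1ULL << (port % BITS_PER_WORD));
}

static bool mock_test_evtchn(int port)
{
	return atomic_load(&pending[port / BITS_PER_WORD])
		& (1ULL << (port % BITS_PER_WORD));
}

static void mock_clear_evtchn(int port)
{
	atomic_fetch_and(&pending[port / BITS_PER_WORD],
			 ~(1ULL << (port % BITS_PER_WORD)));
}

int main(void)
{
	mock_set_evtchn(70);
	printf("%d ", mock_test_evtchn(70));	/* 1 */
	mock_clear_evtchn(70);
	printf("%d\n", mock_test_evtchn(70));	/* 0 */
	return 0;
}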


@@ -508,7 +508,7 @@ static int __devinit gnttab_init(void)
 	unsigned int max_nr_glist_frames, nr_glist_frames;
 	unsigned int nr_init_grefs;
 
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		return -ENODEV;
 
 	nr_grant_frames = 1;


@@ -814,7 +814,7 @@ static int __init xenbus_probe_init(void)
 	DPRINTK("");
 
 	err = -ENODEV;
-	if (!is_running_on_xen())
+	if (!xen_domain())
 		goto out_error;
 
 	/* Register ourselves with the kernel bus subsystem */

@@ -829,7 +829,7 @@ static int __init xenbus_probe_init(void)
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
-	if (is_initial_xendomain()) {
+	if (xen_initial_domain()) {
 		/* dom0 not yet supported */
 	} else {
 		xenstored_ready = 1;

@@ -846,7 +846,7 @@ static int __init xenbus_probe_init(void)
 		goto out_unreg_back;
 	}
 
-	if (!is_initial_xendomain())
+	if (!xen_initial_domain())
 		xenbus_probe(NULL);
 
 	return 0;

@@ -937,7 +937,7 @@ static void wait_for_devices(struct xenbus_driver *xendrv)
 	unsigned long timeout = jiffies + 10*HZ;
 	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
 
-	if (!ready_to_wait_for_devices || !is_running_on_xen())
+	if (!ready_to_wait_for_devices || !xen_domain())
 		return;
 
 	while (exists_disconnected_device(drv)) {


@@ -199,6 +199,8 @@ typedef struct siginfo {
  */
 #define TRAP_BRKPT	(__SI_FAULT|1)	/* process breakpoint */
 #define TRAP_TRACE	(__SI_FAULT|2)	/* process trace trap */
+#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
+#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint/watchpoint */
 #define NSIGTRAP	2
 
 /*


@@ -3,11 +3,6 @@
 
 #include <asm-generic/siginfo.h>
 
-/*
- * SIGTRAP si_codes
- */
-#define TRAP_BRANCH	(__SI_FAULT|3)	/* process taken branch trap */
-#define TRAP_HWBKPT	(__SI_FAULT|4)	/* hardware breakpoint or watchpoint */
-
 #undef NSIGTRAP
 #define NSIGTRAP 4


@@ -16,4 +16,21 @@ static inline unsigned int get_bios_ebda(void)
 
 void reserve_ebda_region(void);
 
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+/*
+ * This is obviously not a great place for this, but we want to be
+ * able to scatter it around anywhere in the kernel.
+ */
+void check_for_bios_corruption(void);
+void start_periodic_check_for_corruption(void);
+#else
+static inline void check_for_bios_corruption(void)
+{
+}
+
+static inline void start_periodic_check_for_corruption(void)
+{
+}
+#endif
+
 #endif /* ASM_X86__BIOS_EBDA_H */


@@ -2,9 +2,7 @@
 #define ASM_X86__BOOT_H
 
 /* Don't touch these, unless you really know what you're doing. */
-#define DEF_INITSEG	0x9000
 #define DEF_SYSSEG	0x1000
-#define DEF_SETUPSEG	0x9020
 #define DEF_SYSSIZE	0x7F00
 
 /* Internal svga startup constants */


@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_struct *desc,
 	desc->d = info->seg_32bit;
 	desc->g = info->limit_in_pages;
 	desc->base2 = (info->base_addr & 0xff000000) >> 24;
+	/*
+	 * Don't allow setting of the lm bit. It is useless anyway
+	 * because 64bit system calls require __USER_CS:
+	 */
+	desc->l = 0;
 }
 
 extern struct desc_ptr idt_descr;

@@ -97,7 +102,15 @@ static inline int desc_empty(const void *ptr)
 	native_write_gdt_entry(dt, entry, desc, type)
 #define write_idt_entry(dt, entry, g)		\
 	native_write_idt_entry(dt, entry, g)
-#endif
+
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+}
+#endif	/* CONFIG_PARAVIRT */
 
 static inline void native_write_idt_entry(gate_desc *idt, int entry,
					   const gate_desc *gate)


@@ -0,0 +1,47 @@
#ifndef ASM_X86__MICROCODE_H
#define ASM_X86__MICROCODE_H

struct cpu_signature {
	unsigned int sig;
	unsigned int pf;
	unsigned int rev;
};

struct device;

struct microcode_ops {
	int  (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
	int  (*request_microcode_fw) (int cpu, struct device *device);

	void (*apply_microcode) (int cpu);

	int  (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
	void (*microcode_fini_cpu) (int cpu);
};

struct ucode_cpu_info {
	struct cpu_signature	cpu_sig;
	int			valid;
	void			*mc;
};
extern struct ucode_cpu_info ucode_cpu_info[];

#ifdef CONFIG_MICROCODE_INTEL
extern struct microcode_ops * __init init_intel_microcode(void);
#else
static inline struct microcode_ops * __init init_intel_microcode(void)
{
	return NULL;
}
#endif /* CONFIG_MICROCODE_INTEL */

#ifdef CONFIG_MICROCODE_AMD
extern struct microcode_ops * __init init_amd_microcode(void);
#else
static inline struct microcode_ops * __init init_amd_microcode(void)
{
	return NULL;
}
#endif

#endif /* ASM_X86__MICROCODE_H */


@@ -7,7 +7,7 @@
 
 #ifdef CONFIG_NUMA
 
-#define VIRTUAL_BUG_ON(x)
+#include <linux/mmdebug.h>
 
 #include <asm/smp.h>

@@ -29,7 +29,6 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
 {
 	unsigned nid;
 	VIRTUAL_BUG_ON(!memnodemap);
-	VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize);
 	nid = memnodemap[addr >> memnode_shift];
 	VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
 	return nid;


@@ -73,7 +73,12 @@ typedef struct page *pgtable_t;
 #endif
 
 #ifndef __ASSEMBLY__
+#define __phys_addr_const(x)	((x) - PAGE_OFFSET)
+#ifdef CONFIG_DEBUG_VIRTUAL
+extern unsigned long __phys_addr(unsigned long);
+#else
 #define __phys_addr(x)		((x) - PAGE_OFFSET)
+#endif
 #define __phys_reloc_hide(x)	RELOC_HIDE((x), 0)
 
 #ifdef CONFIG_FLATMEM
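With CONFIG_DEBUG_VIRTUAL the macro becomes an out-of-line function, so the virtual-to-physical translation can be sanity-checked instead of blindly subtracting PAGE_OFFSET. A hedged, stand-alone sketch of what such a checked version can look like — the PAGE_OFFSET value and the single-range check are assumptions for illustration; the real arch implementation is more involved:

#include <assert.h>
#include <stdio.h>

#define PAGE_OFFSET 0xc0000000UL		/* assumed 32-bit 3G/1G split */
#define VIRTUAL_BUG_ON(cond) assert(!(cond))

/* illustrative only; not the kernel's exact code */
static unsigned long checked_phys_addr(unsigned long x)
{
	VIRTUAL_BUG_ON(x < PAGE_OFFSET);	/* not a direct-mapped vaddr */
	return x - PAGE_OFFSET;
}

int main(void)
{
	printf("%#lx\n", checked_phys_addr(0xc0100000UL));	/* 0x100000 */
	return 0;
}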


@@ -124,6 +124,9 @@ struct pv_cpu_ops {
 			    int entrynum, const void *desc, int size);
 	void (*write_idt_entry)(gate_desc *,
 				int entrynum, const gate_desc *gate);
+	void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
+	void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
+
 	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
 
 	void (*set_iopl_mask)(unsigned mask);

@@ -325,6 +328,7 @@ struct pv_lock_ops {
 	int (*spin_is_locked)(struct raw_spinlock *lock);
 	int (*spin_is_contended)(struct raw_spinlock *lock);
 	void (*spin_lock)(struct raw_spinlock *lock);
+	void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
 	int (*spin_trylock)(struct raw_spinlock *lock);
 	void (*spin_unlock)(struct raw_spinlock *lock);
 };

@@ -830,6 +834,16 @@ do {						\
 		(aux) = __aux;			\
 } while (0)
 
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	PVOP_VCALL2(pv_cpu_ops.alloc_ldt, ldt, entries);
+}
+
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
+{
+	PVOP_VCALL2(pv_cpu_ops.free_ldt, ldt, entries);
+}
+
 static inline void load_TR_desc(void)
 {
 	PVOP_VCALL0(pv_cpu_ops.load_tr_desc);

@@ -1394,6 +1408,12 @@ static __always_inline void __raw_spin_lock(struct raw_spinlock *lock)
 	PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
 }
 
+static __always_inline void __raw_spin_lock_flags(struct raw_spinlock *lock,
+						  unsigned long flags)
+{
+	PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
+}
+
 static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock)
 {
 	return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
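spin_lock_flags follows the usual pv_ops shape: a function pointer in an ops struct plus an inline wrapper, so native and Xen backends can be swapped at boot. A stripped-down C model of that indirection — the types are mocks and a plain indirect call stands in for the kernel's patchable PVOP_VCALL* macros:

#include <stdio.h>

struct raw_spinlock { int slock; };

struct lock_ops {
	void (*spin_lock)(struct raw_spinlock *lock);
	void (*spin_lock_flags)(struct raw_spinlock *lock, unsigned long flags);
};

static void native_lock(struct raw_spinlock *lock)
{
	puts("native spin_lock");
}

static void native_lock_flags(struct raw_spinlock *lock, unsigned long flags)
{
	native_lock(lock);		/* native code ignores flags */
}

static struct lock_ops mock_pv_lock_ops = {
	.spin_lock	 = native_lock,
	.spin_lock_flags = native_lock_flags,
};

static inline void my_raw_spin_lock_flags(struct raw_spinlock *lock,
					  unsigned long flags)
{
	mock_pv_lock_ops.spin_lock_flags(lock, flags);	/* cf. PVOP_VCALL2 */
}

int main(void)
{
	struct raw_spinlock l = { 0 };

	my_raw_spin_lock_flags(&l, 0);	/* a Xen guest would repoint the ops */
	return 0;
}

The flags parameter lets a paravirtualized slow path re-enable interrupts while spinning, which the native backend simply ignores.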


@@ -586,41 +586,6 @@ static inline void clear_in_cr4(unsigned long mask)
 	write_cr4(cr4);
 }
 
-struct microcode_header {
-	unsigned int		hdrver;
-	unsigned int		rev;
-	unsigned int		date;
-	unsigned int		sig;
-	unsigned int		cksum;
-	unsigned int		ldrver;
-	unsigned int		pf;
-	unsigned int		datasize;
-	unsigned int		totalsize;
-	unsigned int		reserved[3];
-};
-
-struct microcode {
-	struct microcode_header	hdr;
-	unsigned int		bits[0];
-};
-
-typedef struct microcode	microcode_t;
-typedef struct microcode_header	microcode_header_t;
-
-/* microcode format is extended from prescott processors */
-struct extended_signature {
-	unsigned int		sig;
-	unsigned int		pf;
-	unsigned int		cksum;
-};
-
-struct extended_sigtable {
-	unsigned int		count;
-	unsigned int		cksum;
-	unsigned int		reserved[3];
-	struct extended_signature sigs[0];
-};
-
 typedef struct {
 	unsigned long		seg;
 } mm_segment_t;


@@ -177,11 +177,11 @@ convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs);
 
 #ifdef CONFIG_X86_32
 extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs,
-			 int error_code);
-#else
-void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+			 int error_code, int si_code);
 #endif
 
+void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
+
 extern long syscall_trace_enter(struct pt_regs *);
 extern void syscall_trace_leave(struct pt_regs *);


@@ -50,12 +50,16 @@ extern struct {
 struct smp_ops {
 	void (*smp_prepare_boot_cpu)(void);
 	void (*smp_prepare_cpus)(unsigned max_cpus);
-	int (*cpu_up)(unsigned cpu);
 	void (*smp_cpus_done)(unsigned max_cpus);
 
 	void (*smp_send_stop)(void);
 	void (*smp_send_reschedule)(int cpu);
 
+	int (*cpu_up)(unsigned cpu);
+	int (*cpu_disable)(void);
+	void (*cpu_die)(unsigned int cpu);
+	void (*play_dead)(void);
+
 	void (*send_call_func_ipi)(cpumask_t mask);
 	void (*send_call_func_single_ipi)(int cpu);
 };

@@ -94,6 +98,21 @@ static inline int __cpu_up(unsigned int cpu)
 	return smp_ops.cpu_up(cpu);
 }
 
+static inline int __cpu_disable(void)
+{
+	return smp_ops.cpu_disable();
+}
+
+static inline void __cpu_die(unsigned int cpu)
+{
+	smp_ops.cpu_die(cpu);
+}
+
+static inline void play_dead(void)
+{
+	smp_ops.play_dead();
+}
+
 static inline void smp_send_reschedule(int cpu)
 {
 	smp_ops.smp_send_reschedule(cpu);

@@ -109,16 +128,19 @@ static inline void arch_send_call_function_ipi(cpumask_t mask)
 	smp_ops.send_call_func_ipi(mask);
 }
 
+void cpu_disable_common(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
 int native_cpu_up(unsigned int cpunum);
+int native_cpu_disable(void);
+void native_cpu_die(unsigned int cpu);
+void native_play_dead(void);
+void play_dead_common(void);
+
 void native_send_call_func_ipi(cpumask_t mask);
 void native_send_call_func_single_ipi(int cpu);
-extern int __cpu_disable(void);
-extern void __cpu_die(unsigned int cpu);
 
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)

@@ -205,9 +227,5 @@ static inline int hard_smp_processor_id(void)
 
 #endif /* CONFIG_X86_LOCAL_APIC */
 
-#ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_uninit(void);
-#endif
-
 #endif /* __ASSEMBLY__ */
 #endif /* ASM_X86__SMP_H */


@@ -21,8 +21,10 @@
 
 #ifdef CONFIG_X86_32
 # define LOCK_PTR_REG "a"
+# define REG_PTR_MODE "k"
 #else
 # define LOCK_PTR_REG "D"
+# define REG_PTR_MODE "q"
 #endif
 
 #if defined(CONFIG_X86_32) && \

@@ -54,19 +56,7 @@
  * much between them in performance though, especially as locks are out of line.
  */
 #if (NR_CPUS < 256)
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 8) & 0xff) != (tmp & 0xff));
-}
-
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 8) - tmp) & 0xff) > 1;
-}
+#define TICKET_SHIFT 8
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {

@@ -89,19 +79,17 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 
 static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
-	int tmp;
-	short new;
+	int tmp, new;
 
-	asm volatile("movw %2,%w0\n\t"
+	asm volatile("movzwl %2, %0\n\t"
 		     "cmpb %h0,%b0\n\t"
+		     "leal 0x100(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
-		     "movw %w0,%w1\n\t"
-		     "incb %h1\n\t"
 		     LOCK_PREFIX "cmpxchgw %w1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=Q" (new), "+m" (lock->slock)
+		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");

@@ -116,19 +104,7 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 		     : "memory", "cc");
 }
 #else
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 16) & 0xffff) != (tmp & 0xffff));
-}
-
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
-{
-	int tmp = ACCESS_ONCE(lock->slock);
-
-	return (((tmp >> 16) - tmp) & 0xffff) > 1;
-}
+#define TICKET_SHIFT 16
 
 static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {

@@ -146,7 +122,7 @@ static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 		     /* don't need lfence here, because loads are in-order */
 		     "jmp 1b\n"
 		     "2:"
-		     : "+Q" (inc), "+m" (lock->slock), "=r" (tmp)
+		     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
 		     :
 		     : "memory", "cc");
 }

@@ -160,13 +136,13 @@ static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 		     "movl %0,%1\n\t"
 		     "roll $16, %0\n\t"
 		     "cmpl %0,%1\n\t"
+		     "leal 0x00010000(%" REG_PTR_MODE "0), %1\n\t"
 		     "jne 1f\n\t"
-		     "addl $0x00010000, %1\n\t"
 		     LOCK_PREFIX "cmpxchgl %1,%2\n\t"
 		     "1:"
 		     "sete %b1\n\t"
 		     "movzbl %b1,%0\n\t"
-		     : "=&a" (tmp), "=r" (new), "+m" (lock->slock)
+		     : "=&a" (tmp), "=&q" (new), "+m" (lock->slock)
 		     :
 		     : "memory", "cc");

@@ -182,7 +158,19 @@ static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 }
 #endif
 
-#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
+}
+
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+{
+	int tmp = ACCESS_ONCE(lock->slock);
+
+	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
+}
 
 #ifdef CONFIG_PARAVIRT
 /*

@@ -272,6 +260,13 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
 {
 	__ticket_spin_unlock(lock);
 }
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+						  unsigned long flags)
+{
+	__raw_spin_lock(lock);
+}
+
 #endif	/* CONFIG_PARAVIRT */
 
 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
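The unified TICKET_SHIFT helpers replace two near-identical copies: the owner ticket sits in the low TICKET_SHIFT bits and the next-to-be-issued ticket in the bits above it. Plain-C restatement of that arithmetic (illustrative only; the kernel versions read lock->slock):

#include <stdio.h>

#define TICKET_SHIFT 8	/* NR_CPUS < 256: owner in bits 7:0, next in 15:8 */

static int ticket_is_locked(int tmp)
{
	/* locked iff next ticket differs from owner ticket */
	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
}

static int ticket_is_contended(int tmp)
{
	/* contended iff more than one ticket is outstanding */
	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}

int main(void)
{
	printf("%d %d\n", ticket_is_locked(0x0101), ticket_is_contended(0x0101)); /* 0 0: free */
	printf("%d %d\n", ticket_is_locked(0x0201), ticket_is_contended(0x0201)); /* 1 0: held */
	printf("%d %d\n", ticket_is_locked(0x0401), ticket_is_contended(0x0401)); /* 1 1: queued */
	return 0;
}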


@@ -119,6 +119,10 @@ static inline void native_flush_tlb_others(const cpumask_t *cpumask,
 {
 }
 
+static inline void reset_lazy_tlbstate(void)
+{
+}
+
 #else  /* SMP */
 
 #include <asm/smp.h>

@@ -151,6 +155,12 @@ struct tlb_state {
 	char __cacheline_padding[L1_CACHE_BYTES-8];
 };
 DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
+
+void reset_lazy_tlbstate(void);
+#else
+static inline void reset_lazy_tlbstate(void)
+{
+}
 #endif
 
 #endif /* SMP */


@@ -1,6 +1,8 @@
 #ifndef ASM_X86__TRAPS_H
 #define ASM_X86__TRAPS_H
 
+#include <asm/debugreg.h>
+
 /* Common in X86_32 and X86_64 */
 asmlinkage void divide_error(void);
 asmlinkage void debug(void);

@@ -36,6 +38,16 @@ void do_invalid_op(struct pt_regs *, long);
 void do_general_protection(struct pt_regs *, long);
 void do_nmi(struct pt_regs *, long);
 
+static inline int get_si_code(unsigned long condition)
+{
+	if (condition & DR_STEP)
+		return TRAP_TRACE;
+	else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
+		return TRAP_HWBKPT;
+	else
+		return TRAP_BRKPT;
+}
+
 extern int panic_on_unrecovered_nmi;
 extern int kstack_depth_to_print;
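get_si_code() maps DR6 debug-status bits to the SIGTRAP si_codes added to siginfo.h above. A stand-alone check of that mapping — the DR6 bit values and numeric si_codes below are assumptions for illustration (confirm against asm/debugreg.h), following the conventional layout of single-step in bit 14 and the four breakpoint hits in the low bits:

#include <stdio.h>

#define DR_TRAP0 0x1
#define DR_TRAP1 0x2
#define DR_TRAP2 0x4
#define DR_TRAP3 0x8
#define DR_STEP  0x4000
enum { TRAP_BRKPT = 1, TRAP_TRACE = 2, TRAP_HWBKPT = 4 };

static int get_si_code(unsigned long condition)
{
	if (condition & DR_STEP)
		return TRAP_TRACE;
	else if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))
		return TRAP_HWBKPT;
	else
		return TRAP_BRKPT;
}

int main(void)
{
	printf("%d %d %d\n", get_si_code(0x4000), get_si_code(0x2),
	       get_si_code(0));	/* 2 4 1 */
	return 0;
}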


@@ -54,7 +54,6 @@
 /* arch/i386/kernel/setup.c */
 extern struct shared_info *HYPERVISOR_shared_info;
 extern struct start_info *xen_start_info;
-#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
 
 /* arch/i386/mach-xen/evtchn.c */
 /* Force a proper event-channel callback from Xen. */

@@ -67,6 +66,17 @@ u64 jiffies_to_st(unsigned long jiffies);
 #define MULTI_UVMFLAGS_INDEX 3
 #define MULTI_UVMDOMID_INDEX 4
 
-#define is_running_on_xen()	(xen_start_info ? 1 : 0)
+enum xen_domain_type {
+	XEN_NATIVE,
+	XEN_PV_DOMAIN,
+	XEN_HVM_DOMAIN,
+};
+
+extern enum xen_domain_type xen_domain_type;
+
+#define xen_domain()		(xen_domain_type != XEN_NATIVE)
+#define xen_pv_domain()		(xen_domain_type == XEN_PV_DOMAIN)
+#define xen_initial_domain()	(xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN)
+#define xen_hvm_domain()	(xen_domain_type == XEN_HVM_DOMAIN)
 
 #endif /* ASM_X86__XEN__HYPERVISOR_H */
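The driver conversions earlier in this series (blkfront, netfront, xenkbd, xenfb, hvc, balloon) all reduce to these predicates. A user-space mock of the intended init-time pattern — the enum and macros are copied from the header above, while netif_init_mock and the hard-coded domain type are illustrative stand-ins (xen_start_info/SIF_INITDOMAIN are elided):

#include <stdio.h>

enum xen_domain_type { XEN_NATIVE, XEN_PV_DOMAIN, XEN_HVM_DOMAIN };

static enum xen_domain_type xen_domain_type = XEN_PV_DOMAIN;

#define xen_domain()	(xen_domain_type != XEN_NATIVE)
#define xen_pv_domain()	(xen_domain_type == XEN_PV_DOMAIN)

static int netif_init_mock(void)
{
	if (!xen_domain())	/* bare metal: quietly decline */
		return -19;	/* -ENODEV */
	puts("frontend driver registering");
	return 0;
}

int main(void)
{
	printf("netif_init_mock() = %d\n", netif_init_mock());
	return 0;
}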


@@ -360,6 +360,7 @@ typedef struct elf64_shdr {
 #define NT_PPC_SPE	0x101		/* PowerPC SPE/EVR registers */
 #define NT_PPC_VSX	0x102		/* PowerPC VSX registers */
 #define NT_386_TLS	0x200		/* i386 TLS slots (struct user_desc) */
+#define NT_386_IOPERM	0x201		/* x86 io permission bitmap (1=deny) */
 
 /* Note header in a PT_NOTE section */


@@ -182,7 +182,7 @@ extern int vsscanf(const char *, const char *, va_list)
 
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(char *ptr, char **retptr);
+extern unsigned long long memparse(const char *ptr, char **retptr);
 
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);


@@ -7,6 +7,7 @@
 
 #include <linux/gfp.h>
 #include <linux/list.h>
+#include <linux/mmdebug.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
 #include <linux/prio_tree.h>

@@ -219,12 +220,6 @@ struct inode;
  */
 #include <linux/page-flags.h>
 
-#ifdef CONFIG_DEBUG_VM
-#define VM_BUG_ON(cond) BUG_ON(cond)
-#else
-#define VM_BUG_ON(condition) do { } while(0)
-#endif
-
 /*
  * Methods to modify the page usage count.
 *

@@ -919,7 +914,7 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 }
 #endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 /*
  * We tuck a spinlock to guard each pagetable page into its struct page,
  * at page->private, with BUILD_BUG_ON to make sure that this will not

@@ -932,14 +927,14 @@ static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long a
 } while (0)
 #define pte_lock_deinit(page)	((page)->mapping = NULL)
 #define pte_lockptr(mm, pmd)	({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
-#else
+#else	/* !USE_SPLIT_PTLOCKS */
 /*
  * We use mm->page_table_lock to guard all pagetable pages of the mm.
  */
 #define pte_lock_init(page)	do {} while (0)
 #define pte_lock_deinit(page)	do {} while (0)
 #define pte_lockptr(mm, pmd)	({(void)(pmd); &(mm)->page_table_lock;})
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* USE_SPLIT_PTLOCKS */
 
 static inline void pgtable_page_ctor(struct page *page)
 {


@@ -21,11 +21,13 @@
 
 struct address_space;
 
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#define USE_SPLIT_PTLOCKS	(NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS)
+
+#if USE_SPLIT_PTLOCKS
 typedef atomic_long_t mm_counter_t;
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else  /* !USE_SPLIT_PTLOCKS */
 typedef unsigned long mm_counter_t;
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
 
 /*
  * Each physical page in the system has a struct page associated with

@@ -65,7 +67,7 @@ struct page {
 					 * see PAGE_MAPPING_ANON below.
 					 */
 	};
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 	spinlock_t ptl;
 #endif
 	struct kmem_cache *slab;	/* SLUB: Pointer to slab */

include/linux/mmdebug.h (new file, 18 lines)

@@ -0,0 +1,18 @@
#ifndef LINUX_MM_DEBUG_H
#define LINUX_MM_DEBUG_H 1
#include <linux/autoconf.h>
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond) BUG_ON(cond)
#else
#define VM_BUG_ON(cond) do { } while (0)
#endif
#ifdef CONFIG_DEBUG_VIRTUAL
#define VIRTUAL_BUG_ON(cond) BUG_ON(cond)
#else
#define VIRTUAL_BUG_ON(cond) do { } while (0)
#endif
#endif

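Because VM_BUG_ON() expands to an empty statement unless CONFIG_DEBUG_VM is set, it can assert invariants on hot paths at no cost in production builds. A hypothetical use, a sketch rather than code from this diff:

	/* The check is only evaluated when CONFIG_DEBUG_VM=y; on
	 * production builds it compiles to nothing. */
	static inline void get_page_sketch(struct page *page)
	{
		VM_BUG_ON(atomic_read(&page->_count) == 0);
		atomic_inc(&page->_count);
	}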
include/linux/sched.h

@@ -352,7 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
 extern void arch_unmap_area(struct mm_struct *, unsigned long);
 extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+#if USE_SPLIT_PTLOCKS
 /*
  * The mm counters are not protected by its page_table_lock,
  * so must be incremented atomically.
@@ -363,7 +363,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
 #define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
-#else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#else  /* !USE_SPLIT_PTLOCKS */
 /*
  * The mm counters are protected by its page_table_lock,
  * so can be incremented directly.
@@ -374,7 +374,7 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
 #define inc_mm_counter(mm, member) (mm)->_##member++
 #define dec_mm_counter(mm, member) (mm)->_##member--
-#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+#endif /* !USE_SPLIT_PTLOCKS */
 #define get_mm_rss(mm)					\
 	(get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))

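Whichever way the counters are defined, callers use the same inc_mm_counter()/dec_mm_counter() macros; the representation (atomic_long_t vs. plain unsigned long) stays hidden behind them. An illustrative fault-accounting fragment, hypothetical and not part of this diff:

	/* Compiles to atomic_long_inc() when USE_SPLIT_PTLOCKS, since the
	 * counters are then updated outside page_table_lock; otherwise a
	 * plain increment under the lock suffices. */
	static void account_anon_fault(struct mm_struct *mm)
	{
		inc_mm_counter(mm, anon_rss);
	}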
include/linux/usb/ehci_def.h (new file)

@@ -0,0 +1,160 @@
/*
* Copyright (c) 2001-2002 by David Brownell
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2 of the License, or (at your
* option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#ifndef __LINUX_USB_EHCI_DEF_H
#define __LINUX_USB_EHCI_DEF_H
/* EHCI register interface, corresponds to EHCI Revision 0.95 specification */
/* Section 2.2 Host Controller Capability Registers */
struct ehci_caps {
/* these fields are specified as 8 and 16 bit registers,
* but some hosts can't perform 8 or 16 bit PCI accesses.
*/
u32 hc_capbase;
#define HC_LENGTH(p) (((p)>>00)&0x00ff) /* bits 7:0 */
#define HC_VERSION(p) (((p)>>16)&0xffff) /* bits 31:16 */
u32 hcs_params; /* HCSPARAMS - offset 0x4 */
#define HCS_DEBUG_PORT(p) (((p)>>20)&0xf) /* bits 23:20, debug port? */
#define HCS_INDICATOR(p) ((p)&(1 << 16)) /* true: has port indicators */
#define HCS_N_CC(p) (((p)>>12)&0xf) /* bits 15:12, #companion HCs */
#define HCS_N_PCC(p) (((p)>>8)&0xf) /* bits 11:8, ports per CC */
#define HCS_PORTROUTED(p) ((p)&(1 << 7)) /* true: port routing */
#define HCS_PPC(p) ((p)&(1 << 4)) /* true: port power control */
#define HCS_N_PORTS(p) (((p)>>0)&0xf) /* bits 3:0, ports on HC */
u32 hcc_params; /* HCCPARAMS - offset 0x8 */
#define HCC_EXT_CAPS(p) (((p)>>8)&0xff) /* for pci extended caps */
#define HCC_ISOC_CACHE(p) ((p)&(1 << 7)) /* true: can cache isoc frame */
#define HCC_ISOC_THRES(p) (((p)>>4)&0x7) /* bits 6:4, uframes cached */
#define HCC_CANPARK(p) ((p)&(1 << 2)) /* true: can park on async qh */
#define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1)) /* true: periodic_size changes*/
#define HCC_64BIT_ADDR(p) ((p)&(1)) /* true: can use 64-bit addr */
u8 portroute [8]; /* nibbles for routing - offset 0xC */
} __attribute__ ((packed));
/* Section 2.3 Host Controller Operational Registers */
struct ehci_regs {
/* USBCMD: offset 0x00 */
u32 command;
/* 23:16 is r/w intr rate, in microframes; default "8" == 1/msec */
#define CMD_PARK (1<<11) /* enable "park" on async qh */
#define CMD_PARK_CNT(c) (((c)>>8)&3) /* how many transfers to park for */
#define CMD_LRESET (1<<7) /* partial reset (no ports, etc) */
#define CMD_IAAD (1<<6) /* "doorbell" interrupt async advance */
#define CMD_ASE (1<<5) /* async schedule enable */
#define CMD_PSE (1<<4) /* periodic schedule enable */
/* 3:2 is periodic frame list size */
#define CMD_RESET (1<<1) /* reset HC not bus */
#define CMD_RUN (1<<0) /* start/stop HC */
/* USBSTS: offset 0x04 */
u32 status;
#define STS_ASS (1<<15) /* Async Schedule Status */
#define STS_PSS (1<<14) /* Periodic Schedule Status */
#define STS_RECL (1<<13) /* Reclamation */
#define STS_HALT (1<<12) /* Not running (any reason) */
/* some bits reserved */
/* these STS_* flags are also intr_enable bits (USBINTR) */
#define STS_IAA (1<<5) /* Interrupted on async advance */
#define STS_FATAL (1<<4) /* such as some PCI access errors */
#define STS_FLR (1<<3) /* frame list rolled over */
#define STS_PCD (1<<2) /* port change detect */
#define STS_ERR (1<<1) /* "error" completion (overflow, ...) */
#define STS_INT (1<<0) /* "normal" completion (short, ...) */
/* USBINTR: offset 0x08 */
u32 intr_enable;
/* FRINDEX: offset 0x0C */
u32 frame_index; /* current microframe number */
/* CTRLDSSEGMENT: offset 0x10 */
u32 segment; /* address bits 63:32 if needed */
/* PERIODICLISTBASE: offset 0x14 */
u32 frame_list; /* points to periodic list */
/* ASYNCLISTADDR: offset 0x18 */
u32 async_next; /* address of next async queue head */
u32 reserved [9];
/* CONFIGFLAG: offset 0x40 */
u32 configured_flag;
#define FLAG_CF (1<<0) /* true: we'll support "high speed" */
/* PORTSC: offset 0x44 */
u32 port_status [0]; /* up to N_PORTS */
/* 31:23 reserved */
#define PORT_WKOC_E (1<<22) /* wake on overcurrent (enable) */
#define PORT_WKDISC_E (1<<21) /* wake on disconnect (enable) */
#define PORT_WKCONN_E (1<<20) /* wake on connect (enable) */
/* 19:16 for port testing */
#define PORT_LED_OFF (0<<14)
#define PORT_LED_AMBER (1<<14)
#define PORT_LED_GREEN (2<<14)
#define PORT_LED_MASK (3<<14)
#define PORT_OWNER (1<<13) /* true: companion hc owns this port */
#define PORT_POWER (1<<12) /* true: has power (see PPC) */
#define PORT_USB11(x) (((x)&(3<<10)) == (1<<10)) /* USB 1.1 device */
/* 11:10 for detecting lowspeed devices (reset vs release ownership) */
/* 9 reserved */
#define PORT_RESET (1<<8) /* reset port */
#define PORT_SUSPEND (1<<7) /* suspend port */
#define PORT_RESUME (1<<6) /* resume it */
#define PORT_OCC (1<<5) /* over current change */
#define PORT_OC (1<<4) /* over current active */
#define PORT_PEC (1<<3) /* port enable change */
#define PORT_PE (1<<2) /* port enable */
#define PORT_CSC (1<<1) /* connect status change */
#define PORT_CONNECT (1<<0) /* device connected */
#define PORT_RWC_BITS (PORT_CSC | PORT_PEC | PORT_OCC)
} __attribute__ ((packed));
#define USBMODE 0x68 /* USB Device mode */
#define USBMODE_SDIS (1<<3) /* Stream disable */
#define USBMODE_BE (1<<2) /* BE/LE endianness select */
#define USBMODE_CM_HC (3<<0) /* host controller mode */
#define USBMODE_CM_IDLE (0<<0) /* idle state */
/* Appendix C, Debug port ... intended for use with special "debug devices"
* that can help if there's no serial console. (nonstandard enumeration.)
*/
struct ehci_dbg_port {
u32 control;
#define DBGP_OWNER (1<<30)
#define DBGP_ENABLED (1<<28)
#define DBGP_DONE (1<<16)
#define DBGP_INUSE (1<<10)
#define DBGP_ERRCODE(x) (((x)>>7)&0x07)
# define DBGP_ERR_BAD 1
# define DBGP_ERR_SIGNAL 2
#define DBGP_ERROR (1<<6)
#define DBGP_GO (1<<5)
#define DBGP_OUT (1<<4)
#define DBGP_LEN(x) (((x)>>0)&0x0f)
u32 pids;
#define DBGP_PID_GET(x) (((x)>>16)&0xff)
#define DBGP_PID_SET(data, tok) (((data)<<8)|(tok))
u32 data03;
u32 data47;
u32 address;
#define DBGP_EPADDR(dev, ep) (((dev)<<8)|(ep))
} __attribute__ ((packed));
#endif /* __LINUX_USB_EHCI_DEF_H */

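The accessor macros in ehci_def.h exist because the capability registers pack several fields into each 32-bit read, and PORTSC mixes control bits with write-one-to-clear status bits (collected in PORT_RWC_BITS). Two short sketches of how a host driver might use them, with hypothetical helper names and assuming MMIO-mapped registers; neither is part of this diff:

	/* Decode the packed capability registers: CAPLENGTH gives the
	 * offset to the operational registers, HCIVERSION is BCD
	 * (e.g. 0x0095 for EHCI 0.95). */
	static void ehci_dump_caps(struct ehci_caps __iomem *caps)
	{
		u32 capbase = readl(&caps->hc_capbase);

		printk(KERN_DEBUG "EHCI caplen %u version %04x ports %u\n",
		       HC_LENGTH(capbase), HC_VERSION(capbase),
		       HCS_N_PORTS(readl(&caps->hcs_params)));
	}

	/* Set a PORTSC control bit without writing 1s to the W1C status
	 * bits, which would silently acknowledge pending port changes. */
	static void portsc_set_bit(u32 __iomem *portsc, u32 bit)
	{
		u32 v = readl(portsc) & ~PORT_RWC_BITS;

		writel(v | bit, portsc);
	}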
include/xen/balloon.h (deleted file)

@@ -1,61 +0,0 @@
/******************************************************************************
* balloon.h
*
* Xen balloon driver - enables returning/claiming memory to/from Xen.
*
* Copyright (c) 2003, B Dragovic
* Copyright (c) 2003-2004, M Williamson, K Fraser
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version 2
* as published by the Free Software Foundation; or, when distributed
* separately from the Linux kernel or incorporated into other
* software packages, subject to the following license:
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this source file (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy, modify,
* merge, publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef __XEN_BALLOON_H__
#define __XEN_BALLOON_H__
#include <linux/spinlock.h>
#if 0
/*
* Inform the balloon driver that it should allow some slop for device-driver
* memory activities.
*/
void balloon_update_driver_allowance(long delta);
/* Allocate/free a set of empty pages in low memory (i.e., no RAM mapped). */
struct page **alloc_empty_pages_and_pagevec(int nr_pages);
void free_empty_pages_and_pagevec(struct page **pagevec, int nr_pages);
void balloon_release_driver_page(struct page *page);
/*
* Prevent the balloon driver from changing the memory reservation during
* a driver critical region.
*/
extern spinlock_t balloon_lock;
#define balloon_lock(__flags) spin_lock_irqsave(&balloon_lock, __flags)
#define balloon_unlock(__flags) spin_unlock_irqrestore(&balloon_lock, __flags)
#endif
#endif /* __XEN_BALLOON_H__ */

Some files were not shown because too many files have changed in this diff.