Merge branch 'master'

Jeff Garzik 2005-11-09 01:07:12 -05:00
commit a892acacd3
1533 changed files with 51745 additions and 23907 deletions


@ -3642,11 +3642,9 @@ S: Beaverton, OR 97005
S: USA
N: Michal Wronski
E: wrona@mat.uni.torun.pl
W: http://www.mat.uni.torun.pl/~wrona
E: Michal.Wronski@motorola.com
D: POSIX message queues fs (with K. Benedyczak)
S: ul. Teczowa 23/12
S: 80-680 Gdansk-Sobieszewo
S: Krakow
S: Poland
N: Frank Xia


@ -139,9 +139,14 @@ You'll probably want to upgrade.
Ksymoops
--------
If the unthinkable happens and your kernel oopses, you'll need a 2.4
version of ksymoops to decode the report; see REPORTING-BUGS in the
root of the Linux source for more information.
If the unthinkable happens and your kernel oopses, you may need the
ksymoops tool to decode it, but in most cases you don't.
In the 2.6 kernel it is generally preferred to build the kernel with
CONFIG_KALLSYMS so that it produces readable dumps that can be used as-is
(this also produces better output than ksymoops).
If for some reason your kernel is not built with CONFIG_KALLSYMS and
you have no way to rebuild and reproduce the Oops with that option, then
you can still decode that Oops with ksymoops.
Module-Init-Tools
-----------------


@ -10,7 +10,7 @@ DOCBOOKS := wanbook.xml z8530book.xml mcabook.xml videobook.xml \
kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
procfs-guide.xml writing_usb_driver.xml \
sis900.xml kernel-api.xml journal-api.xml lsm.xml usb.xml \
gadget.xml libata.xml mtdnand.xml librs.xml
gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml
###
# The build process is as follows (targets):


@ -306,7 +306,7 @@ an example.
</para>
<sect1><title>Journal Level</title>
!Efs/jbd/journal.c
!Efs/jbd/recovery.c
!Ifs/jbd/recovery.c
</sect1>
<sect1><title>Transaction Level</title>
!Efs/jbd/transaction.c


@ -118,7 +118,7 @@ X!Ilib/string.c
</sect1>
<sect1><title>User Space Memory Access</title>
!Iinclude/asm-i386/uaccess.h
!Iarch/i386/lib/usercopy.c
!Earch/i386/lib/usercopy.c
</sect1>
<sect1><title>More Memory Management Functions</title>
!Iinclude/linux/rmap.h
@ -174,7 +174,6 @@ X!Ilib/string.c
<title>The Linux VFS</title>
<sect1><title>The Filesystem types</title>
!Iinclude/linux/fs.h
!Einclude/linux/fs.h
</sect1>
<sect1><title>The Directory Cache</title>
!Efs/dcache.c
@ -266,7 +265,7 @@ X!Ekernel/module.c
<chapter id="hardware">
<title>Hardware Interfaces</title>
<sect1><title>Interrupt Handling</title>
!Ikernel/irq/manage.c
!Ekernel/irq/manage.c
</sect1>
<sect1><title>Resources Management</title>
@ -501,7 +500,7 @@ KAO -->
!Edrivers/video/modedb.c
</sect1>
<sect1><title>Frame Buffer Macintosh Video Mode Database</title>
!Idrivers/video/macmodes.c
!Edrivers/video/macmodes.c
</sect1>
<sect1><title>Frame Buffer Fonts</title>
<para>


@ -0,0 +1,160 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
"http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
<!ENTITY rapidio SYSTEM "rapidio.xml">
]>
<book id="RapidIO-Guide">
<bookinfo>
<title>RapidIO Subsystem Guide</title>
<authorgroup>
<author>
<firstname>Matt</firstname>
<surname>Porter</surname>
<affiliation>
<address>
<email>mporter@kernel.crashing.org</email>
<email>mporter@mvista.com</email>
</address>
</affiliation>
</author>
</authorgroup>
<copyright>
<year>2005</year>
<holder>MontaVista Software, Inc.</holder>
</copyright>
<legalnotice>
<para>
This documentation is free software; you can redistribute
it and/or modify it under the terms of the GNU General Public
License version 2 as published by the Free Software Foundation.
</para>
<para>
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details.
</para>
<para>
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307 USA
</para>
<para>
For more details see the file COPYING in the source
distribution of Linux.
</para>
</legalnotice>
</bookinfo>
<toc></toc>
<chapter id="intro">
<title>Introduction</title>
<para>
RapidIO is a high speed switched fabric interconnect with
features aimed at the embedded market. RapidIO provides
support for memory-mapped I/O as well as message-based
transactions over the switched fabric network. RapidIO has
a standardized discovery mechanism not unlike the PCI bus
standard that allows simple detection of devices in a
network.
</para>
<para>
This documentation is provided for developers intending
to support RapidIO on new architectures, write new drivers,
or to understand the subsystem internals.
</para>
</chapter>
<chapter id="bugs">
<title>Known Bugs and Limitations</title>
<sect1>
<title>Bugs</title>
<para>None. ;)</para>
</sect1>
<sect1>
<title>Limitations</title>
<para>
<orderedlist>
<listitem><para>Access/management of RapidIO memory regions is not supported</para></listitem>
<listitem><para>Multiple host enumeration is not supported</para></listitem>
</orderedlist>
</para>
</sect1>
</chapter>
<chapter id="drivers">
<title>RapidIO driver interface</title>
<para>
Drivers are provided a set of calls in order
to interface with the subsystem to gather info
on devices, request/map memory region resources,
and manage mailboxes/doorbells.
</para>
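<para>
As a sketch of how a driver hooks into this interface (the foo
names and ID values below are hypothetical, not taken from the
subsystem sources):
</para>
<programlisting>
static const struct rio_device_id foo_id_table[] = {
	{ 0x1234, 0x5678, RIO_ANY_ID, RIO_ANY_ID }, /* did, vid, asm did/vid */
	{ 0, }  /* terminator */
};

static int foo_probe(struct rio_dev *rdev, const struct rio_device_id *id)
{
	/* claim doorbell/mailbox resources for this device here */
	return 0;
}

static void foo_remove(struct rio_dev *rdev)
{
	/* release resources */
}

static struct rio_driver foo_driver = {
	.name     = "foo",
	.id_table = foo_id_table,
	.probe    = foo_probe,
	.remove   = foo_remove,
};

/* in the module init path: rio_register_driver(&foo_driver); */
</programlisting>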
<sect1>
<title>Functions</title>
!Iinclude/linux/rio_drv.h
!Edrivers/rapidio/rio-driver.c
!Edrivers/rapidio/rio.c
</sect1>
</chapter>
<chapter id="internals">
<title>Internals</title>
<para>
This chapter contains the autogenerated documentation of the RapidIO
subsystem.
</para>
<sect1><title>Structures</title>
!Iinclude/linux/rio.h
</sect1>
<sect1><title>Enumeration and Discovery</title>
!Idrivers/rapidio/rio-scan.c
</sect1>
<sect1><title>Driver functionality</title>
!Idrivers/rapidio/rio.c
!Idrivers/rapidio/rio-access.c
</sect1>
<sect1><title>Device model support</title>
!Idrivers/rapidio/rio-driver.c
</sect1>
<sect1><title>Sysfs support</title>
!Idrivers/rapidio/rio-sysfs.c
</sect1>
<sect1><title>PPC32 support</title>
!Iarch/ppc/kernel/rio.c
!Earch/ppc/syslib/ppc85xx_rio.c
!Iarch/ppc/syslib/ppc85xx_rio.c
</sect1>
</chapter>
<chapter id="credits">
<title>Credits</title>
<para>
The following people have contributed to the RapidIO
subsystem directly or indirectly:
<orderedlist>
<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
<listitem><para>Randy Vinson<email>rvinson@mvista.com</email></para></listitem>
<listitem><para>Dan Malek<email>dan@embeddedalley.com</email></para></listitem>
</orderedlist>
</para>
<para>
The following people have contributed to this document:
<orderedlist>
<listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
</orderedlist>
</para>
</chapter>
</book>


@ -10,14 +10,22 @@
This guide describes the basics of Message Signaled Interrupts (MSI),
the advantages of using MSI over traditional interrupt mechanisms,
and how to enable your driver to use MSI or MSI-X. Also included is
a Frequently Asked Questions.
a Frequently Asked Questions (FAQ) section.
1.1 Terminology
PCI devices can be single-function or multi-function. In either case,
when this text talks about enabling or disabling MSI on a "device
function," it is referring to one specific PCI device and function and
not to all functions on a PCI device (unless the PCI device has only
one function).
2. Copyright 2003 Intel Corporation
3. What is MSI/MSI-X?
Message Signaled Interrupt (MSI), as described in the PCI Local Bus
Specification Revision 2.3 or latest, is an optional feature, and a
Specification Revision 2.3 or later, is an optional feature, and a
required feature for PCI Express devices. MSI enables a device function
to request service by sending an Inbound Memory Write on its PCI bus to
the FSB as a Message Signal Interrupt transaction. Because MSI is
@ -27,7 +35,7 @@ supported.
A PCI device that supports MSI must also support pin IRQ assertion
interrupt mechanism to provide backward compatibility for systems that
do not support MSI. In Systems, which support MSI, the bus driver is
do not support MSI. In systems which support MSI, the bus driver is
responsible for initializing the message address and message data of
the device function's MSI/MSI-X capability structure during device
initial configuration.
@ -61,17 +69,17 @@ over the MSI capability structure as described below.
- MSI and MSI-X both support per-vector masking. Per-vector
masking is an optional extension of MSI but a required
feature for MSI-X. Per-vector masking provides the kernel
the ability to mask/unmask MSI when servicing its software
interrupt service routing handler. If per-vector masking is
feature for MSI-X. Per-vector masking provides the kernel the
ability to mask/unmask a single MSI while running its
interrupt service routine. If per-vector masking is
not supported, then the device driver should provide the
hardware/software synchronization to ensure that the device
generates MSI when the driver wants it to do so.
4. Why use MSI?
As a benefit the simplification of board design, MSI allows board
designers to remove out of band interrupt routing. MSI is another
As a benefit to the simplification of board design, MSI allows board
designers to remove out-of-band interrupt routing. MSI is another
step towards a legacy-free environment.
Due to increasing pressure on chipset and processor packages to
@ -87,7 +95,7 @@ support. As a result, the PCI Express technology requires MSI
support for better interrupt performance.
Using MSI enables the device functions to support two or more
vectors, which can be configured to target different CPU's to
vectors, which can be configured to target different CPUs to
increase scalability.
5. Configuring a driver to use MSI/MSI-X
@ -119,13 +127,13 @@ pci_enable_msi() explicitly.
int pci_enable_msi(struct pci_dev *dev)
With this new API, any existing device driver, which like to have
MSI enabled on its device function, must call this API to enable MSI
With this new API, a device driver that wants to have MSI
enabled on its device function must call this API to enable MSI.
A successful call will initialize the MSI capability structure
with ONE vector, regardless of whether a device function is
capable of supporting multiple messages. This vector replaces the
pre-assigned dev->irq with a new MSI vector. To avoid the conflict
of new assigned vector with existing pre-assigned vector requires
pre-assigned dev->irq with a new MSI vector. Avoiding a conflict
between the newly assigned vector and the existing pre-assigned vector requires
a device driver to call this API before calling request_irq().
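As a rough sketch of that ordering (the foo names and the fallback
policy are illustrative assumptions, not from this document):

    #include <linux/kernel.h>
    #include <linux/pci.h>
    #include <linux/interrupt.h>

    static irqreturn_t foo_interrupt(int irq, void *dev_id,
                                     struct pt_regs *regs)
    {
            /* service the device */
            return IRQ_HANDLED;
    }

    static int foo_setup_irq(struct pci_dev *dev)
    {
            /*
             * Enable MSI first: on success dev->irq now holds the new
             * MSI vector, so request_irq() below registers the handler
             * on the final vector. On failure the device simply stays
             * in pin-based (INTx) mode and dev->irq is unchanged.
             */
            if (pci_enable_msi(dev))
                    printk(KERN_INFO "foo: falling back to INTx\n");

            return request_irq(dev->irq, foo_interrupt, 0, "foo", dev);
    }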
5.2.2 API pci_disable_msi
@ -137,14 +145,14 @@ when a device driver is unloading. This API restores dev->irq with
the pre-assigned IOAPIC vector and switches a device's interrupt
mode to PCI pin-irq assertion/INTx emulation mode.
Note that a device driver should always call free_irq() on MSI vector
it has done request_irq() on before calling this API. Failure to do
so results a BUG_ON() and a device will be left with MSI enabled and
Note that a device driver should always call free_irq() on the MSI vector
that it has done request_irq() on before calling this API. Failure to do
so results in a BUG_ON() and a device will be left with MSI enabled and
leaks its vector.
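The reverse ordering at teardown, sketched under the same assumptions:

    static void foo_release_irq(struct pci_dev *dev)
    {
            /*
             * free_irq() must come first: calling pci_disable_msi()
             * while the MSI vector is still requested triggers the
             * BUG_ON() described above and leaks the vector.
             */
            free_irq(dev->irq, dev);
            pci_disable_msi(dev);  /* dev->irq reverts to the IOAPIC vector */
    }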
5.2.3 MSI mode vs. legacy mode diagram
The below diagram shows the events, which switches the interrupt
The below diagram shows the events which switch the interrupt
mode on the MSI-capable device function between MSI mode and
PIN-IRQ assertion mode.
@ -155,9 +163,9 @@ PIN-IRQ assertion mode.
------------ pci_disable_msi ------------------------
Figure 1.0 MSI Mode vs. Legacy Mode
Figure 1. MSI Mode vs. Legacy Mode
In Figure 1.0, a device operates by default in legacy mode. Legacy
In Figure 1, a device operates by default in legacy mode. Legacy
in this context means PCI pin-irq assertion or PCI-Express INTx
emulation. A successful MSI request (using pci_enable_msi()) switches
a device's interrupt mode to MSI mode. A pre-assigned IOAPIC vector
@ -166,11 +174,11 @@ assigned MSI vector will replace dev->irq.
To return back to its default mode, a device driver should always call
pci_disable_msi() to undo the effect of pci_enable_msi(). Note that a
device driver should always call free_irq() on MSI vector it has done
request_irq() on before calling pci_disable_msi(). Failure to do so
results a BUG_ON() and a device will be left with MSI enabled and
device driver should always call free_irq() on the MSI vector it has
done request_irq() on before calling pci_disable_msi(). Failure to do
so results in a BUG_ON() and a device will be left with MSI enabled and
leaks its vector. Otherwise, the PCI subsystem restores a device's
dev->irq with a pre-assigned IOAPIC vector and marks released
dev->irq with a pre-assigned IOAPIC vector and marks the released
MSI vector as unused.
Once being marked as unused, there is no guarantee that the PCI
@ -178,8 +186,8 @@ subsystem will reserve this MSI vector for a device. Depending on
the availability of current PCI vector resources and the number of
MSI/MSI-X requests from other drivers, this MSI may be re-assigned.
For the case where the PCI subsystem re-assigned this MSI vector
another driver, a request to switching back to MSI mode may result
For the case where the PCI subsystem re-assigns this MSI vector to
another driver, a request to switch back to MSI mode may result
in being assigned a different MSI vector or a failure if no more
vectors are available.
@ -208,12 +216,12 @@ Unlike the function pci_enable_msi(), the function pci_enable_msix()
does not replace the pre-assigned IOAPIC dev->irq with a new MSI
vector because the PCI subsystem writes the 1:1 vector-to-entry mapping
into the field vector of each element contained in a second argument.
Note that the pre-assigned IO-APIC dev->irq is valid only if the device
operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt of
Note that the pre-assigned IOAPIC dev->irq is valid only if the device
operates in PIN-IRQ assertion mode. In MSI-X mode, any attempt at
using dev->irq by the device driver to request interrupt service
may result in unpredictable behavior.
For each MSI-X vector granted, a device driver is responsible to call
For each MSI-X vector granted, a device driver is responsible for calling
other functions like request_irq(), enable_irq(), etc. to enable
this vector with its corresponding interrupt service handler. It is
a device driver's choice to assign all vectors with the same
@ -224,13 +232,13 @@ service handler.
The PCI 3.0 specification has implementation notes that MMIO address
space for a device's MSI-X structure should be isolated so that the
software system can set different page for controlling accesses to
the MSI-X structure. The implementation of MSI patch requires the PCI
software system can set different pages for controlling accesses to the
MSI-X structure. The implementation of MSI support requires the PCI
subsystem, not a device driver, to maintain full control of the MSI-X
table/MSI-X PBA and MMIO address space of the MSI-X table/MSI-X PBA.
A device driver is prohibited from requesting the MMIO address space
of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem will fail
enabling MSI-X on its hardware device when it calls the function
table/MSI-X PBA (Pending Bit Array) and MMIO address space of the MSI-X
table/MSI-X PBA. A device driver is prohibited from requesting the MMIO
address space of the MSI-X table/MSI-X PBA. Otherwise, the PCI subsystem
will fail enabling MSI-X on its hardware device when it calls the function
pci_enable_msix().
5.3.2 Handling MSI-X allocation
@ -274,9 +282,9 @@ For the case where fewer MSI-X vectors are allocated to a function
than requested, the function pci_enable_msix() will return the
maximum number of MSI-X vectors available to the caller. A device
driver may re-send its request with fewer or equal vectors indicated
in a return. For example, if a device driver requests 5 vectors, but
the number of available vectors is 3 vectors, a value of 3 will be a
return as a result of pci_enable_msix() call. A function could be
in the return. For example, if a device driver requests 5 vectors, but
the number of available vectors is 3 vectors, a value of 3 will be
returned as a result of pci_enable_msix() call. A function could be
designed for its driver to use only 3 MSI-X table entries in
different combinations such as ABC--, A-B-C, A--CB, etc. Note that this
patch does not support multiple entries with the same vector. Such
@ -285,49 +293,46 @@ as ABBCC, AABCC, BCCBA, etc will result as a failure by the function
pci_enable_msix(). Below are the reasons why supporting multiple
entries with the same vector is an undesirable solution.
- The PCI subsystem can not determine which entry, which
generated the message, to mask/unmask MSI while handling
- The PCI subsystem cannot determine the entry that
generated the message to mask/unmask MSI while handling
software driver ISR. Attempting to walk through all MSI-X
table entries (2048 max) to mask/unmask any match vector
is an undesirable solution.
- Walk through all MSI-X table entries (2048 max) to handle
- Walking through all MSI-X table entries (2048 max) to handle
SMP affinity of any match vector is an undesirable solution.
5.3.4 API pci_enable_msix
int pci_enable_msix(struct pci_dev *dev, u32 *entries, int nvec)
int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
This API enables a device driver to request the PCI subsystem
for enabling MSI-X messages on its hardware device. Depending on
to enable MSI-X messages on its hardware device. Depending on
the availability of PCI vectors resources, the PCI subsystem enables
either all or nothing.
either all or none of the requested vectors.
Argument dev points to the device (pci_dev) structure.
Argument 'dev' points to the device (pci_dev) structure.
Argument entries is a pointer of unsigned integer type. The number of
elements is indicated in argument nvec. The content of each element
will be mapped to the following struct defined in /driver/pci/msi.h.
Argument 'entries' is a pointer to an array of msix_entry structs.
The number of entries is indicated in argument 'nvec'.
struct msix_entry is defined in /driver/pci/msi.h:
struct msix_entry {
u16 vector; /* kernel uses to write alloc vector */
u16 entry; /* driver uses to specify entry */
};
A device driver is responsible for initializing the field entry of
each element with unique entry supported by MSI-X table. Otherwise,
A device driver is responsible for initializing the field 'entry' of
each element with a unique entry supported by MSI-X table. Otherwise,
-EINVAL will be returned as a result. A successful return of zero
indicates the PCI subsystem completes initializing each of requested
indicates the PCI subsystem completed initializing each of the requested
entries of the MSI-X table with message address and message data.
Last but not least, the PCI subsystem will write the 1:1
vector-to-entry mapping into the field vector of each element. A
device driver is responsible of keeping track of allocated MSI-X
vector-to-entry mapping into the field 'vector' of each element. A
device driver is responsible for keeping track of allocated MSI-X
vectors in its internal data structure.
Argument nvec is an integer indicating the number of messages
requested.
A return of zero indicates that the number of MSI-X vectors is
A return of zero indicates that the number of MSI-X vectors was
successfully allocated. A return of greater than zero indicates an
MSI-X vector shortage. A return of less than zero indicates
a failure. This failure may be a result of duplicate entries
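Tying 5.3.3 and 5.3.4 together, a driver might request its vectors
roughly like this (a sketch only; FOO_NUM_VECTORS and the single-retry
policy are illustrative assumptions):

    #define FOO_NUM_VECTORS 5  /* hypothetical device maximum */

    static struct msix_entry foo_entries[FOO_NUM_VECTORS];

    static int foo_enable_msix(struct pci_dev *dev)
    {
            int i, ret;

            /* Each 'entry' must name a unique MSI-X table entry. */
            for (i = 0; i < FOO_NUM_VECTORS; i++)
                    foo_entries[i].entry = i;

            ret = pci_enable_msix(dev, foo_entries, FOO_NUM_VECTORS);
            if (ret > 0)
                    /* Shortage: ret is how many vectors are actually
                     * available, so retry the request with that many. */
                    ret = pci_enable_msix(dev, foo_entries, ret);
            if (ret)
                    return ret;  /* still short, or a hard failure */

            /* On success the PCI subsystem has written the allocated
             * vector into each element's 'vector' field; request_irq()
             * is then called on each foo_entries[i].vector. */
            return 0;
    }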
@ -341,12 +346,12 @@ void pci_disable_msix(struct pci_dev *dev)
This API should always be used to undo the effect of pci_enable_msix()
when a device driver is unloading. Note that a device driver should
always call free_irq() on all MSI-X vectors it has done request_irq()
on before calling this API. Failure to do so results a BUG_ON() and
on before calling this API. Failure to do so results in a BUG_ON() and
a device will be left with MSI-X enabled and leaks its vectors.
5.3.6 MSI-X mode vs. legacy mode diagram
The below diagram shows the events, which switches the interrupt
The below diagram shows the events which switch the interrupt
mode on the MSI-X capable device function between MSI-X mode and
PIN-IRQ assertion mode (legacy).
@ -356,22 +361,22 @@ PIN-IRQ assertion mode (legacy).
| | ===============> | |
------------ pci_disable_msix ------------------------
Figure 2.0 MSI-X Mode vs. Legacy Mode
Figure 2. MSI-X Mode vs. Legacy Mode
In Figure 2.0, a device operates by default in legacy mode. A
In Figure 2, a device operates by default in legacy mode. A
successful MSI-X request (using pci_enable_msix()) switches a
device's interrupt mode to MSI-X mode. A pre-assigned IOAPIC vector
stored in dev->irq will be saved by the PCI subsystem; however,
unlike MSI mode, the PCI subsystem will not replace dev->irq with
assigned MSI-X vector because the PCI subsystem already writes the 1:1
vector-to-entry mapping into the field vector of each element
vector-to-entry mapping into the field 'vector' of each element
specified in second argument.
To return back to its default mode, a device driver should always call
pci_disable_msix() to undo the effect of pci_enable_msix(). Note that
a device driver should always call free_irq() on all MSI-X vectors it
has done request_irq() on before calling pci_disable_msix(). Failure
to do so results a BUG_ON() and a device will be left with MSI-X
to do so results in a BUG_ON() and a device will be left with MSI-X
enabled and leaks its vectors. Otherwise, the PCI subsystem switches a
device function's interrupt mode from MSI-X mode to legacy mode and
marks all allocated MSI-X vectors as unused.
@ -383,53 +388,56 @@ MSI/MSI-X requests from other drivers, these MSI-X vectors may be
re-assigned.
For the case where the PCI subsystem re-assigned these MSI-X vectors
to other driver, a request to switching back to MSI-X mode may result
to other drivers, a request to switch back to MSI-X mode may result
in being assigned another set of MSI-X vectors or a failure if no
more vectors are available.
5.4 Handling function implementng both MSI and MSI-X capabilities
5.4 Handling function implementing both MSI and MSI-X capabilities
For the case where a function implements both MSI and MSI-X
capabilities, the PCI subsystem enables a device to run either in MSI
mode or MSI-X mode but not both. A device driver determines whether it
wants MSI or MSI-X enabled on its hardware device. Once a device
driver requests for MSI, for example, it is prohibited to request for
driver requests for MSI, for example, it is prohibited from requesting
MSI-X; in other words, a device driver is not permitted to ping-pong
between MSI mode and MSI-X mode at run-time.
5.5 Hardware requirements for MSI/MSI-X support
MSI/MSI-X support requires support from both system hardware and
individual hardware device functions.
5.5.1 System hardware support
Since the target of MSI address is the local APIC CPU, enabling
MSI/MSI-X support in Linux kernel is dependent on whether existing
system hardware supports local APIC. Users should verify their
system whether it runs when CONFIG_X86_LOCAL_APIC=y.
MSI/MSI-X support in the Linux kernel is dependent on whether existing
system hardware supports local APIC. Users should verify that their
system supports local APIC operation by testing that it runs when
CONFIG_X86_LOCAL_APIC=y.
In an SMP environment, CONFIG_X86_LOCAL_APIC is automatically set;
however, in a UP environment, users must manually set
CONFIG_X86_LOCAL_APIC. Once CONFIG_X86_LOCAL_APIC=y, setting
CONFIG_PCI_MSI enables the VECTOR based scheme and
the option for MSI-capable device drivers to selectively enable
MSI/MSI-X.
CONFIG_PCI_MSI enables the VECTOR based scheme and the option for
MSI-capable device drivers to selectively enable MSI/MSI-X.
Note that the CONFIG_X86_IO_APIC setting is irrelevant because MSI/MSI-X
vectors are allocated anew at runtime and MSI/MSI-X support does not
depend on BIOS support. This key independence enables MSI/MSI-X
support on future IOxAPIC free platform.
support on future IOxAPIC free platforms.
5.5.2 Device hardware support
The hardware device function supports MSI by indicating the
MSI/MSI-X capability structure on its PCI capability list. By
default, this capability structure will not be initialized by
the kernel to enable MSI during the system boot. In other words,
the device function is running on its default pin assertion mode.
Note that in many cases the hardware supporting MSI has bugs,
which may result in system hang. The software driver of specific
MSI-capable hardware is responsible for whether calling
which may result in system hangs. The software driver of specific
MSI-capable hardware is responsible for deciding whether to call
pci_enable_msi or not. A return of zero indicates the kernel
successfully initializes the MSI/MSI-X capability structure of the
successfully initialized the MSI/MSI-X capability structure of the
device function. The device function is now running on MSI/MSI-X mode.
5.6 How to tell whether MSI/MSI-X is enabled on device function
@ -439,10 +447,10 @@ pci_enable_msi()/pci_enable_msix() indicates to a device driver that
its device function is initialized successfully and ready to run in
MSI/MSI-X mode.
At the user level, users can use command 'cat /proc/interrupts'
to display the vector allocated for a device and its interrupt
MSI/MSI-X mode ("PCI MSI"/"PCI MSIX"). Below shows below MSI mode is
enabled on a SCSI Adaptec 39320D Ultra320.
At the user level, users can use the command 'cat /proc/interrupts'
to display the vectors allocated for devices and their interrupt
MSI/MSI-X modes ("PCI-MSI"/"PCI-MSI-X"). Below shows MSI mode is
enabled on a SCSI Adaptec 39320D Ultra320 controller.
CPU0 CPU1
0: 324639 0 IO-APIC-edge timer
@ -453,8 +461,8 @@ enabled on a SCSI Adaptec 39320D Ultra320.
15: 1 0 IO-APIC-edge ide1
169: 0 0 IO-APIC-level uhci-hcd
185: 0 0 IO-APIC-level uhci-hcd
193: 138 10 PCI MSI aic79xx
201: 30 0 PCI MSI aic79xx
193: 138 10 PCI-MSI aic79xx
201: 30 0 PCI-MSI aic79xx
225: 30 0 IO-APIC-level aic7xxx
233: 30 0 IO-APIC-level aic7xxx
NMI: 0 0
@ -490,8 +498,8 @@ target address set as 0xfeexxxxx, as conformed to PCI
specification 2.3 or later, then it should work.
Q4. From the driver point of view, if the MSI is lost because
of the errors occur during inbound memory write, then it may
wait for ever. Is there a mechanism for it to recover?
of errors occurring during inbound memory write, then it may
wait forever. Is there a mechanism for it to recover?
A4. Since the target of the transaction is an inbound memory
write, all transaction termination conditions (Retry,


@ -772,8 +772,6 @@ RCU pointer/list traversal:
list_for_each_entry_rcu
list_for_each_continue_rcu (to be deprecated in favor of new
list_for_each_entry_continue_rcu)
hlist_for_each_rcu (to be deprecated in favor of
hlist_for_each_entry_rcu)
hlist_for_each_entry_rcu
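As a reminder of how the traversal primitives above are used on the
read side (the struct, list, and helper names here are made up for
illustration):

    #include <linux/list.h>
    #include <linux/rcupdate.h>

    struct foo {
            struct list_head list;
            int data;
    };

    static void foo_scan(struct list_head *head)
    {
            struct foo *f;

            /* Traversal is safe against concurrent updaters that use
             * list_add_rcu()/list_del_rcu() plus synchronize_rcu(). */
            rcu_read_lock();
            list_for_each_entry_rcu(f, head, list)
                    do_something_with(f->data);  /* read-only access */
            rcu_read_unlock();
    }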
RCU pointer update:


@ -25,7 +25,7 @@
#include <linux/skbuff.h>
#include <linux/timer.h>
#include "connector.h"
#include <linux/connector.h>
static struct cb_id cn_test_id = { 0x123, 0x456 };
static char cn_test_name[] = "cn_test";
@ -104,7 +104,7 @@ static int cn_test_want_notify(void)
req->first = cn_test_id.val + 20;
req->range = 10;
NETLINK_CB(skb).dst_groups = ctl->group;
NETLINK_CB(skb).dst_group = ctl->group;
//netlink_broadcast(nls, skb, 0, ctl->group, GFP_ATOMIC);
netlink_unicast(nls, skb, 0, 0);


@ -19,7 +19,6 @@ There are two dm targets available: snapshot and snapshot-origin.
*) snapshot-origin <origin>
which will normally have one or more snapshots based on it.
You must create the snapshot-origin device before you can create snapshots.
Reads will be mapped directly to the backing device. For each write, the
original data will be saved in the <COW device> of each snapshot to keep
its visible content unchanged, at least until the <COW device> fills up.
@ -27,7 +26,7 @@ its visible content unchanged, at least until the <COW device> fills up.
*) snapshot <origin> <COW device> <persistent?> <chunksize>
A snapshot is created of the <origin> block device. Changed chunks of
A snapshot of the <origin> block device is created. Changed chunks of
<chunksize> sectors will be stored on the <COW device>. Writes will
only go to the <COW device>. Reads will come from the <COW device> or
from <origin> for unchanged data. <COW device> will often be
@ -37,6 +36,8 @@ the amount of free space and expand the <COW device> before it fills up.
<persistent?> is P (Persistent) or N (Not persistent - will not survive
after reboot).
The difference is that for transient snapshots less metadata must be
saved on disk - they can be kept in memory by the kernel.
How this is used by LVM2


@ -146,10 +146,10 @@ pmipal Use the protected mode interface for palette changes.
mtrr:n setup memory type range registers for the vesafb framebuffer
where n:
0 - disabled (equivalent to nomtrr)
0 - disabled (equivalent to nomtrr) (default)
1 - uncachable
2 - write-back
3 - write-combining (default)
3 - write-combining
4 - write-through
If you see the following in dmesg, choose the type that matches the


@ -69,6 +69,22 @@ Who: Grant Coady <gcoady@gmail.com>
---------------------------
What: remove EXPORT_SYMBOL(panic_timeout)
When: April 2006
Files: kernel/panic.c
Why: No modular usage in the kernel.
Who: Adrian Bunk <bunk@stusta.de>
---------------------------
What: remove EXPORT_SYMBOL(insert_resource)
When: April 2006
Files: kernel/resource.c
Why: No modular usage in the kernel.
Who: Adrian Bunk <bunk@stusta.de>
---------------------------
What: PCMCIA control ioctl (needed for pcmcia-cs [cardmgr, cardctl])
When: November 2005
Files: drivers/pcmcia/: pcmcia_ioctl.c


@ -0,0 +1,173 @@
RCU-based dcache locking model
==============================
On many workloads, the most common operation on dcache is to look up a
dentry, given a parent dentry and the name of the child. Typically,
for every open(), stat() etc., the dentry corresponding to the
pathname will be looked up by walking the tree starting with the first
component of the pathname and using that dentry along with the next
component to look up the next level and so on. Since it is a frequent
operation for workloads like multiuser environments and web servers,
it is important to optimize this path.
Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus in
every component during path look-up. Since 2.5.10 onwards, the
fast-walk algorithm changed this by holding the dcache_lock at the
beginning and walking as many cached path component dentries as
possible. This significantly decreases the number of acquisitions of
dcache_lock. However, it also increases the lock hold time
significantly and affects performance on large SMP machines. Since
the 2.5.62 kernel, dcache has been using a new locking model that uses
RCU to make dcache look-up lock-free.
The current dcache locking model is not very different from the
earlier one. Prior to the 2.5.62 kernel, dcache_lock protected the
hash chain, d_child, d_alias, d_lru lists as well as d_inode and
several other things like mount look-up. The RCU-based changes affect
only the way the hash chain is protected. For everything else the
dcache_lock must be taken for both traversing and updating. The hash
chain updates too take the dcache_lock. The significant change is the
way d_lookup traverses the hash chain: it doesn't acquire the
dcache_lock for this and relies on RCU to ensure that the dentry has
not been *freed*.
Dcache locking details
======================
For many multi-user workloads, open() and stat() on files are very
frequently occurring operations. Both involve walking of path names to
find the dentry corresponding to the concerned file. In the 2.4
kernel, dcache_lock was held during look-up of each path component.
Contention and cache-line bouncing of this global lock caused
significant scalability problems. With the introduction of RCU into
the Linux kernel, this was worked around by making the look-up of path
components during path walking lock-free.
Safe lock-free look-up of dcache hash table
===========================================
Dcache is a complex data structure with the hash table entries also
linked together in other lists. In the 2.4 kernel, dcache_lock
protected all the lists. We applied RCU only to hash chain walking;
the rest of the lists are still protected by dcache_lock. Some of the
important changes are:
1. The deletion from the hash chain is done using the hlist_del_rcu()
macro, which doesn't initialize the next pointer of the deleted
dentry; this allows us to walk the chain safely, lock-free, while a
deletion is happening.
2. Insertion of a dentry into the hash table is done using
hlist_add_head_rcu(), which takes care of ordering the writes - the
writes to the dentry must be visible before the dentry is
inserted. This works in conjunction with hlist_for_each_rcu() while
walking the hash chain. The only requirement is that all
initialization of the dentry must be done before hlist_add_head_rcu()
since we don't have dcache_lock protection while traversing the hash
chain. This isn't different from the existing code.
3. A dentry looked up without holding dcache_lock cannot be returned
for walking if it is unhashed. It may then have a NULL d_inode or
other bogosity since RCU doesn't protect the other fields in the
dentry. We therefore use a flag, DCACHE_UNHASHED, to indicate unhashed
dentries and use this in conjunction with a per-dentry lock (d_lock).
Once looked up without the dcache_lock, we acquire the per-dentry lock
(d_lock) and check if the dentry is unhashed. If so, the look-up
fails. If not, the reference count of the dentry is increased and the
dentry is returned (see the sketch after this list).
4. Once a dentry is looked up, it must be ensured that it doesn't go
away during the path walk for that component. In pre-2.5.10 code,
this was done by holding a reference to the dentry. dcache_rcu does
the same. In some sense, dcache_rcu path walking looks like the
pre-2.5.10 version.
5. All dentry hash chain updates must take the dcache_lock as well as
the per-dentry lock, in that order. dput() does this to ensure that a
dentry that has just been looked up on another CPU doesn't get
deleted before dget() can be done on it.
6. There are several ways to do reference counting of RCU protected
objects. One such example is in the ipv4 route cache, where deferred
freeing (using call_rcu()) is done as soon as the reference count
goes to zero. This cannot be done in the case of dentries because
tearing down a dentry requires blocking (dentry_iput()), which isn't
supported from RCU callbacks. Instead, tearing down of dentries
happens synchronously in dput(), but actual freeing happens later,
when the RCU grace period is over. This allows safe lock-free walking
of the hash chains, but a matched dentry may have been partially torn
down. The checking of the DCACHE_UNHASHED flag with d_lock held
detects such dentries and prevents them from being returned from
look-up.
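The unhash check from point 3 above, as a simplified sketch of the
logic (not the actual __d_lookup() source):

    /* 'dentry' was found on the hash chain under rcu_read_lock() */
    spin_lock(&dentry->d_lock);
    if (dentry->d_flags & DCACHE_UNHASHED) {
            /* lost a race with deletion - fail this look-up */
            spin_unlock(&dentry->d_lock);
            dentry = NULL;
    } else {
            /* still hashed - safe to take a reference and return it */
            atomic_inc(&dentry->d_count);
            spin_unlock(&dentry->d_lock);
    }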
Maintaining POSIX rename semantics
==================================
Since look-up of dentries is lock-free, it can race against a
concurrent rename operation. For example, during rename of file A to
B, look-up of either A or B must succeed. So, if look-up of B happens
after A has been removed from the hash chain but not added to the new
hash chain, it may fail. Also, a comparison while the name is being
written concurrently by a rename may result in false positive matches
violating rename semantics. Issues related to races with rename are
handled as described below:
1. Look-up can be done in two ways - d_lookup(), which is safe from
simultaneous renames, and __d_lookup(), which is not. If __d_lookup()
fails, it must be followed up by a d_lookup() to correctly determine
whether a dentry is in the hash table or not. d_lookup() protects
look-ups using a sequence lock (rename_lock); see the sketch after
this list.
2. The name associated with a dentry (d_name) may be changed if a
rename is allowed to happen simultaneously. To avoid the memcmp() in
__d_lookup() going out of bounds due to a rename, and to avoid a
false positive comparison, the name comparison is done while holding
the per-dentry lock. This prevents concurrent renames during this
operation.
3. Hash table walking during look-up may move to a different bucket
when the current dentry is moved to a different bucket due to rename.
But we use hlists in the dcache hash table, and they are
null-terminated. So, even if a dentry moves to a different bucket,
the hash chain walk will terminate. [With a list_head list, it may
not, since termination is when the list_head in the original bucket
is reached.] Since we redo the d_parent check and compare the name
while holding d_lock, lock-free look-up will not race against d_move().
4. There can be a theoretical race when a dentry keeps coming back to
the original bucket due to double moves. Due to this, look-up may
conclude that it has never moved and can end up in an infinite loop.
But this is no worse than the theoretical livelocks we already have
in the kernel.
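The rename_lock protection from point 1, again as a simplified sketch
of the logic rather than the exact d_lookup() source:

    struct dentry *d_lookup_sketch(struct dentry *parent, struct qstr *name)
    {
            struct dentry *dentry;
            unsigned seq;

            do {
                    seq = read_seqbegin(&rename_lock);
                    dentry = __d_lookup(parent, name);
                    if (dentry)
                            break;  /* a positive result is trustworthy */
            } while (read_seqretry(&rename_lock, seq));
            return dentry;
    }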
Important guidelines for filesystem developers related to dcache_rcu
====================================================================
1. Existing dcache interfaces (pre-2.5.62) exported to filesystems
don't change; only the dcache internal implementation changes.
However, filesystems *must not* delete from the dentry hash chains
directly using the list macros as was allowed earlier. They must use
dcache APIs like d_drop() or __d_drop(), depending on the situation.
2. d_flags is now protected by a per-dentry lock (d_lock). All access
to d_flags must be protected by it.
3. For a hashed dentry, checking of d_count needs to be protected by
d_lock.
Papers and other documentation on dcache locking
================================================
1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
2. http://lse.sourceforge.net/locking/dcache/dcache.html


@ -1812,11 +1812,6 @@ it may overflow the messages buffer, but try to get as much of it as
you can
if you get an Oops, run ksymoops to decode it so that the
names of the offending functions are provided. A non-decoded Oops is
pretty useless
send a copy of your devfsd configuration file(s)
send the bug report to me first.


@ -0,0 +1,195 @@
ramfs, rootfs and initramfs
October 17, 2005
Rob Landley <rob@landley.net>
=============================
What is ramfs?
--------------
Ramfs is a very simple filesystem that exports Linux's disk caching
mechanisms (the page cache and dentry cache) as a dynamically resizable
ram-based filesystem.
Normally all files are cached in memory by Linux. Pages of data read from
backing store (usually the block device the filesystem is mounted on) are kept
around in case it's needed again, but marked as clean (freeable) in case the
Virtual Memory system needs the memory for something else. Similarly, data
written to files is marked clean as soon as it has been written to backing
store, but kept around for caching purposes until the VM reallocates the
memory. A similar mechanism (the dentry cache) greatly speeds up access to
directories.
With ramfs, there is no backing store. Files written into ramfs allocate
dentries and page cache as usual, but there's nowhere to write them to.
This means the pages are never marked clean, so they can't be freed by the
VM when it's looking to recycle memory.
The amount of code required to implement ramfs is tiny, because all the
work is done by the existing Linux caching infrastructure. Basically,
you're mounting the disk cache as a filesystem. Because of this, ramfs is not
an optional component removable via menuconfig, since there would be negligible
space savings.
ramfs and ramdisk:
------------------
The older "ram disk" mechanism created a synthetic block device out of
an area of ram and used it as backing store for a filesystem. This block
device was of fixed size, so the filesystem mounted on it was of fixed
size. Using a ram disk also required unnecessarily copying memory from the
fake block device into the page cache (and copying changes back out), as well
as creating and destroying dentries. Plus it needed a filesystem driver
(such as ext2) to format and interpret this data.
Compared to ramfs, this wastes memory (and memory bus bandwidth), creates
unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks
to avoid this copying by playing with the page tables, but they're unpleasantly
complicated and turn out to be about as expensive as the copying anyway.)
More to the point, all the work ramfs is doing has to happen _anyway_,
since all file access goes through the page and dentry caches. The ram
disk is simply unnecessary; ramfs is internally much simpler.
Another reason ramdisks are semi-obsolete is that the introduction of
loopback devices offered a more flexible and convenient way to create
synthetic block devices, now from files instead of from chunks of memory.
See losetup (8) for details.
ramfs and tmpfs:
----------------
One downside of ramfs is you can keep writing data into it until you fill
up all memory, and the VM can't free it because the VM thinks that files
should get written to backing store (rather than swap space), but ramfs hasn't
got any backing store. Because of this, only root (or a trusted user) should
be allowed write access to a ramfs mount.
A ramfs derivative called tmpfs was created to add size limits, and the ability
to write the data to swap space. Normal users can be allowed write access to
tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
What is rootfs?
---------------
Rootfs is a special instance of ramfs, which is always present in 2.6 systems.
(It's used internally as the starting and stopping point for searches of the
kernel's doubly-linked list of mount points.)
Most systems just mount another filesystem over it and ignore it. The
amount of space an empty instance of ramfs takes up is tiny.
What is initramfs?
------------------
All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is
extracted into rootfs when the kernel boots up. After extracting, the kernel
checks to see if rootfs contains a file "init", and if so it executes it as PID
1. If found, this init process is responsible for bringing the system the
rest of the way up, including locating and mounting the real root device (if
any). If rootfs does not contain an init program after the embedded cpio
archive is extracted into it, the kernel will fall through to the older code
to locate and mount a root partition, then exec some variant of /sbin/init
out of that.
All this differs from the old initrd in several ways:
- The old initrd was a separate file, while the initramfs archive is linked
into the linux kernel image. (The directory linux-*/usr is devoted to
generating this archive during the build.)
- The old initrd file was a gzipped filesystem image (in some file format,
such as ext2, that had to be built into the kernel), while the new
initramfs archive is a gzipped cpio archive (like tar only simpler,
see cpio(1) and Documentation/early-userspace/buffer-format.txt).
- The program run by the old initrd (which was called /initrd, not /init) did
some setup and then returned to the kernel, while the init program from
initramfs is not expected to return to the kernel. (If /init needs to hand
off control it can overmount / with a new root device and exec another init
program. See the switch_root utility, below.)
- When switching to another root device, initrd would pivot_root and then
umount the ramdisk. But initramfs is rootfs: you can neither pivot_root
rootfs, nor unmount it. Instead delete everything out of rootfs to
free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs
with the new root (cd /newmount; mount --move . /; chroot .), attach
stdin/stdout/stderr to the new /dev/console, and exec the new init.
Since this is a remarkably persnickety process (and involves deleting
commands before you can run them), the klibc package introduced a helper
program (utils/run_init.c) to do all this for you. Most other packages
(such as busybox) have named this command "switch_root".
Populating initramfs:
---------------------
The 2.6 kernel build process always creates a gzipped cpio format initramfs
archive and links it into the resulting kernel binary. By default, this
archive is empty (consuming 134 bytes on x86). The config option
CONFIG_INITRAMFS_SOURCE (for some reason buried under devices->block devices
in menuconfig, and living in usr/Kconfig) can be used to specify a source for
the initramfs archive, which will automatically be incorporated into the
resulting binary. This option can point to an existing gzipped cpio archive, a
directory containing files to be archived, or a text file specification such
as the following example:
dir /dev 755 0 0
nod /dev/console 644 0 0 c 5 1
nod /dev/loop0 644 0 0 b 7 0
dir /bin 755 1000 1000
slink /bin/sh busybox 777 0 0
file /bin/busybox initramfs/busybox 755 0 0
dir /proc 755 0 0
dir /sys 755 0 0
dir /mnt 755 0 0
file /init initramfs/init.sh 755 0 0
One advantage of the text file is that root access is not required to
set permissions or create device nodes in the new archive. (Note that those
two example "file" entries expect to find files named "init.sh" and "busybox" in
a directory called "initramfs", under the linux-2.6.* directory. See
Documentation/early-userspace/README for more details.)
If you don't already understand what shared libraries, devices, and paths
you need to get a minimal root filesystem up and running, here are some
references:
http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
http://www.linuxfromscratch.org/lfs/view/stable/
The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
designed to be a tiny C library to statically link early userspace
code against, along with some related utilities. It is BSD licensed.
I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
myself. These are LGPL and GPL, respectively.
In theory you could use glibc, but that's not well suited for small embedded
uses like this. (A "hello world" program statically linked against glibc is
over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
name lookups, even when otherwise statically linked.)
Future directions:
------------------
Today (2.6.14), initramfs is always compiled in, but not always used. The
kernel falls back to legacy boot code that is reached only if initramfs does
not contain an /init program. The fallback is legacy code, there to ensure a
smooth transition and allowing early boot functionality to gradually move to
"early userspace" (I.E. initramfs).
The move to early userspace is necessary because finding and mounting the real
root device is complex. Root partitions can span multiple devices (raid or
separate journal). They can be out on the network (requiring dhcp, setting a
specific mac address, logging into a server, etc). They can live on removable
media, with dynamically allocated major/minor numbers and persistent naming
issues requiring a full udev implementation to sort out. They can be
compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned,
and so on.
This kind of complexity (which inevitably includes policy) is rightly handled
in userspace. Both klibc and busybox/uClibc are working on simple initramfs
packages to drop into a kernel build, and when standard solutions are ready
and widely deployed, the kernel's legacy early boot code will become obsolete
and a candidate for the feature removal schedule.
But that's a while off yet.


@ -3,7 +3,7 @@
Original author: Richard Gooch <rgooch@atnf.csiro.au>
Last updated on August 25, 2005
Last updated on October 28, 2005
Copyright (C) 1999 Richard Gooch
Copyright (C) 2005 Pekka Enberg
@ -11,62 +11,61 @@
This file is released under the GPLv2.
What is it?
===========
Introduction
============
The Virtual File System (otherwise known as the Virtual Filesystem
Switch) is the software layer in the kernel that provides the
filesystem interface to userspace programs. It also provides an
abstraction within the kernel which allows different filesystem
implementations to coexist.
The Virtual File System (also known as the Virtual Filesystem Switch)
is the software layer in the kernel that provides the filesystem
interface to userspace programs. It also provides an abstraction
within the kernel which allows different filesystem implementations to
coexist.
VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
on are called from a process context. Filesystem locking is described
in the document Documentation/filesystems/Locking.
A Quick Look At How It Works
============================
Directory Entry Cache (dcache)
------------------------------
In this section I'll briefly describe how things work, before
launching into the details. I'll start with describing what happens
when user programs open and manipulate files, and then look from the
other view which is how a filesystem is supported and subsequently
mounted.
The VFS implements the open(2), stat(2), chmod(2), and similar system
calls. The pathname argument that is passed to them is used by the VFS
to search through the directory entry cache (also known as the dentry
cache or dcache). This provides a very fast look-up mechanism to
translate a pathname (filename) into a specific dentry. Dentries live
in RAM and are never saved to disc: they exist only for performance.
The dentry cache is meant to be a view into your entire filespace. As
most computers cannot fit all dentries in the RAM at the same time,
some bits of the cache are missing. In order to resolve your pathname
into a dentry, the VFS may have to resort to creating dentries along
the way, and then loading the inode. This is done by looking up the
inode.
Opening a File
--------------
The Inode Object
----------------
The VFS implements the open(2), stat(2), chmod(2) and similar system
calls. The pathname argument is used by the VFS to search through the
directory entry cache (dentry cache or "dcache"). This provides a very
fast look-up mechanism to translate a pathname (filename) into a
specific dentry.
An individual dentry usually has a pointer to an inode. Inodes are
filesystem objects such as regular files, directories, FIFOs and other
beasts. They live either on the disc (for block device filesystems)
or in the memory (for pseudo filesystems). Inodes that live on the
disc are copied into the memory when required and changes to the inode
are written back to disc. A single inode can be pointed to by multiple
dentries (hard links, for example, do this).
An individual dentry usually has a pointer to an inode. Inodes are the
things that live on disc drives, and can be regular files (you know:
those things that you write data into), directories, FIFOs and other
beasts. Dentries live in RAM and are never saved to disc: they exist
only for performance. Inodes live on disc and are copied into memory
when required. Later any changes are written back to disc. The inode
that lives in RAM is a VFS inode, and it is this which the dentry
points to. A single inode can be pointed to by multiple dentries
(think about hardlinks).
To look up an inode requires that the VFS calls the lookup() method of
the parent directory inode. This method is installed by the specific
filesystem implementation that the inode lives in. Once the VFS has
the required dentry (and hence the inode), we can do all those boring
things like open(2) the file, or stat(2) it to peek at the inode
data. The stat(2) operation is fairly simple: once the VFS has the
dentry, it peeks at the inode data and passes some of it back to
userspace.
The dcache is meant to be a view into your entire filespace. Unlike
Linus, most of us losers can't fit enough dentries into RAM to cover
all of our filespace, so the dcache has bits missing. In order to
resolve your pathname into a dentry, the VFS may have to resort to
creating dentries along the way, and then loading the inode. This is
done by looking up the inode.
To look up an inode (usually read from disc) requires that the VFS
calls the lookup() method of the parent directory inode. This method
is installed by the specific filesystem implementation that the inode
lives in. There will be more on this later.
Once the VFS has the required dentry (and hence the inode), we can do
all those boring things like open(2) the file, or stat(2) it to peek
at the inode data. The stat(2) operation is fairly simple: once the
VFS has the dentry, it peeks at the inode data and passes some of it
back to userspace.
The File Object
---------------
Opening a file requires another operation: allocation of a file
structure (this is the kernel-side implementation of file
@ -74,51 +73,39 @@ descriptors). The freshly allocated file structure is initialized with
a pointer to the dentry and a set of file operation member functions.
These are taken from the inode data. The open() file method is then
called so the specific filesystem implementation can do its work. You
can see that this is another switch performed by the VFS.
The file structure is placed into the file descriptor table for the
process.
can see that this is another switch performed by the VFS. The file
structure is placed into the file descriptor table for the process.
Reading, writing and closing files (and other assorted VFS operations)
is done by using the userspace file descriptor to grab the appropriate
file structure, and then calling the required file structure method
function to do whatever is required.
For as long as the file is open, it keeps the dentry "open" (in use),
which in turn means that the VFS inode is still in use.
All VFS system calls (i.e. open(2), stat(2), read(2), write(2),
chmod(2) and so on) are called from a process context. You should
assume that these calls are made without any kernel locks being
held. This means that the processes may be executing the same piece of
filesystem or driver code at the same time, on different
processors. You should ensure that access to shared resources is
protected by appropriate locks.
file structure, and then calling the required file structure method to
do whatever is required. For as long as the file is open, it keeps the
dentry in use, which in turn means that the VFS inode is still in use.
Registering and Mounting a Filesystem
-------------------------------------
=====================================
If you want to support a new kind of filesystem in the kernel, all you
need to do is call register_filesystem(). You pass a structure
describing the filesystem implementation (struct file_system_type)
which is then added to an internal table of supported filesystems. You
can do:
To register and unregister a filesystem, use the following API
functions:
% cat /proc/filesystems
#include <linux/fs.h>
to see what filesystems are currently available on your system.
extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
When a request is made to mount a block device onto a directory in
your filespace the VFS will call the appropriate method for the
specific filesystem. The dentry for the mount point will then be
updated to point to the root inode for the new filesystem.
The passed struct file_system_type describes your filesystem. When a
request is made to mount a device onto a directory in your filespace,
the VFS will call the appropriate get_sb() method for the specific
filesystem. The dentry for the mount point will then be updated to
point to the root inode for the new filesystem.
It's now time to look at things in more detail.
You can see all filesystems that are registered to the kernel in the
file /proc/filesystems.
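As an illustration of the above, a minimal module could register a
(hypothetical) filesystem like so; the foofs names and the
get_sb_single()/kill_anon_super() choices are assumptions for the
sketch, not requirements:

    #include <linux/fs.h>
    #include <linux/module.h>

    static int foofs_fill_super(struct super_block *sb, void *data, int silent)
    {
            /* a real filesystem sets up sb->s_op and creates the
             * root inode and root dentry here */
            return -ENOMEM;  /* stub */
    }

    static struct super_block *foofs_get_sb(struct file_system_type *fs_type,
            int flags, const char *dev_name, void *data)
    {
            return get_sb_single(fs_type, flags, data, foofs_fill_super);
    }

    static struct file_system_type foofs_type = {
            .owner   = THIS_MODULE,
            .name    = "foofs",
            .get_sb  = foofs_get_sb,
            .kill_sb = kill_anon_super,
    };

    static int __init foofs_init(void)
    {
            return register_filesystem(&foofs_type);
    }

    static void __exit foofs_exit(void)
    {
            unregister_filesystem(&foofs_type);
    }

    module_init(foofs_init);
    module_exit(foofs_exit);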
struct file_system_type
=======================
-----------------------
This describes the filesystem. As of kernel 2.6.13, the following
members are defined:
@ -197,8 +184,14 @@ A fill_super() method implementation has the following arguments:
int silent: whether or not to be silent on error
The Superblock Object
=====================
A superblock object represents a mounted filesystem.
struct super_operations
=======================
-----------------------
This describes how the VFS can manipulate the superblock of your
filesystem. As of kernel 2.6.13, the following members are defined:
@ -286,9 +279,9 @@ or bottom half).
a superblock. The second parameter indicates whether the method
should wait until the write out has been completed. Optional.
write_super_lockfs: called when VFS is locking a filesystem and forcing
it into a consistent state. This function is currently used by the
Logical Volume Manager (LVM).
write_super_lockfs: called when VFS is locking a filesystem and
forcing it into a consistent state. This method is currently
used by the Logical Volume Manager (LVM).
unlockfs: called when VFS is unlocking a filesystem and making it writable
again.
@ -317,8 +310,14 @@ field. This is a pointer to a "struct inode_operations" which
describes the methods that can be performed on individual inodes.
The Inode Object
================
An inode object represents an object within the filesystem.
struct inode_operations
=======================
-----------------------
This describes how the VFS can manipulate an inode in your
filesystem. As of kernel 2.6.13, the following members are defined:
@ -394,51 +393,62 @@ otherwise noted.
will probably need to call d_instantiate() just as you would
in the create() method
rename: called by the rename(2) system call to rename the object to
have the parent and name given by the second inode and dentry.
readlink: called by the readlink(2) system call. Only required if
you want to support reading symbolic links
follow_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
symbolic links. This function returns a void pointer cookie
symbolic links. This method returns a void pointer cookie
that is passed to put_link().
put_link: called by the VFS to release resources allocated by
follow_link(). The cookie returned by follow_link() is passed to
to this function as the last parameter. It is used by filesystems
such as NFS where page cache is not stable (i.e. page that was
installed when the symbolic link walk started might not be in the
page cache at the end of the walk).
follow_link(). The cookie returned by follow_link() is passed
to this method as the last parameter. It is used by
filesystems such as NFS where page cache is not stable
(i.e. page that was installed when the symbolic link walk
started might not be in the page cache at the end of the
walk).
truncate: called by the VFS to change the size of a file. The i_size
field of the inode is set to the desired size by the VFS before
this function is called. This function is called by the truncate(2)
system call and related functionality.
truncate: called by the VFS to change the size of a file. The
i_size field of the inode is set to the desired size by the
VFS before this method is called. This method is called by
the truncate(2) system call and related functionality.
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
setattr: called by the VFS to set attributes for a file. This function is
called by chmod(2) and related system calls.
setattr: called by the VFS to set attributes for a file. This method
is called by chmod(2) and related system calls.
getattr: called by the VFS to get attributes of a file. This function is
called by stat(2) and related system calls.
getattr: called by the VFS to get attributes of a file. This method
is called by stat(2) and related system calls.
setxattr: called by the VFS to set an extended attribute for a file.
Extended attribute is a name:value pair associated with an inode. This
function is called by setxattr(2) system call.
Extended attribute is a name:value pair associated with an
inode. This method is called by setxattr(2) system call.
getxattr: called by the VFS to retrieve the value of an extended attribute
name. This function is called by getxattr(2) function call.
getxattr: called by the VFS to retrieve the value of an extended
attribute name. This method is called by getxattr(2) function
call.
listxattr: called by the VFS to list all extended attributes for a given
file. This function is called by listxattr(2) system call.
listxattr: called by the VFS to list all extended attributes for a
given file. This method is called by listxattr(2) system call.
removexattr: called by the VFS to remove an extended attribute from a file.
This function is called by removexattr(2) system call.
removexattr: called by the VFS to remove an extended attribute from
a file. This method is called by removexattr(2) system call.
The Address Space Object
========================
The address space object is used to identify pages in the page cache.
struct address_space_operations
===============================
-------------------------------
This describes how the VFS can manipulate mapping of a file to page cache in
your filesystem. As of kernel 2.6.13, the following members are defined:
@ -502,8 +512,14 @@ struct address_space_operations {
it. An example implementation can be found in fs/ext2/xip.c.
The File Object
===============
A file object represents a file opened by a process.
struct file_operations
======================
----------------------
This describes how the VFS can manipulate an open file. As of kernel
2.6.13, the following members are defined:
@ -661,7 +677,7 @@ of child dentries. Child dentries are basically like files in a
directory.
Directory Entry Cache APIs
Directory Entry Cache API
-------------------------
There are a number of functions defined which permit a filesystem to
@ -705,178 +721,24 @@ manipulate dentries:
and the dentry is returned. The caller must use dput()
to free the dentry when it finishes using it.
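
For illustration, looking up a known child name under a parent dentry
could be sketched as follows (the name and the surrounding code are
hypothetical, not taken from any real filesystem):

  #include <linux/dcache.h>

  static void example_peek_child(struct dentry *parent)
  {
          struct qstr name;
          struct dentry *child;

          name.name = (const unsigned char *)"child";
          name.len  = 5;
          name.hash = full_name_hash(name.name, name.len);

          child = d_lookup(parent, &name); /* takes a reference on a hit */
          if (child) {
                  /* ... inspect child->d_inode etc. ... */
                  dput(child);             /* balance the reference */
          }
  }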
RCU-based dcache locking model
------------------------------
On many workloads, the most common operation on dcache is
to look up a dentry, given a parent dentry and the name
of the child. Typically, for every open(), stat() etc.,
the dentry corresponding to the pathname will be looked
up by walking the tree starting with the first component
of the pathname and using that dentry along with the next
component to look up the next level and so on. Since it
is a frequent operation for workloads like multiuser
environments and web servers, it is important to optimize
this path.
Prior to 2.5.10, dcache_lock was acquired in d_lookup and thus
in every component during path look-up. Since 2.5.10 onwards,
fast-walk algorithm changed this by holding the dcache_lock
at the beginning and walking as many cached path component
dentries as possible. This significantly decreases the number
of acquisition of dcache_lock. However it also increases the
lock hold time significantly and affects performance in large
SMP machines. Since 2.5.62 kernel, dcache has been using
a new locking model that uses RCU to make dcache look-up
lock-free.
The current dcache locking model is not very different from the existing
dcache locking model. Prior to 2.5.62 kernel, dcache_lock
protected the hash chain, d_child, d_alias, d_lru lists as well
as d_inode and several other things like mount look-up. RCU-based
changes affect only the way the hash chain is protected. For everything
else the dcache_lock must be taken for both traversing as well as
updating. The hash chain updates too take the dcache_lock.
The significant change is the way d_lookup traverses the hash chain:
it does not acquire the dcache_lock for this and relies on RCU to
ensure that the dentry has not been *freed*.
For further information on dentry locking, please refer to the document
Documentation/filesystems/dentry-locking.txt.
Dcache locking details
----------------------
Resources
=========
For many multi-user workloads, open() and stat() on files are
very frequently occurring operations. Both involve walking
of path names to find the dentry corresponding to the
concerned file. In 2.4 kernel, dcache_lock was held
during look-up of each path component. Contention and
cache-line bouncing of this global lock caused significant
scalability problems. With the introduction of RCU
in Linux kernel, this was worked around by making
the look-up of path components during path walking lock-free.
(Note some of these resources are not up-to-date with the latest kernel
version.)
Creating Linux virtual filesystems. 2002
<http://lwn.net/Articles/13325/>
Safe lock-free look-up of dcache hash table
===========================================
The Linux Virtual File-system Layer by Neil Brown. 1999
<http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
Dcache is a complex data structure with the hash table entries
also linked together in other lists. In 2.4 kernel, dcache_lock
protected all the lists. We applied RCU only on hash chain
walking. The rest of the lists are still protected by dcache_lock.
Some of the important changes are :
A tour of the Linux VFS by Michael K. Johnson. 1996
<http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
1. The deletion from hash chain is done using hlist_del_rcu() macro which
doesn't initialize next pointer of the deleted dentry and this
allows us to walk safely lock-free while a deletion is happening.
2. Insertion of a dentry into the hash table is done using
hlist_add_head_rcu() which take care of ordering the writes -
the writes to the dentry must be visible before the dentry
is inserted. This works in conjunction with hlist_for_each_rcu()
while walking the hash chain. The only requirement is that
all initialization to the dentry must be done before hlist_add_head_rcu()
since we don't have dcache_lock protection while traversing
the hash chain. This isn't different from the existing code.
3. The dentry looked up without holding dcache_lock cannot be
returned for walking if it is unhashed. It then may have a NULL
d_inode or other bogosity since RCU doesn't protect the other
fields in the dentry. We therefore use a flag DCACHE_UNHASHED to
indicate unhashed dentries and use this in conjunction with a
per-dentry lock (d_lock). Once looked up without the dcache_lock,
we acquire the per-dentry lock (d_lock) and check if the
dentry is unhashed. If so, the look-up is failed. If not, the
reference count of the dentry is increased and the dentry is returned.
4. Once a dentry is looked up, it must be ensured during the path
walk for that component it doesn't go away. In pre-2.5.10 code,
this was done holding a reference to the dentry. dcache_rcu does
the same. In some sense, dcache_rcu path walking looks like
the pre-2.5.10 version.
5. All dentry hash chain updates must take the dcache_lock as well as
the per-dentry lock in that order. dput() does this to ensure
that a dentry that has just been looked up in another CPU
doesn't get deleted before dget() can be done on it.
6. There are several ways to do reference counting of RCU protected
objects. One such example is in ipv4 route cache where
deferred freeing (using call_rcu()) is done as soon as
the reference count goes to zero. This cannot be done in
the case of dentries because tearing down of dentries
require blocking (dentry_iput()) which isn't supported from
RCU callbacks. Instead, tearing down of dentries happen
synchronously in dput(), but actual freeing happens later
when RCU grace period is over. This allows safe lock-free
walking of the hash chains, but a matched dentry may have
been partially torn down. The checking of DCACHE_UNHASHED
flag with d_lock held detects such dentries and prevents
them from being returned from look-up.
Maintaining POSIX rename semantics
==================================
Since look-up of dentries is lock-free, it can race against
a concurrent rename operation. For example, during rename
of file A to B, look-up of either A or B must succeed.
So, if look-up of B happens after A has been removed from the
hash chain but not added to the new hash chain, it may fail.
Also, a comparison while the name is being written concurrently
by a rename may result in false positive matches violating
rename semantics. Issues related to race with rename are
handled as described below :
1. Look-up can be done in two ways - d_lookup() which is safe
from simultaneous renames and __d_lookup() which is not.
If __d_lookup() fails, it must be followed up by a d_lookup()
to correctly determine whether a dentry is in the hash table
or not. d_lookup() protects look-ups using a sequence
lock (rename_lock).
2. The name associated with a dentry (d_name) may be changed if
a rename is allowed to happen simultaneously. To avoid memcmp()
in __d_lookup() go out of bounds due to a rename and false
positive comparison, the name comparison is done while holding the
per-dentry lock. This prevents concurrent renames during this
operation.
3. Hash table walking during look-up may move to a different bucket as
the current dentry is moved to a different bucket due to rename.
But we use hlists in dcache hash table and they are null-terminated.
So, even if a dentry moves to a different bucket, hash chain
walk will terminate. [with a list_head list, it may not since
termination is when the list_head in the original bucket is reached].
Since we redo the d_parent check and compare name while holding
d_lock, lock-free look-up will not race against d_move().
4. There can be a theoretical race when a dentry keeps coming back
to original bucket due to double moves. Due to this look-up may
consider that it has never moved and can end up in an infinite loop.
But this is not any worse than the theoretical livelocks we already
have in the kernel.
Important guidelines for filesystem developers related to dcache_rcu
====================================================================
1. Existing dcache interfaces (pre-2.5.62) exported to filesystem
don't change. Only dcache internal implementation changes. However
filesystems *must not* delete from the dentry hash chains directly
using the list macros like allowed earlier. They must use dcache
APIs like d_drop() or __d_drop() depending on the situation.
2. d_flags is now protected by a per-dentry lock (d_lock). All
access to d_flags must be protected by it.
3. For a hashed dentry, checking of d_count needs to be protected
by d_lock.
Papers and other documentation on dcache locking
================================================
1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
2. http://lse.sourceforge.net/locking/dcache/dcache.html
A small trail through the Linux kernel by Andries Brouwer. 2001
<http://www.win.tue.nl/~aeb/linux/vfs/trail.html>

View file

@ -1,18 +1,21 @@
High Precision Event Timer Driver for Linux
The High Precision Event Timer (HPET) hardware is the future replacement for the 8254 and Real
Time Clock (RTC) periodic timer functionality. Each HPET can have up two 32 timers. It is possible
to configure the first two timers as legacy replacements for 8254 and RTC periodic. A specification
done by INTEL and Microsoft can be found at http://www.intel.com/labs/platcomp/hpet/hpetspec.htm.
The High Precision Event Timer (HPET) hardware is the future replacement
for the 8254 and Real Time Clock (RTC) periodic timer functionality.
Each HPET can have up to 32 timers. It is possible to configure the
first two timers as legacy replacements for 8254 and RTC periodic timers.
A specification done by Intel and Microsoft can be found at
<http://www.intel.com/hardwaredesign/hpetspec.htm>.
The driver supports detection of HPET driver allocation and initialization of the HPET before the
driver module_init routine is called. This enables platform code which uses timer 0 or 1 as the
main timer to intercept HPET initialization. An example of this initialization can be found in
The driver supports detection of HPET driver allocation and initialization
of the HPET before the driver module_init routine is called. This enables
platform code which uses timer 0 or 1 as the main timer to intercept HPET
initialization. An example of this initialization can be found in
arch/i386/kernel/time_hpet.c.
The driver provides two APIs which are very similar to the API found in the rtc.c driver.
There is a user space API and a kernel space API. An example user space program is provided
below.
The driver provides two APIs which are very similar to the API found in
the rtc.c driver. There is a user space API and a kernel space API.
An example user space program is provided below.
#include <stdio.h>
#include <stdlib.h>
@ -290,9 +293,8 @@ The kernel API has three interfaces exported from the driver:
hpet_unregister(struct hpet_task *tp)
hpet_control(struct hpet_task *tp, unsigned int cmd, unsigned long arg)
The kernel module using this interface fills in the ht_func and ht_data members of the
hpet_task structure before calling hpet_register. hpet_control simply vectors to the hpet_ioctl
routine and has the same commands and respective arguments as the user API. hpet_unregister
The kernel module using this interface fills in the ht_func and ht_data
members of the hpet_task structure before calling hpet_register.
hpet_control simply vectors to the hpet_ioctl routine and has the same
commands and respective arguments as the user API. hpet_unregister
is used to terminate usage of the HPET timer reserved by hpet_register.
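
A rough sketch of kernel-side usage follows; the callback body and the
"periodic" flag passed to hpet_register() are assumptions based on the
description above, not code copied from a real driver:

  #include <linux/hpet.h>

  static void my_hpet_callback(void *data)
  {
          /* runs when the reserved HPET timer fires */
  }

  static struct hpet_task my_task;

  static int my_hpet_setup(void)
  {
          my_task.ht_func = my_hpet_callback;
          my_task.ht_data = &my_task;

          if (hpet_register(&my_task, 1))         /* reserve a timer */
                  return -EBUSY;

          /* same commands and arguments as the user-space ioctl() API */
          hpet_control(&my_task, HPET_IRQFREQ, 100);
          hpet_control(&my_task, HPET_IE_ON, 0);
          return 0;
  }

  static void my_hpet_teardown(void)
  {
          hpet_unregister(&my_task);
  }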

View file

@ -130,8 +130,6 @@ Code Seq# Include File Comments
<mailto:zapman@interlan.net>
'i' 00-3F linux/i2o.h
'j' 00-3F linux/joystick.h
'k' all asm-sparc/kbio.h
asm-sparc64/kbio.h
'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
<http://mikonos.dia.unisa.it/tcfs>
'l' 40-7F linux/udf_fs_i.h in development:

View file

@ -120,7 +120,7 @@ ISDN_NET_MAGIC 0x49344C02 isdn_net_local_s drivers/isdn/i4l/isdn_net_li
SAVEKMSG_MAGIC2 0x4B4D5347 savekmsg arch/*/amiga/config.c
STLI_BOARDMAGIC 0x4bc6c825 stlibrd include/linux/istallion.h
CS_STATE_MAGIC 0x4c4f4749 cs_state sound/oss/cs46xx.c
SLAB_C_MAGIC 0x4f17a36d kmem_cache_s mm/slab.c
SLAB_C_MAGIC 0x4f17a36d kmem_cache mm/slab.c
COW_MAGIC 0x4f4f4f4d cow_header_v1 arch/um/drivers/ubd_user.c
I810_CARD_MAGIC 0x5072696E i810_card sound/oss/i810_audio.c
TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c

View file

@ -176,8 +176,6 @@ information (_most_ of which _is_ _essential_) includes:
- Which client caused the problem ?
- How much data was being transferred ?
- Was the network congested ?
- If there was a kernel panic, please run the output through ksymoops
before sending it to me, otherwise its _useless_.
- How can the problem be reproduced ?
- Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
tcpdump don't understand how to dump DECnet properly, so including

View file

@ -1,6 +1,6 @@
NOTE: ksymoops is useless on 2.6. Please use the Oops in its original format
(from dmesg, etc). Ignore any references in this or other docs to "decoding
the Oops" or "running it through ksymoops". If you post an Oops fron 2.6 that
the Oops" or "running it through ksymoops". If you post an Oops from 2.6 that
has been run through ksymoops, people will just tell you to repost it.
Quick Summary

View file

@ -11,9 +11,9 @@ boot video card. (Kernel usually does not even contain video card
driver -- vesafb and vgacon are widely used).
This is not a problem for swsusp, because during swsusp resume, the BIOS is
run normally so video card is normally initialized. S3 has absolutely
no chance of working with SMP/HT. Be sure it to turn it off before
testing (swsusp should work ok, OTOH).
run normally so the video card is normally initialized. It should not be
a problem for S1 standby, because the hardware should retain its state
across it.
There are a few types of systems where video works after S3 resume:
@ -64,7 +64,7 @@ your video card (good luck getting docs :-(). Maybe suspending from X
(proper X, knowing your hardware, not XF68_FBcon) might have better
chance of working.
Table of known working systems:
Table of known working notebooks:
Model hack (or "how to do it")
------------------------------------------------------------------------------
@ -73,7 +73,7 @@ Acer TM 242FX vbetool (6)
Acer TM C110 video_post (8)
Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
Acer TM 4052LCi s3_bios (2)
Acer TM 636Lci s3_bios vga=normal (2)
Acer TM 636Lci s3_bios,s3_mode (4)
Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back
Acer TM 660 ??? (*)
Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6)
@ -137,6 +137,13 @@ Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****)
Toshiba M30 (2) xor X with nvidia driver using internal AGP
Uniwill 244IIO ??? (*)
Known working desktop systems
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Mainboard Graphics card hack (or "how to do it")
------------------------------------------------------------------------------
Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
(*) from http://www.ubuntulinux.org/wiki/HoaryPMResults, not sure
which options to use. If you know, please tell me.

View file

@ -8,11 +8,10 @@ All devices which can be addressed by means of ccws are called 'CCW devices' -
even if they aren't actually driven by ccws.
All ccw devices are accessed via a subchannel, this is reflected in the
structures under root/:
structures under devices/:
root/
- sys
- legacy
devices/
- system/
- css0/
- 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
@ -36,7 +35,7 @@ availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
online: An interface to set the device online and offline.
In the special case of the device being disconnected (see the
notify function under 1.2), piping 0 to online will focibly delete
notify function under 1.2), piping 0 to online will forcibly delete
the device.
The device drivers can add entries to export per-device data and interfaces.
@ -222,7 +221,7 @@ and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
only the logical state and not the physical state, since we cannot track the
latter consistently due to lacking machine support (we don't need to be aware
of anyway).
of it anyway).
status - Can be 'online' or 'offline'.
Piping 'on' or 'off' sets the chpid logically online/offline.
@ -235,12 +234,16 @@ status - Can be 'online' or 'offline'.
3. System devices
-----------------
Note: cpus may yet be added here.
3.1 xpram
---------
xpram shows up under sys/ as 'xpram'.
xpram shows up under devices/system/ as 'xpram'.
3.2 cpus
--------
For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
attribute 'online' which can be 0 or 1.
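
For example (path shown for illustration, with sysfs mounted at /sys),
piping a 0 into the online attribute takes cpu1 offline:

  echo 0 > /sys/devices/system/cpu/cpu1/online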
4. Other devices

File diff suppressed because it is too large

View file

@ -167,7 +167,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
spdif - Support SPDIF I/O
- Default: disabled
Module supports autoprobe and multiple chips (max 8).
This module supports one chip and autoprobe.
The power-management is supported.
@ -206,7 +206,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
See "AC97 Quirk Option" section below.
spdif_aclink - S/PDIF transfer over AC-link (default = 1)
This module supports up to 8 cards and autoprobe.
This module supports one card and autoprobe.
ATI IXP has two different methods to control SPDIF output. One is
over AC-link and another is over the "direct" SPDIF output. The
@ -218,7 +218,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
Module for ATI IXP 150/200/250 AC97 modem controllers.
Module supports up to 8 cards.
This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.
@ -637,7 +637,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
model - force the model name
position_fix - Fix DMA pointer (0 = auto, 1 = none, 2 = POSBUF, 3 = FIFO size)
Module supports up to 8 cards.
This module supports one card and autoprobe.
Each codec may have a model table for different configurations.
If your machine isn't listed there, the default (usually minimal)
@ -663,6 +663,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
adjusted. Appearing only when compiled with
$CONFIG_SND_DEBUG=y
ALC260
hp HP machines
fujitsu Fujitsu S7020
CMI9880
minimal 3-jack in back
min_fp 3-jack in back, 2-jack in front
@ -811,7 +815,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
semaphores (e.g. on some ASUS laptops)
(default off)
Module supports autoprobe and multiple bus-master chips (max 8).
This module supports one chip and autoprobe.
Note: the latest driver supports auto-detection of chip clock.
if you still encounter too fast playback, specify the clock
@ -830,7 +834,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_clock - AC'97 codec clock base (0 = auto-detect)
This module supports up to 8 cards and autoprobe.
This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.
@ -950,8 +954,10 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
use_cache - 0 or 1 (disabled by default)
vaio_hack - alias buffer_top=0x25a800
reset_workaround - enable AC97 RESET workaround for some laptops
reset_workaround2 - enable extended AC97 RESET workaround for some
other laptops
Module supports autoprobe and multiple chips (max 8).
This module supports one chip and autoprobe.
The power-management is supported.
@ -980,6 +986,11 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
workaround is enabled automatically. For other laptops with a
hard freeze, you can try reset_workaround=1 option.
Note: Dell Latitude CSx laptops have another problem regarding
AC97 RESET. On these laptops, the reset_workaround2 option is
turned on by default. This option is worth trying if the
previous reset_workaround option doesn't help.
Note: This driver is really crappy. It's a port of the
OSS driver, which is a result of black-magic reverse engineering.
The detection of codec will fail if the driver is loaded *after*
@ -1310,7 +1321,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_quirk - AC'97 workaround for strange hardware
See "AC97 Quirk Option" section below.
Module supports autoprobe and multiple bus-master chips (max 8).
This module supports one chip and autoprobe.
Note: on some SMP motherboards like MSI 694D the interrupts might
not be generated properly. In such a case, please try to
@ -1352,7 +1363,7 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
ac97_clock - AC'97 codec clock base (default 48000Hz)
Module supports up to 8 cards.
This module supports one card and autoprobe.
Note: The default index value of this module is -2, i.e. the first
slot is excluded.

View file

@ -18,8 +18,8 @@
</affiliation>
</author>
<date>March 6, 2005</date>
<edition>0.3.4</edition>
<date>October 6, 2005</date>
<edition>0.3.5</edition>
<abstract>
<para>
@ -30,7 +30,7 @@
<legalnotice>
<para>
Copyright (c) 2002-2004 Takashi Iwai <email>tiwai@suse.de</email>
Copyright (c) 2002-2005 Takashi Iwai <email>tiwai@suse.de</email>
</para>
<para>
@ -1433,25 +1433,10 @@
<informalexample>
<programlisting>
<![CDATA[
if (chip->res_port) {
release_resource(chip->res_port);
kfree_nocheck(chip->res_port);
}
release_and_free_resource(chip->res_port);
]]>
</programlisting>
</informalexample>
As you can see, the resource pointer is also to be freed
via <function>kfree_nocheck()</function> after
<function>release_resource()</function> is called. You
cannot use <function>kfree()</function> here, because on ALSA,
<function>kfree()</function> may be a wrapper to its own
allocator with the memory debugging. Since the resource pointer
is allocated externally outside the ALSA, it must be released
via the native
<function>kfree()</function>.
<function>kfree_nocheck()</function> is used for that; it calls
the native <function>kfree()</function> without wrapper.
</para>
<para>
@ -2190,8 +2175,7 @@ struct _snd_pcm_runtime {
unsigned int rate_den;
/* -- SW params -- */
int tstamp_timespec; /* use timeval (0) or timespec (1) */
snd_pcm_tstamp_t tstamp_mode; /* mmap timestamp is updated */
struct timespec tstamp_mode; /* mmap timestamp is updated */
unsigned int period_step;
unsigned int sleep_min; /* min ticks to sleep */
snd_pcm_uframes_t xfer_align; /* xfer size need to be a multiple */
@ -3709,8 +3693,7 @@ struct _snd_pcm_runtime {
<para>
Here, the chip instance is retrieved via
<function>snd_kcontrol_chip()</function> macro. This macro
converts from kcontrol-&gt;private_data to the type defined by
<type>chip_t</type>. The
just accesses to kcontrol-&gt;private_data. The
kcontrol-&gt;private_data field is
given as the argument of <function>snd_ctl_new()</function>
(see the later subsection
@ -5998,32 +5981,23 @@ struct _snd_pcm_runtime {
The first argument is the expression to evaluate, and the
second argument is the action if it fails. When
<constant>CONFIG_SND_DEBUG</constant> is set, it will show an
error message such as <computeroutput>BUG? (xxx) (called from
yyy)</computeroutput>. When no debug flag is set, this is
ignored.
error message such as <computeroutput>BUG? (xxx)</computeroutput>
together with a stack trace.
</para>
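        <para>
          For example, a guard at the top of a callback might look like
          the following (an illustrative sketch, not taken from a real
          driver):
        </para>
        <informalexample>
          <programlisting>
<![CDATA[
  snd_assert(substream != NULL, return -EINVAL);
]]>
          </programlisting>
        </informalexample>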
</section>
<section id="useful-functions-snd-runtime-check">
<title><function>snd_runtime_check()</function></title>
<para>
This macro is quite similar with
<function>snd_assert()</function>. Unlike
<function>snd_assert()</function>, the expression is always
evaluated regardless of
<constant>CONFIG_SND_DEBUG</constant>. When
<constant>CONFIG_SND_DEBUG</constant> is set, the macro will
show a message like <computeroutput>ERROR (xx) (called from
yyy)</computeroutput>.
When no debug flag is set, this macro is ignored.
</para>
</section>
<section id="useful-functions-snd-bug">
<title><function>snd_BUG()</function></title>
<para>
It calls <function>snd_assert(0,)</function> -- that is, just
prints the error message at the point. It's useful to show that
a fatal error happens there.
It shows a <computeroutput>BUG?</computeroutput> message and
stack trace at the point, just as <function>snd_assert</function> does.
It's useful to show that a fatal error happens there.
</para>
<para>
When no debug flag is set, this macro is ignored.
</para>
</section>
</chapter>

View file

@ -41,9 +41,9 @@ sure that bitwise types don't get mixed up (little-endian vs big-endian
vs cpu-endian vs whatever), and there the constant "0" really _is_
special.
Modify top-level Makefile to say
Use
CHECK = sparse -Wbitwise
make C=[12] CF=-Wbitwise
or you don't get any checking at all.
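
As an illustration, consider a hypothetical restricted type (the real
in-tree examples are __le32 and friends in linux/types.h):

  #include <linux/types.h>

  typedef __u32 __bitwise my_le32;        /* hypothetical example type */

  static __u32 degrade(my_le32 v)
  {
          return v;                       /* sparse -Wbitwise warns here */
  }

  static __u32 convert(my_le32 v)
  {
          return (__force __u32)v;        /* explicit cast, no warning */
  }

  static my_le32 zero(void)
  {
          return 0;                       /* the constant 0 is allowed */
  }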

View file

@ -27,9 +27,9 @@ information out of a register+stack dump printed by the kernel on
protection faults (so-called "kernel oops").
If you run into some kind of deadlock, you can try to dump a call trace
for each process using sysrq-t (see Documentation/sysrq.txt). ksymoops
will translate these dumps into kernel symbols too. This way it is
possible to figure where *exactly* some process in "D" state is stuck.
for each process using sysrq-t (see Documentation/sysrq.txt).
This way it is possible to figure out where *exactly* some process in "D"
state is stuck.
I've seen reports that bttv 0.7.x crashes whereas 0.8.x works rock solid
for some people. Thus probably a small buglet left somewhere in bttv

View file

@ -13,12 +13,13 @@ This optimization is more critical now as bigger and bigger physical memories
Users can use the huge page support in Linux kernel by either using the mmap
system call or standard SYSv shared memory system calls (shmget, shmat).
First the Linux kernel needs to be built with CONFIG_HUGETLB_PAGE (present
under Processor types and feature) and CONFIG_HUGETLBFS (present under file
system option on config menu) config options.
First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
automatically when CONFIG_HUGETLBFS is selected) configuration
options.
The kernel built with hugepage support should show the number of configured
hugepages in the system by running the "cat /proc/meminfo" command.
hugepages in the system by running the "cat /proc/meminfo" command.
/proc/meminfo also provides information about the total number of hugetlb
pages configured in the kernel. It also displays information about the
@ -38,19 +39,19 @@ in the kernel.
/proc/sys/vm/nr_hugepages indicates the current number of configured hugetlb
pages in the kernel. Super user can dynamically request more (or free some
pre-configured) hugepages.
The allocation( or deallocation) of hugetlb pages is posible only if there are
pre-configured) hugepages.
The allocation (or deallocation) of hugetlb pages is possible only if there are
enough physically contiguous free pages in the system (freeing of hugepages is
possible only if there are enough hugetlb pages free that can be transfered
possible only if there are enough hugetlb pages free that can be transferred
back to regular memory pool).
Pages that are used as hugetlb pages are reserved inside the kernel and can
not be used for other purposes.
not be used for other purposes.
Once the kernel with Hugetlb page support is built and running, a user can
use either the mmap system call or shared memory system calls to start using
the huge pages. It is required that the system administrator preallocate
enough memory for huge page purposes.
enough memory for huge page purposes.
Use the following command to dynamically allocate/deallocate hugepages:
@ -80,9 +81,9 @@ memory (huge pages) allowed for that filesystem (/mnt/huge). The size is
rounded down to HPAGE_SIZE. The option nr_inode sets the maximum number of
inodes that /mnt/huge can use. If the size or nr_inode options are not
provided on command line then no limits are set. For size and nr_inodes
options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
example, size=2K has the same meaning as size=2048. An example is given at
the end of this document.
options, you can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For
example, size=2K has the same meaning as size=2048. An example is given at
the end of this document.
read and write system calls are not supported on files that reside on hugetlb
file systems.
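
For illustration, a minimal user program using a file on the hugetlbfs
mount from the example above might look like the sketch below. The
4 MB length is an assumption and must be a multiple of the huge page
size on your system; the file name is arbitrary:

  #include <stdio.h>
  #include <stdlib.h>
  #include <fcntl.h>
  #include <unistd.h>
  #include <sys/mman.h>

  #define LENGTH (4UL * 1024 * 1024)      /* multiple of huge page size */

  int main(void)
  {
          int fd = open("/mnt/huge/example", O_CREAT | O_RDWR, 0755);
          char *addr;

          if (fd < 0) {
                  perror("open");
                  exit(1);
          }
          addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
                      MAP_SHARED, fd, 0);
          if (addr == MAP_FAILED) {
                  perror("mmap");
                  close(fd);
                  unlink("/mnt/huge/example");
                  exit(1);
          }
          addr[0] = 1;                    /* touch the huge page */
          munmap(addr, LENGTH);
          close(fd);
          unlink("/mnt/huge/example");
          return 0;
  }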

View file

@ -1077,6 +1077,26 @@ P: Jaroslav Kysela
M: perex@suse.cz
S: Maintained
HPET: High Precision Event Timers driver (hpet.c)
P: Clemens Ladisch
M: clemens@ladisch.de
S: Maintained
HPET: i386
P: Venkatesh Pallipadi (Venki)
M: venkatesh.pallipadi@intel.com
S: Maintained
HPET: x86_64
P: Andi Kleen and Vojtech Pavlik
M: ak@muc.de and vojtech@suse.cz
S: Maintained
HPET: ACPI hpet.c
P: Bob Picco
M: bob.picco@hp.com
S: Maintained
HPFS FILESYSTEM
P: Mikulas Patocka
M: mikulas@artax.karlin.mff.cuni.cz
@ -2051,6 +2071,12 @@ P: Matt Mackall
M: mpm@selenic.com
S: Maintained
RAPIDIO SUBSYSTEM
P: Matt Porter
M: mporter@kernel.crashing.org
L: linux-kernel@vger.kernel.org
S: Maintained
REAL TIME CLOCK DRIVER
P: Paul Gortmaker
M: p_gortmaker@yahoo.com
@ -2455,10 +2481,10 @@ L: linux-kernel@vger.kernel.org
S: Maintained
TRIVIAL PATCHES
P: Rusty Russell
M: trivial@rustcorp.com.au
P: Adrian Bunk
M: trivial@kernel.org
L: linux-kernel@vger.kernel.org
W: http://www.kernel.org/pub/linux/kernel/people/rusty/trivial/
W: http://www.kernel.org/pub/linux/kernel/people/bunk/trivial/
S: Maintained
TMS380 TOKEN-RING NETWORK DRIVER

View file

@ -583,7 +583,7 @@ export MODLIB
ifeq ($(KBUILD_EXTMOD),)
core-y += kernel/ mm/ fs/ ipc/ security/ crypto/
core-y += kernel/ mm/ fs/ ipc/ security/ crypto/ block/
vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \
$(core-y) $(core-m) $(drivers-y) $(drivers-m) \

View file

@ -324,7 +324,7 @@ menu "Kernel Features"
config SMP
bool "Symmetric Multi-Processing (EXPERIMENTAL)"
depends on EXPERIMENTAL && BROKEN #&& n
depends on EXPERIMENTAL && REALVIEW_MPCORE
help
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
@ -356,6 +356,16 @@ config HOTPLUG_CPU
Say Y here to experiment with turning CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu.
config LOCAL_TIMERS
bool "Use local timer interrupts"
depends on SMP && n
default y
help
Enable support for local timers on SMP platforms, rather than the
legacy IPI broadcast method. Local timers allow the system
accounting to be spread across the timer interval, preventing a
"thundering herd" at every timer tick.
config PREEMPT
bool "Preemptible Kernel (EXPERIMENTAL)"
depends on EXPERIMENTAL
@ -585,7 +595,7 @@ config FPE_NWFPE
config FPE_NWFPE_XP
bool "Support extended precision"
depends on FPE_NWFPE && !CPU_BIG_ENDIAN
depends on FPE_NWFPE
help
Say Y to include 80-bit support in the kernel floating-point
emulator. Otherwise, only 32 and 64-bit support is compiled in.

View file

@ -283,8 +283,14 @@ void flush_window(void)
putstr(".");
}
#ifndef arch_error
#define arch_error(x)
#endif
static void error(char *x)
{
arch_error(x);
putstr("\n\n");
putstr(x);
putstr("\n\n -- System halted");

View file

@ -19,12 +19,6 @@
#define SCOOP_REG(d,adr) (*(volatile unsigned short*)(d +(adr)))
/* PCMCIA to Scoop linkage structures for pxa2xx_sharpsl.c
There is no easy way to link multiple scoop devices into one
single entity for the pxa2xx_pcmcia device */
int scoop_num;
struct scoop_pcmcia_dev *scoop_devs;
struct scoop_dev {
void *base;
spinlock_t scoop_lock;

View file

@ -9,6 +9,7 @@
*/
#include <linux/module.h>
#include <linux/string.h>
#include <linux/cryptohash.h>
#include <linux/delay.h>
#include <linux/in6.h>
#include <linux/syscalls.h>
@ -126,6 +127,9 @@ EXPORT_SYMBOL(__put_user_2);
EXPORT_SYMBOL(__put_user_4);
EXPORT_SYMBOL(__put_user_8);
/* crypto hash */
EXPORT_SYMBOL(sha_transform);
/* gcc lib functions */
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__ashrdi3);

View file

@ -47,6 +47,13 @@
movne r0, sp
adrne lr, 1b
bne do_IPI
#ifdef CONFIG_LOCAL_TIMERS
test_for_ltirq r0, r6, r5, lr
movne r0, sp
adrne lr, 1b
bne do_local_timer
#endif
#endif
.endm

View file

@ -264,6 +264,7 @@ int show_interrupts(struct seq_file *p, void *v)
#endif
#ifdef CONFIG_SMP
show_ipi_list(p);
show_local_irqs(p);
#endif
seq_printf(p, "Err: %10lu\n", irq_err_count);
}
@ -995,7 +996,7 @@ void __init init_irq_proc(void)
struct proc_dir_entry *dir;
int irq;
dir = proc_mkdir("irq", 0);
dir = proc_mkdir("irq", NULL);
if (!dir)
return;

View file

@ -648,7 +648,7 @@ static int ptrace_setwmmxregs(struct task_struct *tsk, void __user *ufp)
#endif
static int do_ptrace(int request, struct task_struct *child, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
unsigned long tmp;
int ret;
@ -782,53 +782,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
return ret;
}
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
struct task_struct *child;
int ret;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret == 0)
ret = do_ptrace(request, child, addr, data);
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
{
unsigned long ip;

View file

@ -142,7 +142,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
ret = -EIO;
}
secondary_data.stack = 0;
secondary_data.stack = NULL;
secondary_data.pgdir = 0;
*pmd_offset(pgd, PHYS_OFFSET) = __pmd(0);
@ -184,6 +184,11 @@ int __cpuexit __cpu_disable(void)
*/
migrate_irqs();
/*
* Stop the local timer for this CPU.
*/
local_timer_stop(cpu);
/*
* Flush user cache and TLB mappings, and then remove this CPU
* from the vm mask set of all processes.
@ -289,6 +294,11 @@ asmlinkage void __cpuinit secondary_start_kernel(void)
*/
cpu_set(cpu, cpu_online_map);
/*
* Setup local timer for this CPU.
*/
local_timer_setup(cpu);
/*
* OK, it's off to the idle thread for us
*/
@ -359,8 +369,8 @@ static void send_ipi_message(cpumask_t callmap, enum ipi_msg_type msg)
* You must not call this function with disabled interrupts, from a
* hardware interrupt handler, nor from a bottom half handler.
*/
int smp_call_function_on_cpu(void (*func)(void *info), void *info, int retry,
int wait, cpumask_t callmap)
static int smp_call_function_on_cpu(void (*func)(void *info), void *info,
int retry, int wait, cpumask_t callmap)
{
struct smp_call_struct data;
unsigned long timeout;
@ -454,6 +464,18 @@ void show_ipi_list(struct seq_file *p)
seq_putc(p, '\n');
}
void show_local_irqs(struct seq_file *p)
{
unsigned int cpu;
seq_printf(p, "LOC: ");
for_each_present_cpu(cpu)
seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs);
seq_putc(p, '\n');
}
static void ipi_timer(struct pt_regs *regs)
{
int user = user_mode(regs);
@ -464,6 +486,18 @@ static void ipi_timer(struct pt_regs *regs)
irq_exit();
}
#ifdef CONFIG_LOCAL_TIMERS
asmlinkage void do_local_timer(struct pt_regs *regs)
{
int cpu = smp_processor_id();
if (local_timer_ack()) {
irq_stat[cpu].local_timer_irqs++;
ipi_timer(regs);
}
}
#endif
/*
* ipi_call_function - handle IPI from smp_call_function()
*
@ -515,7 +549,7 @@ static void ipi_cpu_stop(unsigned int cpu)
*
* Bit 0 - Inter-processor function call
*/
void do_IPI(struct pt_regs *regs)
asmlinkage void do_IPI(struct pt_regs *regs)
{
unsigned int cpu = smp_processor_id();
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);

View file

@ -14,6 +14,7 @@
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/string.h>
#include <asm/semaphore.h>
#include <asm/hardware/clock.h>

View file

@ -25,6 +25,7 @@
#include <asm/hardware.h>
#include <asm/io.h>
#include <asm/sizes.h>
#include <asm/page.h>
#include <asm/mach/map.h>

View file

@ -420,8 +420,7 @@ static int impd1_probe(struct lm_device *dev)
free_impd1:
if (impd1 && impd1->base)
iounmap(impd1->base);
if (impd1)
kfree(impd1);
kfree(impd1);
release_lm:
release_mem_region(dev->resource.start, SZ_4K);
return ret;

View file

@ -84,63 +84,54 @@ static struct map_desc ixp2000_io_desc[] __initdata = {
.virtual = IXP2000_CAP_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_CAP_PHYS_BASE),
.length = IXP2000_CAP_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_INTCTL_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_INTCTL_PHYS_BASE),
.length = IXP2000_INTCTL_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CREG_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CREG_PHYS_BASE),
.length = IXP2000_PCI_CREG_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CSR_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CSR_PHYS_BASE),
.length = IXP2000_PCI_CSR_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_MSF_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_MSF_PHYS_BASE),
.length = IXP2000_MSF_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_IO_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_IO_PHYS_BASE),
.length = IXP2000_PCI_IO_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CFG0_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CFG0_PHYS_BASE),
.length = IXP2000_PCI_CFG0_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}, {
.virtual = IXP2000_PCI_CFG1_VIRT_BASE,
.pfn = __phys_to_pfn(IXP2000_PCI_CFG1_PHYS_BASE),
.length = IXP2000_PCI_CFG1_SIZE,
.type = MT_DEVICE
.type = MT_IXP2000_DEVICE,
}
};
void __init ixp2000_map_io(void)
{
extern unsigned int processor_id;
/*
* On IXP2400 CPUs we need to use MT_IXP2000_DEVICE for
* tweaking the PMDs so XCB=101. On IXP2800s we use the normal
* PMD flags.
* On IXP2400 CPUs we need to use MT_IXP2000_DEVICE so that
* XCB=101 (to avoid triggering erratum #66), and given that
* this mode speeds up I/O accesses and we have write buffer
* flushes in the right places anyway, it doesn't hurt to use
* XCB=101 for all IXP2000s.
*/
if ((processor_id & 0xfffffff0) == 0x69054190) {
int i;
printk(KERN_INFO "Enabling IXP2400 erratum #66 workaround\n");
for(i=0;i<ARRAY_SIZE(ixp2000_io_desc);i++)
ixp2000_io_desc[i].type = MT_IXP2000_DEVICE;
}
iotable_init(ixp2000_io_desc, ARRAY_SIZE(ixp2000_io_desc));
/* Set slowport to 8-bit mode. */

View file

@ -62,15 +62,6 @@ static struct scoop_config corgi_scoop_setup = {
.io_out = CORGI_SCOOP_IO_OUT,
};
static struct scoop_pcmcia_dev corgi_pcmcia_scoop[] = {
{
.dev = &corgiscoop_device.dev,
.irq = CORGI_IRQ_GPIO_CF_IRQ,
.cd_irq = CORGI_IRQ_GPIO_CF_CD,
.cd_irq_str = "PCMCIA0 CD",
},
};
struct platform_device corgiscoop_device = {
.name = "sharp-scoop",
.id = -1,
@ -81,6 +72,44 @@ struct platform_device corgiscoop_device = {
.resource = corgi_scoop_resources,
};
static void corgi_pcmcia_init(void)
{
/* Setup default state of GPIO outputs
before we enable them as outputs. */
GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO52_nPCE_1) |
GPIO_bit(GPIO53_nPCE_2);
pxa_gpio_mode(GPIO48_nPOE_MD);
pxa_gpio_mode(GPIO49_nPWE_MD);
pxa_gpio_mode(GPIO50_nPIOR_MD);
pxa_gpio_mode(GPIO51_nPIOW_MD);
pxa_gpio_mode(GPIO55_nPREG_MD);
pxa_gpio_mode(GPIO56_nPWAIT_MD);
pxa_gpio_mode(GPIO57_nIOIS16_MD);
pxa_gpio_mode(GPIO52_nPCE_1_MD);
pxa_gpio_mode(GPIO53_nPCE_2_MD);
pxa_gpio_mode(GPIO54_pSKTSEL_MD);
}
static struct scoop_pcmcia_dev corgi_pcmcia_scoop[] = {
{
.dev = &corgiscoop_device.dev,
.irq = CORGI_IRQ_GPIO_CF_IRQ,
.cd_irq = CORGI_IRQ_GPIO_CF_CD,
.cd_irq_str = "PCMCIA0 CD",
},
};
static struct scoop_pcmcia_config corgi_pcmcia_config = {
.devs = &corgi_pcmcia_scoop[0],
.num_devs = 1,
.pcmcia_init = corgi_pcmcia_init,
};
EXPORT_SYMBOL(corgiscoop_device);
/*
* Corgi SSP Device
@ -294,8 +323,7 @@ static void __init corgi_init(void)
pxa_set_mci_info(&corgi_mci_platform_data);
pxa_set_ficp_info(&corgi_ficp_platform_data);
scoop_num = 1;
scoop_devs = &corgi_pcmcia_scoop[0];
platform_scoop_config = &corgi_pcmcia_config;
platform_add_devices(devices, ARRAY_SIZE(devices));
}

View file

@ -19,6 +19,7 @@
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/string.h>
#include <asm/arch/akita.h>
#include <asm/arch/corgi.h>
#include <asm/arch/hardware.h>

View file

@ -65,6 +65,27 @@ struct platform_device poodle_scoop_device = {
.resource = poodle_scoop_resources,
};
static void poodle_pcmcia_init(void)
{
/* Setup default state of GPIO outputs
before we enable them as outputs. */
GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO52_nPCE_1) |
GPIO_bit(GPIO53_nPCE_2);
pxa_gpio_mode(GPIO48_nPOE_MD);
pxa_gpio_mode(GPIO49_nPWE_MD);
pxa_gpio_mode(GPIO50_nPIOR_MD);
pxa_gpio_mode(GPIO51_nPIOW_MD);
pxa_gpio_mode(GPIO55_nPREG_MD);
pxa_gpio_mode(GPIO56_nPWAIT_MD);
pxa_gpio_mode(GPIO57_nIOIS16_MD);
pxa_gpio_mode(GPIO52_nPCE_1_MD);
pxa_gpio_mode(GPIO53_nPCE_2_MD);
pxa_gpio_mode(GPIO54_pSKTSEL_MD);
}
static struct scoop_pcmcia_dev poodle_pcmcia_scoop[] = {
{
.dev = &poodle_scoop_device.dev,
@ -74,6 +95,14 @@ static struct scoop_pcmcia_dev poodle_pcmcia_scoop[] = {
},
};
static struct scoop_pcmcia_config poodle_pcmcia_config = {
.devs = &poodle_pcmcia_scoop[0],
.num_devs = 1,
.pcmcia_init = poodle_pcmcia_init,
};
EXPORT_SYMBOL(poodle_scoop_device);
/* LoCoMo device */
static struct resource locomo_resources[] = {
@ -268,8 +297,7 @@ static void __init poodle_init(void)
pxa_set_mci_info(&poodle_mci_platform_data);
pxa_set_ficp_info(&poodle_ficp_platform_data);
scoop_num = 1;
scoop_devs = &poodle_pcmcia_scoop[0];
platform_scoop_config = &poodle_pcmcia_config;
ret = platform_add_devices(devices, ARRAY_SIZE(devices));
if (ret) {

View file

@ -104,6 +104,66 @@ struct platform_device spitzscoop2_device = {
.resource = spitz_scoop2_resources,
};
#define SPITZ_PWR_SD 0x01
#define SPITZ_PWR_CF 0x02
/* Power control is shared between one of the CF slots and SD */
static void spitz_card_pwr_ctrl(int device, unsigned short new_cpr)
{
unsigned short cpr = read_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR);
if (new_cpr & 0x0007) {
set_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
if (!(cpr & 0x0002) && !(cpr & 0x0004))
mdelay(5);
if (device == SPITZ_PWR_CF)
cpr |= 0x0002;
if (device == SPITZ_PWR_SD)
cpr |= 0x0004;
write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | new_cpr);
} else {
if (device == SPITZ_PWR_CF)
cpr &= ~0x0002;
if (device == SPITZ_PWR_SD)
cpr &= ~0x0004;
write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | new_cpr);
if (!(cpr & 0x0002) && !(cpr & 0x0004)) {
mdelay(1);
reset_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
}
}
}
static void spitz_pcmcia_init(void)
{
/* Setup default state of GPIO outputs
before we enable them as outputs. */
GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO54_nPCE_2);
GPSR(GPIO85_nPCE_1) = GPIO_bit(GPIO85_nPCE_1);
pxa_gpio_mode(GPIO48_nPOE_MD);
pxa_gpio_mode(GPIO49_nPWE_MD);
pxa_gpio_mode(GPIO50_nPIOR_MD);
pxa_gpio_mode(GPIO51_nPIOW_MD);
pxa_gpio_mode(GPIO55_nPREG_MD);
pxa_gpio_mode(GPIO56_nPWAIT_MD);
pxa_gpio_mode(GPIO57_nIOIS16_MD);
pxa_gpio_mode(GPIO85_nPCE_1_MD);
pxa_gpio_mode(GPIO54_nPCE_2_MD);
pxa_gpio_mode(GPIO104_pSKTSEL_MD);
}
static void spitz_pcmcia_pwr(struct device *scoop, unsigned short cpr, int nr)
{
/* Only need to override behaviour for slot 0 */
if (nr == 0)
spitz_card_pwr_ctrl(SPITZ_PWR_CF, cpr);
else
write_scoop_reg(scoop, SCOOP_CPR, cpr);
}
static struct scoop_pcmcia_dev spitz_pcmcia_scoop[] = {
{
.dev = &spitzscoop_device.dev,
@ -117,6 +177,16 @@ static struct scoop_pcmcia_dev spitz_pcmcia_scoop[] = {
},
};
static struct scoop_pcmcia_config spitz_pcmcia_config = {
.devs = &spitz_pcmcia_scoop[0],
.num_devs = 2,
.pcmcia_init = spitz_pcmcia_init,
.power_ctrl = spitz_pcmcia_pwr,
};
EXPORT_SYMBOL(spitzscoop_device);
EXPORT_SYMBOL(spitzscoop2_device);
/*
* Spitz SSP Device
@ -235,27 +305,14 @@ static int spitz_mci_init(struct device *dev, irqreturn_t (*spitz_detect_int)(in
return 0;
}
/* Power control is shared with one of the CF slots so we have a mess */
static void spitz_mci_setpower(struct device *dev, unsigned int vdd)
{
struct pxamci_platform_data* p_d = dev->platform_data;
unsigned short cpr = read_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR);
if (( 1 << vdd) & p_d->ocr_mask) {
/* printk(KERN_DEBUG "%s: on\n", __FUNCTION__); */
set_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
mdelay(2);
write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr | 0x04);
} else {
/* printk(KERN_DEBUG "%s: off\n", __FUNCTION__); */
write_scoop_reg(&spitzscoop_device.dev, SCOOP_CPR, cpr & ~0x04);
if (!(cpr | 0x02)) {
mdelay(1);
reset_scoop_gpio(&spitzscoop_device.dev, SPITZ_SCP_CF_POWER);
}
}
if (( 1 << vdd) & p_d->ocr_mask)
spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0004);
else
spitz_card_pwr_ctrl(SPITZ_PWR_SD, 0x0000);
}
static int spitz_mci_get_ro(struct device *dev)
@ -351,8 +408,8 @@ static void __init common_init(void)
static void __init spitz_init(void)
{
scoop_num = 2;
scoop_devs = &spitz_pcmcia_scoop[0];
platform_scoop_config = &spitz_pcmcia_config;
spitz_bl_machinfo.set_bl_intensity = spitz_bl_set_intensity;
common_init();

View file

@ -132,11 +132,13 @@ static void __init pxa_timer_init(void)
tv.tv_sec = pxa_get_rtc_time();
do_settimeofday(&tv);
OSMR0 = 0; /* set initial match at 0 */
OIER = 0; /* disable any timer interrupts */
OSCR = LATCH*2; /* push OSCR out of the way */
OSMR0 = LATCH; /* set initial match */
OSSR = 0xf; /* clear status on all timers */
setup_irq(IRQ_OST0, &pxa_timer_irq);
OIER |= OIER_E0; /* enable match on timer 0 to cause interrupts */
OSCR = 0; /* initialize free-running timer, force first match */
OIER = OIER_E0; /* enable match on timer 0 to cause interrupts */
OSCR = 0; /* initialize free-running timer */
}
#ifdef CONFIG_NO_IDLE_HZ

View file

@ -98,6 +98,9 @@ struct platform_device tosascoop_jc_device = {
.resource = tosa_scoop_jc_resources,
};
/*
* PCMCIA
*/
static struct scoop_pcmcia_dev tosa_pcmcia_scoop[] = {
{
.dev = &tosascoop_device.dev,
@ -111,16 +114,155 @@ static struct scoop_pcmcia_dev tosa_pcmcia_scoop[] = {
},
};
static void tosa_pcmcia_init(void)
{
/* Setup default state of GPIO outputs
before we enable them as outputs. */
GPSR(GPIO48_nPOE) = GPIO_bit(GPIO48_nPOE) |
GPIO_bit(GPIO49_nPWE) | GPIO_bit(GPIO50_nPIOR) |
GPIO_bit(GPIO51_nPIOW) | GPIO_bit(GPIO52_nPCE_1) |
GPIO_bit(GPIO53_nPCE_2);
pxa_gpio_mode(GPIO48_nPOE_MD);
pxa_gpio_mode(GPIO49_nPWE_MD);
pxa_gpio_mode(GPIO50_nPIOR_MD);
pxa_gpio_mode(GPIO51_nPIOW_MD);
pxa_gpio_mode(GPIO55_nPREG_MD);
pxa_gpio_mode(GPIO56_nPWAIT_MD);
pxa_gpio_mode(GPIO57_nIOIS16_MD);
pxa_gpio_mode(GPIO52_nPCE_1_MD);
pxa_gpio_mode(GPIO53_nPCE_2_MD);
pxa_gpio_mode(GPIO54_pSKTSEL_MD);
}
static struct scoop_pcmcia_config tosa_pcmcia_config = {
.devs = &tosa_pcmcia_scoop[0],
.num_devs = 2,
.pcmcia_init = tosa_pcmcia_init,
};
/*
* USB Device Controller
*/
static void tosa_udc_command(int cmd)
{
switch(cmd) {
case PXA2XX_UDC_CMD_CONNECT:
set_scoop_gpio(&tosascoop_jc_device.dev,TOSA_SCOOP_JC_USB_PULLUP);
break;
case PXA2XX_UDC_CMD_DISCONNECT:
reset_scoop_gpio(&tosascoop_jc_device.dev,TOSA_SCOOP_JC_USB_PULLUP);
break;
}
}
static int tosa_udc_is_connected(void)
{
return ((GPLR(TOSA_GPIO_USB_IN) & GPIO_bit(TOSA_GPIO_USB_IN)) == 0);
}
static struct pxa2xx_udc_mach_info udc_info __initdata = {
.udc_command = tosa_udc_command,
.udc_is_connected = tosa_udc_is_connected,
};
/*
* MMC/SD Device
*/
static struct pxamci_platform_data tosa_mci_platform_data;
static int tosa_mci_init(struct device *dev, irqreturn_t (*tosa_detect_int)(int, void *, struct pt_regs *), void *data)
{
int err;
/* setup GPIO for PXA25x MMC controller */
pxa_gpio_mode(GPIO6_MMCCLK_MD);
pxa_gpio_mode(GPIO8_MMCCS0_MD);
pxa_gpio_mode(TOSA_GPIO_nSD_DETECT | GPIO_IN);
tosa_mci_platform_data.detect_delay = msecs_to_jiffies(250);
err = request_irq(TOSA_IRQ_GPIO_nSD_DETECT, tosa_detect_int, SA_INTERRUPT,
"MMC/SD card detect", data);
if (err) {
printk(KERN_ERR "tosa_mci_init: MMC/SD: can't request MMC card detect IRQ\n");
return -1;
}
set_irq_type(TOSA_IRQ_GPIO_nSD_DETECT, IRQT_BOTHEDGE);
return 0;
}
static void tosa_mci_setpower(struct device *dev, unsigned int vdd)
{
struct pxamci_platform_data* p_d = dev->platform_data;
if (( 1 << vdd) & p_d->ocr_mask) {
set_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_PWR_ON);
} else {
reset_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_PWR_ON);
}
}
static int tosa_mci_get_ro(struct device *dev)
{
return (read_scoop_reg(&tosascoop_device.dev, SCOOP_GPWR)&TOSA_SCOOP_SD_WP);
}
static void tosa_mci_exit(struct device *dev, void *data)
{
free_irq(TOSA_IRQ_GPIO_nSD_DETECT, data);
}
static struct pxamci_platform_data tosa_mci_platform_data = {
.ocr_mask = MMC_VDD_32_33|MMC_VDD_33_34,
.init = tosa_mci_init,
.get_ro = tosa_mci_get_ro,
.setpower = tosa_mci_setpower,
.exit = tosa_mci_exit,
};
/*
* Irda
*/
static void tosa_irda_transceiver_mode(struct device *dev, int mode)
{
if (mode & IR_OFF) {
reset_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_IR_POWERDWN);
pxa_gpio_mode(GPIO47_STTXD|GPIO_DFLT_LOW);
pxa_gpio_mode(GPIO47_STTXD|GPIO_OUT);
} else {
pxa_gpio_mode(GPIO47_STTXD_MD);
set_scoop_gpio(&tosascoop_device.dev,TOSA_SCOOP_IR_POWERDWN);
}
}
static struct pxaficp_platform_data tosa_ficp_platform_data = {
.transceiver_cap = IR_SIRMODE | IR_OFF,
.transceiver_mode = tosa_irda_transceiver_mode,
};
/*
* Tosa Keyboard
*/
static struct platform_device tosakbd_device = {
.name = "tosa-keyboard",
.id = -1,
};
static struct platform_device *devices[] __initdata = {
&tosascoop_device,
&tosascoop_jc_device,
&tosakbd_device,
};
static void __init tosa_init(void)
{
pxa_gpio_mode(TOSA_GPIO_ON_RESET | GPIO_IN);
pxa_gpio_mode(TOSA_GPIO_TC6393_INT | GPIO_IN);
pxa_gpio_mode(TOSA_GPIO_USB_IN | GPIO_IN);
/* setup sleep mode values */
PWER = 0x00000002;
@ -131,13 +273,15 @@ static void __init tosa_init(void)
PGSR2 = 0x00014000;
PCFR |= PCFR_OPDE;
// enable batt_fault
/* enable batt_fault */
PMCR = 0x01;
platform_add_devices(devices, ARRAY_SIZE(devices));
pxa_set_mci_info(&tosa_mci_platform_data);
pxa_set_udc_info(&udc_info);
pxa_set_ficp_info(&tosa_ficp_platform_data);
platform_scoop_config = &tosa_pcmcia_config;
scoop_num = 2;
scoop_devs = &tosa_pcmcia_scoop[0];
platform_add_devices(devices, ARRAY_SIZE(devices));
}
static void __init fixup_tosa(struct machine_desc *desc,

View file

@ -8,4 +8,13 @@ config MACH_REALVIEW_EB
help
Include support for the ARM(R) RealView Emulation Baseboard platform.
config REALVIEW_MPCORE
bool "Support MPcore tile"
depends on MACH_REALVIEW_EB
help
Enable support for the MPCore tile on the Realview platform.
Since there are device address and interrupt differences, a
kernel built with this option enabled is not compatible with
other tiles.
endmenu

View file

@ -4,3 +4,4 @@
obj-y := core.o clock.o
obj-$(CONFIG_MACH_REALVIEW_EB) += realview_eb.o
obj-$(CONFIG_SMP) += platsmp.o headsmp.o

View file

@ -550,6 +550,11 @@ static irqreturn_t realview_timer_interrupt(int irq, void *dev_id, struct pt_reg
timer_tick(regs);
#ifdef CONFIG_SMP
smp_send_timer();
update_process_times(user_mode(regs));
#endif
write_sequnlock(&xtime_lock);
return IRQ_HANDLED;

View file

@ -23,6 +23,7 @@
#define __ASM_ARCH_REALVIEW_H
#include <asm/hardware/amba.h>
#include <asm/leds.h>
#include <asm/io.h>
#define __io_address(n) __io(IO_ADDRESS(n))

View file

@ -0,0 +1,39 @@
/*
* linux/arch/arm/mach-realview/headsmp.S
*
* Copyright (c) 2003 ARM Limited
* All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/linkage.h>
#include <linux/init.h>
__INIT
/*
* Realview specific entry point for secondary CPUs. This provides
* a "holding pen" into which all secondary cores are held until we're
* ready for them to initialise.
*/
ENTRY(realview_secondary_startup)
mrc p15, 0, r0, c0, c0, 5
and r0, r0, #15
adr r4, 1f
ldmia r4, {r5, r6}
sub r4, r4, r5
add r6, r6, r4
pen: ldr r7, [r6]
cmp r7, r0
bne pen
/*
* we've been released from the holding pen: secondary_stack
* should now contain the SVC stack for this core
*/
b secondary_startup
1: .long .
.long pen_release

View file

@ -0,0 +1,195 @@
/*
* linux/arch/arm/mach-realview/platsmp.c
*
* Copyright (C) 2002 ARM Ltd.
* All Rights Reserved
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/smp.h>
#include <asm/cacheflush.h>
#include <asm/hardware/arm_scu.h>
#include <asm/hardware.h>
#include "core.h"
extern void realview_secondary_startup(void);
/*
* control for which core is the next to come out of the secondary
* boot "holding pen"
*/
volatile int __cpuinitdata pen_release = -1;
static unsigned int __init get_core_count(void)
{
unsigned int ncores;
ncores = __raw_readl(__io_address(REALVIEW_MPCORE_SCU_BASE) + SCU_CONFIG);
return (ncores & 0x03) + 1;
}
static DEFINE_SPINLOCK(boot_lock);
void __cpuinit platform_secondary_init(unsigned int cpu)
{
/*
* the primary core may have used a "cross call" soft interrupt
* to get this processor out of WFI in the BootMonitor - make
* sure that we are no longer being sent this soft interrupt
*/
smp_cross_call_done(cpumask_of_cpu(cpu));
/*
* if any interrupts are already enabled for the primary
* core (e.g. timer irq), then they will not have been enabled
* for us: do so
*/
gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
/*
* let the primary processor know we're out of the
* pen, then head off into the C entry point
*/
pen_release = -1;
/*
* Synchronise with the boot thread.
*/
spin_lock(&boot_lock);
spin_unlock(&boot_lock);
}
int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle)
{
unsigned long timeout;
/*
* set synchronisation state between this boot processor
* and the secondary one
*/
spin_lock(&boot_lock);
/*
* The secondary processor is waiting to be released from
* the holding pen - release it, then wait for it to flag
* that it has been released by resetting pen_release.
*
* Note that "pen_release" is the hardware CPU ID, whereas
* "cpu" is Linux's internal ID.
*/
pen_release = cpu;
flush_cache_all();
/*
* XXX
*
* This is a later addition to the booting protocol: the
* bootMonitor now puts secondary cores into WFI, so
* poke_milo() no longer gets the cores moving; we need
* to send a soft interrupt to wake the secondary core.
* Use smp_cross_call() for this, since there's little
* point duplicating the code here
*/
smp_cross_call(cpumask_of_cpu(cpu));
timeout = jiffies + (1 * HZ);
while (time_before(jiffies, timeout)) {
if (pen_release == -1)
break;
udelay(10);
}
/*
* now the secondary core is starting up let it run its
* calibrations, then wait for it to finish
*/
spin_unlock(&boot_lock);
return pen_release != -1 ? -ENOSYS : 0;
}
static void __init poke_milo(void)
{
extern void secondary_startup(void);
/* nobody is to be released from the pen yet */
pen_release = -1;
/*
* write the address of secondary startup into the system-wide
* flags register, then clear the bottom two bits, which is what
* BootMonitor is waiting for
*/
#if 1
#define REALVIEW_SYS_FLAGSS_OFFSET 0x30
__raw_writel(virt_to_phys(realview_secondary_startup),
__io_address(REALVIEW_SYS_BASE) +
REALVIEW_SYS_FLAGSS_OFFSET);
#define REALVIEW_SYS_FLAGSC_OFFSET 0x34
__raw_writel(3,
__io_address(REALVIEW_SYS_BASE) +
REALVIEW_SYS_FLAGSC_OFFSET);
#endif
mb();
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int ncores = get_core_count();
unsigned int cpu = smp_processor_id();
int i;
/* sanity check */
if (ncores == 0) {
printk(KERN_ERR
"Realview: strange CM count of 0? Default to 1\n");
ncores = 1;
}
if (ncores > NR_CPUS) {
printk(KERN_WARNING
"Realview: no. of cores (%d) greater than configured "
"maximum of %d - clipping\n",
ncores, NR_CPUS);
ncores = NR_CPUS;
}
smp_store_cpu_info(cpu);
/*
* are we trying to boot more cores than exist?
*/
if (max_cpus > ncores)
max_cpus = ncores;
/*
* Initialise the possible/present maps.
* cpu_possible_map describes the set of CPUs which may be present
* cpu_present_map describes the set of CPUs populated
*/
for (i = 0; i < max_cpus; i++) {
cpu_set(i, cpu_possible_map);
cpu_set(i, cpu_present_map);
}
/*
* Do we need any more CPUs? If so, then let them know where
* to start. Note that, on modern versions of MILO, the "poke"
* doesn't actually do anything until each individual core is
* sent a soft interrupt to get it out of WFI
*/
if (max_cpus > 1)
poke_milo();
}
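For illustration, the holding-pen handshake that the comments above describe can be reduced to a userspace sketch, with a pthread standing in for the secondary core and busy-waiting standing in for WFI plus the soft-interrupt wakeup; the pen_release name mirrors the kernel code, everything else is an analogue:
#include <pthread.h>
#include <stdio.h>
static volatile int pen_release = -1;	/* -1: nobody has been released */
static void *secondary(void *arg)
{
	int id = (int)(long)arg;
	while (pen_release != id)	/* spin in the "pen" */
		;
	printf("cpu %d released\n", id);
	pen_release = -1;		/* ack: tell the primary we are out */
	return NULL;
}
int main(void)
{
	pthread_t t;
	pthread_create(&t, NULL, secondary, (void *)1L);
	pen_release = 1;		/* primary releases core 1 ... */
	while (pen_release != -1)	/* ... then waits for the ack */
		;
	pthread_join(t, NULL);
	return 0;
}
A real secondary may come up with caches and coherency not yet enabled, which is why boot_secondary() above follows the pen_release store with flush_cache_all(); the volatile spin here is only adequate as a single-machine demonstration.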

View file

@ -136,6 +136,11 @@ static struct amba_device *amba_devs[] __initdata = {
static void __init gic_init_irq(void)
{
#ifdef CONFIG_REALVIEW_MPCORE
writel(0x0000a05f, __io_address(REALVIEW_SYS_LOCK));
writel(0x008003c0, __io_address(REALVIEW_SYS_BASE) + 0xd8);
writel(0x00000000, __io_address(REALVIEW_SYS_LOCK));
#endif
gic_dist_init(__io_address(REALVIEW_GIC_DIST_BASE));
gic_cpu_init(__io_address(REALVIEW_GIC_CPU_BASE));
}

View file

@ -121,6 +121,14 @@ config S3C2410_BOOT_WATCHDOG
system resets depends on the value of PCLK. The timeout on an
200MHz s3c2410 should be about 30 seconds.
config S3C2410_BOOT_ERROR_RESET
bool "S3C2410 Reboot on decompression error"
depends on ARCH_S3C2410
help
Say y here to use the watchdog to reset the system if the
kernel decompressor detects an error during decompression.
comment "S3C2410 Setup"
config S3C2410_DMA

View file

@ -89,32 +89,63 @@
/* macros to modify the physical addresses for io space */
#define PA_CS2(item) ((item) + S3C2410_CS2)
#define PA_CS3(item) ((item) + S3C2410_CS3)
#define PA_CS4(item) ((item) + S3C2410_CS4)
#define PA_CS5(item) ((item) + S3C2410_CS5)
#define PA_CS2(item) (__phys_to_pfn((item) + S3C2410_CS2))
#define PA_CS3(item) (__phys_to_pfn((item) + S3C2410_CS3))
#define PA_CS4(item) (__phys_to_pfn((item) + S3C2410_CS4))
#define PA_CS5(item) (__phys_to_pfn((item) + S3C2410_CS5))
static struct map_desc bast_iodesc[] __initdata = {
/* ISA IO areas */
{ (u32)S3C24XX_VA_ISA_BYTE, PA_CS2(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
{ (u32)S3C24XX_VA_ISA_WORD, PA_CS3(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
/* we could possibly compress the next set down into a set of smaller
* pagetables, but that would mean using an L2 section, and it still means
* we cannot actually feed the same register to an LDR due to 16K spacing
*/
{
.virtual = (u32)S3C24XX_VA_ISA_BYTE,
.pfn = PA_CS2(BAST_PA_ISAIO),
.length = SZ_16M,
.type = MT_DEVICE,
}, {
.virtual = (u32)S3C24XX_VA_ISA_WORD,
.pfn = PA_CS3(BAST_PA_ISAIO),
.length = SZ_16M,
.type = MT_DEVICE,
},
/* bast CPLD control registers, and external interrupt controls */
{ (u32)BAST_VA_CTRL1, BAST_PA_CTRL1, SZ_1M, MT_DEVICE },
{ (u32)BAST_VA_CTRL2, BAST_PA_CTRL2, SZ_1M, MT_DEVICE },
{ (u32)BAST_VA_CTRL3, BAST_PA_CTRL3, SZ_1M, MT_DEVICE },
{ (u32)BAST_VA_CTRL4, BAST_PA_CTRL4, SZ_1M, MT_DEVICE },
{
.virtual = (u32)BAST_VA_CTRL1,
.pfn = __phys_to_pfn(BAST_PA_CTRL1),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)BAST_VA_CTRL2,
.pfn = __phys_to_pfn(BAST_PA_CTRL2),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)BAST_VA_CTRL3,
.pfn = __phys_to_pfn(BAST_PA_CTRL3),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)BAST_VA_CTRL4,
.pfn = __phys_to_pfn(BAST_PA_CTRL4),
.length = SZ_1M,
.type = MT_DEVICE,
},
/* PC104 IRQ mux */
{ (u32)BAST_VA_PC104_IRQREQ, BAST_PA_PC104_IRQREQ, SZ_1M, MT_DEVICE },
{ (u32)BAST_VA_PC104_IRQRAW, BAST_PA_PC104_IRQRAW, SZ_1M, MT_DEVICE },
{ (u32)BAST_VA_PC104_IRQMASK, BAST_PA_PC104_IRQMASK, SZ_1M, MT_DEVICE },
{
.virtual = (u32)BAST_VA_PC104_IRQREQ,
.pfn = __phys_to_pfn(BAST_PA_PC104_IRQREQ),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)BAST_VA_PC104_IRQRAW,
.pfn = __phys_to_pfn(BAST_PA_PC104_IRQRAW),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)BAST_VA_PC104_IRQMASK,
.pfn = __phys_to_pfn(BAST_PA_PC104_IRQMASK),
.length = SZ_1M,
.type = MT_DEVICE,
},
/* peripheral space... one for each of fast/slow/byte/16bit */
/* note, ide is only decoded in word space, even though some registers
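The rewrite above (and the vr1000 conversion that follows) is mechanical: positional map_desc initializers become C99 designated initializers, at the same time as the second field changes meaning from a physical address to a page-frame number. A compilable sketch of why the designated form is safer under that kind of field change (struct shape taken from the diff, values arbitrary):
#include <stdio.h>
struct map_desc {
	unsigned long virtual;
	unsigned long pfn;	/* used to be a physical address */
	unsigned long length;
	unsigned int type;
};
/* positional: silently wrong if field order or meaning changes */
static struct map_desc old_style = { 0xf0000000UL, 0x400UL, 0x100000UL, 1 };
/* designated: self-documenting, independent of declaration order */
static struct map_desc new_style = {
	.virtual = 0xf0000000UL,
	.pfn     = 0x400UL,
	.length  = 0x100000UL,
	.type    = 1,
};
int main(void)
{
	printf("%lx -> pfn %lx\n", old_style.virtual, old_style.pfn);
	printf("%lx -> pfn %lx\n", new_style.virtual, new_style.pfn);
	return 0;
}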

View file

@ -74,27 +74,47 @@
/* macros to modify the physical addresses for io space */
#define PA_CS2(item) ((item) + S3C2410_CS2)
#define PA_CS3(item) ((item) + S3C2410_CS3)
#define PA_CS4(item) ((item) + S3C2410_CS4)
#define PA_CS5(item) ((item) + S3C2410_CS5)
#define PA_CS2(item) (__phys_to_pfn((item) + S3C2410_CS2))
#define PA_CS3(item) (__phys_to_pfn((item) + S3C2410_CS3))
#define PA_CS4(item) (__phys_to_pfn((item) + S3C2410_CS4))
#define PA_CS5(item) (__phys_to_pfn((item) + S3C2410_CS5))
static struct map_desc vr1000_iodesc[] __initdata = {
/* ISA IO areas */
{
.virtual = (u32)S3C24XX_VA_ISA_BYTE,
.pfn = PA_CS2(BAST_PA_ISAIO),
.length = SZ_16M,
.type = MT_DEVICE,
}, {
.virtual = (u32)S3C24XX_VA_ISA_WORD,
.pfn = PA_CS3(BAST_PA_ISAIO),
.length = SZ_16M,
.type = MT_DEVICE,
},
{ (u32)S3C24XX_VA_ISA_BYTE, PA_CS2(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
{ (u32)S3C24XX_VA_ISA_WORD, PA_CS3(BAST_PA_ISAIO), SZ_16M, MT_DEVICE },
/* we could possibly compress the next set down into a set of smaller
* pagetables, but that would mean using an L2 section, and it still means
* we cannot actually feed the same register to an LDR due to 16K spacing
*/
/* bast CPLD control registers, and external interrupt controls */
{ (u32)VR1000_VA_CTRL1, VR1000_PA_CTRL1, SZ_1M, MT_DEVICE },
{ (u32)VR1000_VA_CTRL2, VR1000_PA_CTRL2, SZ_1M, MT_DEVICE },
{ (u32)VR1000_VA_CTRL3, VR1000_PA_CTRL3, SZ_1M, MT_DEVICE },
{ (u32)VR1000_VA_CTRL4, VR1000_PA_CTRL4, SZ_1M, MT_DEVICE },
/* CPLD control registers, and external interrupt controls */
{
.virtual = (u32)VR1000_VA_CTRL1,
.pfn = __phys_to_pfn(VR1000_PA_CTRL1),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)VR1000_VA_CTRL2,
.pfn = __phys_to_pfn(VR1000_PA_CTRL2),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)VR1000_VA_CTRL3,
.pfn = __phys_to_pfn(VR1000_PA_CTRL3),
.length = SZ_1M,
.type = MT_DEVICE,
}, {
.virtual = (u32)VR1000_VA_CTRL4,
.pfn = __phys_to_pfn(VR1000_PA_CTRL4),
.length = SZ_1M,
.type = MT_DEVICE,
},
/* peripheral space... one for each of fast/slow/byte/16bit */
/* note, ide is only decoded in word space, even though some registers

View file

@ -124,11 +124,13 @@ static void __init sa1100_timer_init(void)
tv.tv_sec = sa1100_get_rtc_time();
do_settimeofday(&tv);
OSMR0 = 0; /* set initial match at 0 */
OIER = 0; /* disable any timer interrupts */
OSCR = LATCH*2; /* push OSCR out of the way */
OSMR0 = LATCH; /* set initial match */
OSSR = 0xf; /* clear status on all timers */
setup_irq(IRQ_OST0, &sa1100_timer_irq);
OIER |= OIER_E0; /* enable match on timer 0 to cause interrupts */
OSCR = 0; /* initialize free-running timer, force first match */
OIER = OIER_E0; /* enable match on timer 0 to cause interrupts */
OSCR = 0; /* initialize free-running timer */
}
#ifdef CONFIG_NO_IDLE_HZ

View file

@ -354,7 +354,7 @@ void __init build_mem_type_table(void)
{
struct cachepolicy *cp;
unsigned int cr = get_cr();
unsigned int user_pgprot;
unsigned int user_pgprot, kern_pgprot;
int cpu_arch = cpu_architecture();
int i;
@ -381,7 +381,7 @@ void __init build_mem_type_table(void)
}
cp = &cache_policies[cachepolicy];
user_pgprot = cp->pte;
kern_pgprot = user_pgprot = cp->pte;
/*
* ARMv6 and above have extended page tables.
@ -393,6 +393,7 @@ void __init build_mem_type_table(void)
*/
mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4;
mem_types[MT_ROM].prot_sect &= ~PMD_BIT4;
/*
* Mark cache clean areas and XIP ROM read only
* from SVC mode and no access from userspace.
@ -412,32 +413,47 @@ void __init build_mem_type_table(void)
* (iow, non-global)
*/
user_pgprot |= L_PTE_ASID;
#ifdef CONFIG_SMP
/*
* Mark memory with the "shared" attribute for SMP systems
*/
user_pgprot |= L_PTE_SHARED;
kern_pgprot |= L_PTE_SHARED;
mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
#endif
}
for (i = 0; i < 16; i++) {
unsigned long v = pgprot_val(protection_map[i]);
v = (v & ~(L_PTE_BUFFERABLE|L_PTE_CACHEABLE)) | user_pgprot;
protection_map[i] = __pgprot(v);
}
mem_types[MT_LOW_VECTORS].prot_pte |= kern_pgprot;
mem_types[MT_HIGH_VECTORS].prot_pte |= kern_pgprot;
if (cpu_arch >= CPU_ARCH_ARMv5) {
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte & PTE_CACHEABLE;
#ifndef CONFIG_SMP
/*
* Only use write-through for non-SMP systems
*/
mem_types[MT_LOW_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
mem_types[MT_HIGH_VECTORS].prot_pte &= ~L_PTE_BUFFERABLE;
#endif
} else {
mem_types[MT_LOW_VECTORS].prot_pte |= cp->pte;
mem_types[MT_HIGH_VECTORS].prot_pte |= cp->pte;
mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);
}
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE |
L_PTE_EXEC | kern_pgprot);
mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_ROM].prot_sect |= cp->pmd;
for (i = 0; i < 16; i++) {
unsigned long v = pgprot_val(protection_map[i]);
v = (v & ~(PTE_BUFFERABLE|PTE_CACHEABLE)) | user_pgprot;
protection_map[i] = __pgprot(v);
}
pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
L_PTE_DIRTY | L_PTE_WRITE |
L_PTE_EXEC | cp->pte);
switch (cp->pmd) {
case PMD_SECT_WT:
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;

View file

@ -12,6 +12,7 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hardware/arm_scu.h>
#include <asm/procinfo.h>
#include <asm/pgtable.h>
@ -112,6 +113,9 @@ ENTRY(cpu_v6_dcache_clean_area)
ENTRY(cpu_v6_switch_mm)
mov r2, #0
ldr r1, [r1, #MM_CONTEXT_ID] @ get mm->context.id
#ifdef CONFIG_SMP
orr r0, r0, #2 @ set shared pgtable
#endif
mcr p15, 0, r2, c7, c5, 6 @ flush BTAC/BTB
mcr p15, 0, r2, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c2, c0, 0 @ set TTB 0
@ -140,7 +144,7 @@ ENTRY(cpu_v6_switch_mm)
ENTRY(cpu_v6_set_pte)
str r1, [r0], #-2048 @ linux version
bic r2, r1, #0x000007f0
bic r2, r1, #0x000003f0
bic r2, r2, #0x00000003
orr r2, r2, #PTE_EXT_AP0 | 2
@ -191,6 +195,23 @@ cpu_v6_name:
* - cache type register is implemented
*/
__v6_setup:
#ifdef CONFIG_SMP
/* Set up the SCU on core 0 only */
mrc p15, 0, r0, c0, c0, 5 @ CPU core number
ands r0, r0, #15
moveq r0, #0x10000000 @ SCU_BASE
orreq r0, r0, #0x00100000
ldreq r5, [r0, #SCU_CTRL]
orreq r5, r5, #1
streq r5, [r0, #SCU_CTRL]
#ifndef CONFIG_CPU_DCACHE_DISABLE
mrc p15, 0, r0, c1, c0, 1 @ Enable SMP/nAMP mode
orr r0, r0, #0x20
mcr p15, 0, r0, c1, c0, 1
#endif
#endif
mov r0, #0
mcr p15, 0, r0, c7, c14, 0 @ clean+invalidate D cache
mcr p15, 0, r0, c7, c5, 0 @ invalidate I cache
@ -198,6 +219,9 @@ __v6_setup:
mcr p15, 0, r0, c7, c10, 4 @ drain write buffer
mcr p15, 0, r0, c8, c7, 0 @ invalidate I + D TLBs
mcr p15, 0, r0, c2, c0, 2 @ TTB control register
#ifdef CONFIG_SMP
orr r4, r4, #2 @ set shared pgtable
#endif
mcr p15, 0, r4, c2, c0, 1 @ load TTB1
#ifdef CONFIG_VFP
mrc p15, 0, r0, c1, c0, 2

View file

@ -60,7 +60,7 @@ typedef union tagFPREG {
#ifdef CONFIG_FPE_NWFPE_XP
floatx80 fExtended;
#else
int padding[3];
u32 padding[3];
#endif
} FPREG;

View file

@ -59,8 +59,13 @@ static inline void loadExtended(const unsigned int Fn, const unsigned int __user
p = (unsigned int *) &fpa11->fpreg[Fn].fExtended;
fpa11->fType[Fn] = typeExtended;
get_user(p[0], &pMem[0]); /* sign & exponent */
#ifdef __ARMEB__
get_user(p[1], &pMem[1]); /* ms bits */
get_user(p[2], &pMem[2]); /* ls bits */
#else
get_user(p[1], &pMem[2]); /* ls bits */
get_user(p[2], &pMem[1]); /* ms bits */
#endif
}
#endif
@ -177,8 +182,13 @@ static inline void storeExtended(const unsigned int Fn, unsigned int __user *pMe
}
put_user(val.i[0], &pMem[0]); /* sign & exp */
#ifdef __ARMEB__
put_user(val.i[1], &pMem[1]); /* msw */
put_user(val.i[2], &pMem[2]);
#else
put_user(val.i[1], &pMem[2]);
put_user(val.i[2], &pMem[1]); /* msw */
#endif
}
#endif
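The two hunks above exist because an 80-bit extended value is stored to user space as three 32-bit words, and which word carries the most-significant half depends on byte order. A small host-side check of the underlying fact, using an ordinary 64-bit value (the test pattern is arbitrary):
#include <stdio.h>
#include <stdint.h>
#include <string.h>
int main(void)
{
	uint64_t v = 0x1122334455667788ULL;
	uint32_t w[2];
	memcpy(w, &v, sizeof(v));
	/* little endian: w[0] is the ls half; big endian (__ARMEB__): the ms half */
	printf("w[0]=%08x w[1]=%08x -> %s-endian host\n",
	       (unsigned)w[0], (unsigned)w[1],
	       w[0] == 0x55667788 ? "little" : "big");
	return 0;
}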

View file

@ -29,14 +29,14 @@
#ifdef CONFIG_FPE_NWFPE_XP
const floatx80 floatx80Constant[] = {
{0x0000, 0x0000000000000000ULL}, /* extended 0.0 */
{0x3fff, 0x8000000000000000ULL}, /* extended 1.0 */
{0x4000, 0x8000000000000000ULL}, /* extended 2.0 */
{0x4000, 0xc000000000000000ULL}, /* extended 3.0 */
{0x4001, 0x8000000000000000ULL}, /* extended 4.0 */
{0x4001, 0xa000000000000000ULL}, /* extended 5.0 */
{0x3ffe, 0x8000000000000000ULL}, /* extended 0.5 */
{0x4002, 0xa000000000000000ULL} /* extended 10.0 */
{ .high = 0x0000, .low = 0x0000000000000000ULL},/* extended 0.0 */
{ .high = 0x3fff, .low = 0x8000000000000000ULL},/* extended 1.0 */
{ .high = 0x4000, .low = 0x8000000000000000ULL},/* extended 2.0 */
{ .high = 0x4000, .low = 0xc000000000000000ULL},/* extended 3.0 */
{ .high = 0x4001, .low = 0x8000000000000000ULL},/* extended 4.0 */
{ .high = 0x4001, .low = 0xa000000000000000ULL},/* extended 5.0 */
{ .high = 0x3ffe, .low = 0x8000000000000000ULL},/* extended 0.5 */
{ .high = 0x4002, .low = 0xa000000000000000ULL},/* extended 10.0 */
};
#endif

View file

@ -332,6 +332,7 @@ static floatx80 commonNaNToFloatx80( commonNaNT a )
z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
z.__padding = 0;
return z;
}

View file

@ -531,6 +531,7 @@ INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
z.low = zSig;
z.high = ( ( (bits16) zSign )<<15 ) + zExp;
z.__padding = 0;
return z;
}
@ -2831,6 +2832,7 @@ static floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, flo
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
z.__padding = 0;
return z;
}
if ( aExp == 0 ) {
@ -2950,6 +2952,7 @@ floatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
z.__padding = 0;
return z;
}
return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
@ -3015,6 +3018,7 @@ floatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
z.__padding = 0;
return z;
}
roundData->exception |= float_flag_divbyzero;
@ -3093,6 +3097,7 @@ floatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
z.__padding = 0;
return z;
}
normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
@ -3184,6 +3189,7 @@ floatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
roundData->exception |= float_flag_invalid;
z.low = floatx80_default_nan_low;
z.high = floatx80_default_nan_high;
z.__padding = 0;
return z;
}
if ( aExp == 0 ) {

View file

@ -51,11 +51,17 @@ input or output the `floatx80' type will be defined.
Software IEC/IEEE floating-point types.
-------------------------------------------------------------------------------
*/
typedef unsigned long int float32;
typedef unsigned long long float64;
typedef u32 float32;
typedef u64 float64;
typedef struct {
unsigned short high;
unsigned long long low;
#ifdef __ARMEB__
u16 __padding;
u16 high;
#else
u16 high;
u16 __padding;
#endif
u64 low;
} floatx80;
/*

View file

@ -546,7 +546,7 @@ static int ptrace_setfpregs(struct task_struct *tsk, void *ufp)
sizeof(struct user_fp)) ? -EFAULT : 0;
}
static int do_ptrace(int request, struct task_struct *child, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
unsigned long tmp;
int ret;
@ -665,53 +665,6 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
return ret;
}
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
struct task_struct *child;
int ret;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret == 0)
ret = do_ptrace(request, child, addr, data);
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}
asmlinkage void syscall_trace(int why, struct pt_regs *regs)
{
unsigned long ip;

View file

@ -177,7 +177,7 @@ The example address is 0xd004000c; in binary this is:
Given the top-level Page Directory, the offset in that directory is calculated
using the upper 8 bits:
extern inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
static inline pgd_t * pgd_offset(struct mm_struct * mm, unsigned long address)
{
return mm->pgd + (address >> PGDIR_SHIFT);
}
@ -190,14 +190,14 @@ The pgd_t from our example will therefore be the 208'th (0xd0) entry in mm->pgd.
Since the Middle Directory does not exist, it is a unity mapping:
extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
{
return (pmd_t *) dir;
}
The Page Table provides the final lookup by using bits 13 to 23 as index:
extern inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
static inline pte_t * pte_offset(pmd_t * dir, unsigned long address)
{
return (pte_t *) pmd_page(*dir) + ((address >> PAGE_SHIFT) &
(PTRS_PER_PTE - 1));
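The index arithmetic is easy to check by hand. Taking PGDIR_SHIFT as 24 and PAGE_SHIFT as 13, as implied by the "upper 8 bits" and "bits 13 to 23" above, this standalone program reproduces the 208 (0xd0) directory entry for the example address:
#include <stdio.h>
#include <stdint.h>
#define PGDIR_SHIFT 24			/* upper 8 bits index the Page Directory */
#define PAGE_SHIFT  13			/* 8 KiB pages */
#define PTRS_PER_PTE (1 << (PGDIR_SHIFT - PAGE_SHIFT))	/* bits 13..23: 2048 */
int main(void)
{
	uint32_t addr = 0xd004000c;
	printf("pgd index: %u (0x%x)\n",
	       (unsigned)(addr >> PGDIR_SHIFT), (unsigned)(addr >> PGDIR_SHIFT));
	printf("pte index: %u\n",
	       (unsigned)((addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)));
	return 0;
}
which prints pgd index 208 (0xd0) and pte index 32.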

View file

@ -76,55 +76,11 @@ ptrace_disable(struct task_struct *child)
* (in user space) where the result of the ptrace call is written (instead of
* being returned).
*/
asmlinkage int
sys_ptrace(long request, long pid, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
struct task_struct *child;
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* Leave the init process alone! */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
switch (request) {
/* Read word at location address. */
case PTRACE_PEEKTEXT:
@ -289,10 +245,7 @@ sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}

View file

@ -476,7 +476,7 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
* OK, we're invoking a handler
*/
extern inline void
static inline void
handle_signal(int canrestart, unsigned long sig,
siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs * regs)

View file

@ -277,7 +277,7 @@ struct file_operations cryptocop_fops = {
static void free_cdesc(struct cryptocop_dma_desc *cdesc)
{
DEBUG(printk("free_cdesc: cdesc 0x%p, from_pool=%d\n", cdesc, cdesc->from_pool));
if (cdesc->free_buf) kfree(cdesc->free_buf);
kfree(cdesc->free_buf);
if (cdesc->from_pool) {
unsigned long int flags;
@ -2950,15 +2950,15 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig
put_page(outpages[i]);
}
if (digest_result) kfree(digest_result);
if (inpages) kfree(inpages);
if (outpages) kfree(outpages);
kfree(digest_result);
kfree(inpages);
kfree(outpages);
if (cop){
if (cop->tfrm_op.indata) kfree(cop->tfrm_op.indata);
if (cop->tfrm_op.outdata) kfree(cop->tfrm_op.outdata);
kfree(cop->tfrm_op.indata);
kfree(cop->tfrm_op.outdata);
kfree(cop);
}
if (jc) kfree(jc);
kfree(jc);
DEBUG(print_lock_status());
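Every change in this hunk relies on the same guarantee: kfree(NULL) is a no-op, just as C's free(NULL) is, so the "if (p) kfree(p)" guard is redundant. In userspace terms:
#include <stdlib.h>
int main(void)
{
	char *buf = NULL;
	if (buf)		/* old style: redundant NULL guard */
		free(buf);
	free(buf);		/* new style: free(NULL) is a defined no-op */
	return 0;
}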

View file

@ -99,55 +99,11 @@ ptrace_disable(struct task_struct *child)
}
asmlinkage int
sys_ptrace(long request, long pid, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
struct task_struct *child;
int ret;
unsigned long __user *datap = (unsigned long __user *)data;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* Leave the init process alone! */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
switch (request) {
/* Read word at location address. */
case PTRACE_PEEKTEXT:
@ -347,10 +303,7 @@ sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}

View file

@ -513,7 +513,7 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
}
/* Invoke a signal handler to, well, handle the signal. */
extern inline void
static inline void
handle_signal(int canrestart, unsigned long sig,
siginfo_t *info, struct k_sigaction *ka,
sigset_t *oldset, struct pt_regs * regs)

View file

@ -16,7 +16,7 @@
#include <asm/tlbflush.h>
#include <asm/arch/memmap.h>
extern inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size,
unsigned long phys_addr, pgprot_t prot)
{
unsigned long end;

View file

@ -106,48 +106,11 @@ void ptrace_enable(struct task_struct *child)
child->thread.frame0->__status |= REG__STATUS_STEP;
}
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
struct task_struct *child;
unsigned long tmp;
int ret;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@ -351,10 +314,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}

View file

@ -57,43 +57,10 @@ void ptrace_disable(struct task_struct *child)
h8300_disable_trace(child);
}
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
struct task_struct *child;
int ret;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
switch (request) {
case PTRACE_PEEKTEXT: /* read word at location addr. */
case PTRACE_PEEKDATA: {
@ -251,10 +218,6 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = -EIO;
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
return ret;
}

View file

@ -997,8 +997,21 @@ source "drivers/Kconfig"
source "fs/Kconfig"
menu "Instrumentation Support"
depends on EXPERIMENTAL
source "arch/i386/oprofile/Kconfig"
config KPROBES
bool "Kprobes (EXPERIMENTAL)"
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
a probepoint and specifies the callback. Kprobes is useful
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
endmenu
source "arch/i386/Kconfig.debug"
source "security/Kconfig"

View file

@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW
This option will cause messages to be printed if free stack space
drops below a certain limit.
config KPROBES
bool "Kprobes"
depends on DEBUG_KERNEL
help
Kprobes allows you to trap at almost any kernel address and
execute a callback function. register_kprobe() establishes
a probepoint and specifies the callback. Kprobes is useful
for kernel debugging, non-intrusive instrumentation and testing.
If in doubt, say "N".
config DEBUG_STACK_USAGE
bool "Stack utilization instrumentation"
depends on DEBUG_KERNEL

View file

@ -559,14 +559,20 @@ void __devinit setup_local_APIC(void)
* If Linux enabled the LAPIC against the BIOS default
* disable it down before re-entering the BIOS on shutdown.
* Otherwise the BIOS may get confused and not power-off.
* Additionally clear all LVT entries before disable_local_APIC
* for the case where Linux didn't enable the LAPIC.
*/
void lapic_shutdown(void)
{
if (!cpu_has_apic || !enabled_via_apicbase)
if (!cpu_has_apic)
return;
local_irq_disable();
disable_local_APIC();
clear_local_APIC();
if (enabled_via_apicbase)
disable_local_APIC();
local_irq_enable();
}

View file

@ -447,8 +447,7 @@ static char * apm_event_name[] = {
"system standby resume",
"capabilities change"
};
#define NR_APM_EVENT_NAME \
(sizeof(apm_event_name) / sizeof(apm_event_name[0]))
#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name)
typedef struct lookup_t {
int key;
@ -479,7 +478,7 @@ static const lookup_t error_table[] = {
{ APM_NO_ERROR, "BIOS did not set a return code" },
{ APM_NOT_PRESENT, "No APM present" }
};
#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
#define ERROR_COUNT ARRAY_SIZE(error_table)
/**
* apm_error - display an APM error
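This conversion, and the identical ones in mca.c, reboot_fixups.c, smpboot.c and speedstep-centrino.c below, replace open-coded sizeof arithmetic with the kernel's standard helper, which has the same definition as this standalone sketch:
#include <stdio.h>
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))	/* as in include/linux/kernel.h */
static const char *names[] = { "standby", "suspend", "resume" };
int main(void)
{
	/* valid for true arrays only: a pointer argument would silently
	 * compute sizeof(pointer)/sizeof(element) instead */
	printf("%zu entries\n", ARRAY_SIZE(names));
	return 0;
}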

View file

@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1;
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
extern void mcheck_init(struct cpuinfo_x86 *c);
extern int disable_pse;
static void default_init(struct cpuinfo_x86 * c)
@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c)
}
/* Init Machine Check Exception if available. */
#ifdef CONFIG_X86_MCE
mcheck_init(c);
#endif
if (c == &boot_cpu_data)
sysenter_setup();
enable_sep_cpu();

View file

@ -377,10 +377,9 @@ acpi_cpufreq_cpu_init (
arg0.buffer.length = 12;
arg0.buffer.pointer = (u8 *) arg0_buf;
data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL);
if (!data)
return (-ENOMEM);
memset(data, 0, sizeof(struct cpufreq_acpi_io));
acpi_io_data[cpu] = data;
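This and the following cpufreq hunks apply one transformation: kzalloc(size, flags) behaves like kmalloc(size, flags) followed by memset(ptr, 0, size), so the three-step pattern collapses to one call. In kernel-style shorthand (a fragment, not a standalone program):
	/* before: allocate, check, then zero by hand */
	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;
	memset(data, 0, sizeof(*data));
	/* after: a single zeroing allocation */
	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;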

View file

@ -171,10 +171,9 @@ static int get_ranges (unsigned char *pst)
unsigned int speed;
u8 fid, vid;
powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL);
if (!powernow_table)
return -ENOMEM;
memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1)));
for (j=0 ; j < number_scales; j++) {
fid = *pst++;
@ -305,16 +304,13 @@ static int powernow_acpi_init(void)
goto err0;
}
acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance),
acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance),
GFP_KERNEL);
if (!acpi_processor_perf) {
retval = -ENOMEM;
goto err0;
}
memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance));
if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
retval = -EIO;
goto err1;
@ -337,14 +333,12 @@ static int powernow_acpi_init(void)
goto err2;
}
powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL);
if (!powernow_table) {
retval = -ENOMEM;
goto err2;
}
memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table)));
pc.val = (unsigned long) acpi_processor_perf->states[0].control;
for (i = 0; i < number_scales; i++) {
u8 fid, vid;

View file

@ -462,7 +462,6 @@ static int check_supported_cpu(unsigned int cpu)
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(cpu));
schedule();
if (smp_processor_id() != cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", cpu);
@ -497,9 +496,7 @@ static int check_supported_cpu(unsigned int cpu)
out:
set_cpus_allowed(current, oldmask);
schedule();
return rc;
}
static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid)
@ -913,7 +910,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
schedule();
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
@ -968,8 +964,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi
err_out:
set_cpus_allowed(current, oldmask);
schedule();
return ret;
}
@ -991,12 +985,11 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
if (!check_supported_cpu(pol->cpu))
return -ENODEV;
data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL);
if (!data) {
printk(KERN_ERR PFX "unable to alloc powernow_k8_data");
return -ENOMEM;
}
memset(data,0,sizeof(struct powernow_k8_data));
data->cpu = pol->cpu;
@ -1026,7 +1019,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
/* only run on specific CPU from here on */
oldmask = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(pol->cpu));
schedule();
if (smp_processor_id() != pol->cpu) {
printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu);
@ -1045,7 +1037,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
/* run on any CPU again */
set_cpus_allowed(current, oldmask);
schedule();
pol->governor = CPUFREQ_DEFAULT_GOVERNOR;
pol->cpus = cpu_core_map[pol->cpu];
@ -1080,7 +1071,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol)
err_out:
set_cpus_allowed(current, oldmask);
schedule();
powernow_k8_cpu_exit_acpi(data);
kfree(data);
@ -1116,17 +1106,14 @@ static unsigned int powernowk8_get (unsigned int cpu)
set_cpus_allowed(current, oldmask);
return 0;
}
preempt_disable();
if (query_current_values_with_pending_wait(data))
goto out;
khz = find_khz_freq_from_fid(data->currfid);
out:
preempt_enable_no_resched();
out:
set_cpus_allowed(current, oldmask);
return khz;
}

View file

@ -67,7 +67,7 @@ static const struct cpu_id cpu_ids[] = {
[CPU_MP4HT_D0] = {15, 3, 4 },
[CPU_MP4HT_E0] = {15, 4, 1 },
};
#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0]))
#define N_IDS ARRAY_SIZE(cpu_ids)
struct cpu_model
{
@ -423,12 +423,11 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
}
}
centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL);
centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL);
if (!centrino_model[cpu]) {
result = -ENOMEM;
goto err_unreg;
}
memset(centrino_model[cpu], 0, sizeof(struct cpu_model));
centrino_model[cpu]->model_name=NULL;
centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000;

View file

@ -68,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
/* AMD K7 machine check is Intel like */
void __devinit amd_mcheck_init(struct cpuinfo_x86 *c)
void amd_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;

View file

@ -16,7 +16,7 @@
#include "mce.h"
int mce_disabled __devinitdata = 0;
int mce_disabled = 0;
int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_
void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
/* This has to be run for each processor */
void __devinit mcheck_init(struct cpuinfo_x86 *c)
void mcheck_init(struct cpuinfo_x86 *c)
{
if (mce_disabled==1)
return;

View file

@ -77,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs)
}
/* P4/Xeon Thermal regulation detect and init */
static void __devinit intel_init_thermal(struct cpuinfo_x86 *c)
static void intel_init_thermal(struct cpuinfo_x86 *c)
{
u32 l, h;
unsigned int cpu = smp_processor_id();
@ -231,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c)
void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;

View file

@ -28,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting for processors with Intel style MCE */
void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c)
void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;

View file

@ -79,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
}
/* Set up machine check reporting for processors with Intel style MCE */
void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c)
void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;

View file

@ -22,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod
}
/* Set up machine check reporting on the Winchip C6 series */
void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c)
void winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
machine_check_vector = winchip_machine_check;

View file

@ -31,22 +31,16 @@
#include <linux/config.h>
#include <linux/kprobes.h>
#include <linux/ptrace.h>
#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
static struct kprobe *current_kprobe;
static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags;
static struct kprobe *kprobe_prev;
static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev;
static struct pt_regs jprobe_saved_regs;
static long *jprobe_saved_esp;
/* copy of the kernel stack at the probe fire time */
static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
void jprobe_return_end(void);
DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
/*
* returns non-zero if opcode modifies the interrupt flag.
*/
@ -91,29 +85,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p)
{
}
static inline void save_previous_kprobe(void)
static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb)
{
kprobe_prev = current_kprobe;
kprobe_status_prev = kprobe_status;
kprobe_old_eflags_prev = kprobe_old_eflags;
kprobe_saved_eflags_prev = kprobe_saved_eflags;
kcb->prev_kprobe.kp = kprobe_running();
kcb->prev_kprobe.status = kcb->kprobe_status;
kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags;
kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags;
}
static inline void restore_previous_kprobe(void)
static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
current_kprobe = kprobe_prev;
kprobe_status = kprobe_status_prev;
kprobe_old_eflags = kprobe_old_eflags_prev;
kprobe_saved_eflags = kprobe_saved_eflags_prev;
__get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags;
kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags;
}
static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs)
static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
current_kprobe = p;
kprobe_saved_eflags = kprobe_old_eflags
__get_cpu_var(current_kprobe) = p;
kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags
= (regs->eflags & (TF_MASK | IF_MASK));
if (is_IF_modifier(p->opcode))
kprobe_saved_eflags &= ~IF_MASK;
kcb->kprobe_saved_eflags &= ~IF_MASK;
}
static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
@ -127,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs)
regs->eip = (unsigned long)&p->ainsn.insn;
}
/* Called with kretprobe_lock held */
void __kprobes arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
{
@ -157,9 +153,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
int ret = 0;
kprobe_opcode_t *addr = NULL;
unsigned long *lp;
struct kprobe_ctlblk *kcb;
/* We're in an interrupt, but this is clear and BUG()-safe. */
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
*/
preempt_disable();
kcb = get_kprobe_ctlblk();
/* Check if the application is using an LDT entry for its code segment and
* calculate the address by reading the base address from the LDT entry.
*/
@ -173,15 +175,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
}
/* Check we're not actually recursing */
if (kprobe_running()) {
/* We *are* holding lock here, so this is safe.
Disarm the probe we just hit, and ignore it. */
p = get_kprobe(addr);
if (p) {
if (kprobe_status == KPROBE_HIT_SS &&
if (kcb->kprobe_status == KPROBE_HIT_SS &&
*p->ainsn.insn == BREAKPOINT_INSTRUCTION) {
regs->eflags &= ~TF_MASK;
regs->eflags |= kprobe_saved_eflags;
unlock_kprobes();
regs->eflags |= kcb->kprobe_saved_eflags;
goto no_kprobe;
}
/* We have reentered the kprobe_handler(), since
@ -190,26 +189,23 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
* just single step on the instruction of the new probe
* without calling any user handlers.
*/
save_previous_kprobe();
set_current_kprobe(p, regs);
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
p->nmissed++;
prepare_singlestep(p, regs);
kprobe_status = KPROBE_REENTER;
kcb->kprobe_status = KPROBE_REENTER;
return 1;
} else {
p = current_kprobe;
p = __get_cpu_var(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
goto ss_probe;
}
}
/* If it's not ours, can't be delete race, (we hold lock). */
goto no_kprobe;
}
lock_kprobes();
p = get_kprobe(addr);
if (!p) {
unlock_kprobes();
if (regs->eflags & VM_MASK) {
/* We are in virtual-8086 mode. Return 0 */
goto no_kprobe;
@ -232,8 +228,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
goto no_kprobe;
}
kprobe_status = KPROBE_HIT_ACTIVE;
set_current_kprobe(p, regs);
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
if (p->pre_handler && p->pre_handler(p, regs))
/* handler has already set things up, so skip ss setup */
@ -241,7 +237,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
ss_probe:
prepare_singlestep(p, regs);
kprobe_status = KPROBE_HIT_SS;
kcb->kprobe_status = KPROBE_HIT_SS;
return 1;
no_kprobe:
@ -269,9 +265,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
struct kretprobe_instance *ri = NULL;
struct hlist_head *head;
struct hlist_node *node, *tmp;
unsigned long orig_ret_address = 0;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline;
spin_lock_irqsave(&kretprobe_lock, flags);
head = kretprobe_inst_table_head(current);
/*
@ -310,14 +307,15 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address));
regs->eip = orig_ret_address;
unlock_kprobes();
reset_current_kprobe();
spin_unlock_irqrestore(&kretprobe_lock, flags);
preempt_enable_no_resched();
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we have handled unlocking
* and re-enabling preemption.
*/
/*
* By returning a non-zero value, we are telling
* kprobe_handler() that we don't want the post_handler
* to run (and have re-enabled preemption)
*/
return 1;
}
@ -343,7 +341,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs)
* that is atop the stack is the address following the copied instruction.
* We need to make it the address following the original instruction.
*/
static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
static void __kprobes resume_execution(struct kprobe *p,
struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{
unsigned long *tos = (unsigned long *)&regs->esp;
unsigned long next_eip = 0;
@ -353,7 +352,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
switch (p->ainsn.insn[0]) {
case 0x9c: /* pushfl */
*tos &= ~(TF_MASK | IF_MASK);
*tos |= kprobe_old_eflags;
*tos |= kcb->kprobe_old_eflags;
break;
case 0xc3: /* ret/lret */
case 0xcb:
@ -394,27 +393,30 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
/*
* Interrupts are disabled on entry as trap1 is an interrupt gate and they
* remain disabled thoroughout this function. And we hold kprobe lock.
* remain disabled throughout this function.
*/
static inline int post_kprobe_handler(struct pt_regs *regs)
{
if (!kprobe_running())
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (!cur)
return 0;
if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) {
kprobe_status = KPROBE_HIT_SSDONE;
current_kprobe->post_handler(current_kprobe, regs, 0);
if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
cur->post_handler(cur, regs, 0);
}
resume_execution(current_kprobe, regs);
regs->eflags |= kprobe_saved_eflags;
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_saved_eflags;
/*Restore back the original saved kprobes variables and continue. */
if (kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe();
if (kcb->kprobe_status == KPROBE_REENTER) {
restore_previous_kprobe(kcb);
goto out;
}
unlock_kprobes();
reset_current_kprobe();
out:
preempt_enable_no_resched();
@ -429,18 +431,19 @@ static inline int post_kprobe_handler(struct pt_regs *regs)
return 1;
}
/* Interrupts disabled, kprobe_lock held. */
static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{
if (current_kprobe->fault_handler
&& current_kprobe->fault_handler(current_kprobe, regs, trapnr))
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
return 1;
if (kprobe_status & KPROBE_HIT_SS) {
resume_execution(current_kprobe, regs);
regs->eflags |= kprobe_old_eflags;
if (kcb->kprobe_status & KPROBE_HIT_SS) {
resume_execution(cur, regs, kcb);
regs->eflags |= kcb->kprobe_old_eflags;
unlock_kprobes();
reset_current_kprobe();
preempt_enable_no_resched();
}
return 0;
@ -453,39 +456,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
struct die_args *args = (struct die_args *)data;
int ret = NOTIFY_DONE;
switch (val) {
case DIE_INT3:
if (kprobe_handler(args->regs))
return NOTIFY_STOP;
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
if (post_kprobe_handler(args->regs))
return NOTIFY_STOP;
ret = NOTIFY_STOP;
break;
case DIE_GPF:
if (kprobe_running() &&
kprobe_fault_handler(args->regs, args->trapnr))
return NOTIFY_STOP;
break;
case DIE_PAGE_FAULT:
/* kprobe_running() needs smp_processor_id() */
preempt_disable();
if (kprobe_running() &&
kprobe_fault_handler(args->regs, args->trapnr))
return NOTIFY_STOP;
ret = NOTIFY_STOP;
preempt_enable();
break;
default:
break;
}
return NOTIFY_DONE;
return ret;
}
int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
{
struct jprobe *jp = container_of(p, struct jprobe, kp);
unsigned long addr;
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
jprobe_saved_regs = *regs;
jprobe_saved_esp = &regs->esp;
addr = (unsigned long)jprobe_saved_esp;
kcb->jprobe_saved_regs = *regs;
kcb->jprobe_saved_esp = &regs->esp;
addr = (unsigned long)(kcb->jprobe_saved_esp);
/*
* TBD: As Linus pointed out, gcc assumes that the callee
@ -494,7 +499,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
* we also save and restore enough stack bytes to cover
* the argument area.
*/
memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
MIN_STACK_SIZE(addr));
regs->eflags &= ~IF_MASK;
regs->eip = (unsigned long)(jp->entry);
return 1;
@ -502,36 +508,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
void __kprobes jprobe_return(void)
{
preempt_enable_no_resched();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
asm volatile (" xchgl %%ebx,%%esp \n"
" int3 \n"
" .globl jprobe_return_end \n"
" jprobe_return_end: \n"
" nop \n"::"b"
(jprobe_saved_esp):"memory");
(kcb->jprobe_saved_esp):"memory");
}
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
u8 *addr = (u8 *) (regs->eip - 1);
unsigned long stack_addr = (unsigned long)jprobe_saved_esp;
unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp);
struct jprobe *jp = container_of(p, struct jprobe, kp);
if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) {
if (&regs->esp != jprobe_saved_esp) {
if (&regs->esp != kcb->jprobe_saved_esp) {
struct pt_regs *saved_regs =
container_of(jprobe_saved_esp, struct pt_regs, esp);
container_of(kcb->jprobe_saved_esp,
struct pt_regs, esp);
printk("current esp %p does not match saved esp %p\n",
&regs->esp, jprobe_saved_esp);
&regs->esp, kcb->jprobe_saved_esp);
printk("Saved registers for jprobe %p\n", jp);
show_registers(saved_regs);
printk("Current registers\n");
show_registers(regs);
BUG();
}
*regs = jprobe_saved_regs;
memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
*regs = kcb->jprobe_saved_regs;
memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack,
MIN_STACK_SIZE(stack_addr));
preempt_enable_no_resched();
return 1;
}
return 0;
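The whole conversion above follows one pattern: the file-scope globals (current_kprobe, kprobe_status, the saved eflags, the jprobe state) become fields of a per-CPU struct kprobe_ctlblk, valid to touch only while preemption is off. A sketch of the access pattern, assuming get_kprobe_ctlblk() wraps __get_cpu_var() as the kprobes headers of this era do:
	DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
	preempt_disable();			/* pin this task to the CPU */
	kcb = get_kprobe_ctlblk();		/* this CPU's control block */
	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
	/* ... probe processing; no lock needed, the state is CPU-local ... */
	preempt_enable_no_resched();
This is why the lock_kprobes()/unlock_kprobes() calls disappear from the handlers: per-CPU state plus disabled preemption makes the global lock unnecessary on this path.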

View file

@ -18,6 +18,7 @@
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
#include <asm/mmu_context.h>
#ifdef CONFIG_SMP /* avoids "defined but not used" warning */
static void flush_ldt(void *null)

View file

@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = {
{ .start = 0x100, .end = 0x107, .name = "POS (MCA)" }
};
#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource))
#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources)
/**
* mca_read_and_store_pos - read the POS registers into a memory buffer

View file

@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child,
return 0;
}
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
long arch_ptrace(struct task_struct *child, long request, long addr, long data)
{
struct task_struct *child;
struct user * dummy = NULL;
int i, ret;
unsigned long __user *datap = (unsigned long __user *)data;
lock_kernel();
ret = -EPERM;
if (request == PTRACE_TRACEME) {
/* are we already being traced? */
if (current->ptrace & PT_PTRACED)
goto out;
ret = security_ptrace(current->parent, current);
if (ret)
goto out;
/* set the ptrace bit in the process flags. */
current->ptrace |= PT_PTRACED;
ret = 0;
goto out;
}
ret = -ESRCH;
read_lock(&tasklist_lock);
child = find_task_by_pid(pid);
if (child)
get_task_struct(child);
read_unlock(&tasklist_lock);
if (!child)
goto out;
ret = -EPERM;
if (pid == 1) /* you may not mess with init */
goto out_tsk;
if (request == PTRACE_ATTACH) {
ret = ptrace_attach(child);
goto out_tsk;
}
ret = ptrace_check_attach(child, request == PTRACE_KILL);
if (ret < 0)
goto out_tsk;
switch (request) {
/* when I and D space are separate, these will need to be fixed. */
case PTRACE_PEEKTEXT: /* read word at location addr. */
@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
ret = ptrace_request(child, request, addr, data);
break;
}
out_tsk:
put_task_struct(child);
out:
unlock_kernel();
out_tsk:
return ret;
}
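Each architecture in this series loses the same ~40 lines because the common entry point now lives in generic code and calls back into the remaining arch_ptrace(). An approximate sketch of the generic side (kernel/ptrace.c of this era; not a verbatim copy):
asmlinkage long sys_ptrace(long request, long pid, long addr, long data)
{
	struct task_struct *child;
	long ret;
	lock_kernel();
	if (request == PTRACE_TRACEME) {
		ret = ptrace_traceme();		/* the old inline TRACEME block */
		goto out;
	}
	child = ptrace_get_task_struct(pid);	/* lookup + "not init" checks */
	if (IS_ERR(child)) {
		ret = PTR_ERR(child);
		goto out;
	}
	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_put_task_struct;
	}
	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (!ret)
		ret = arch_ptrace(child, request, addr, data);	/* arch hook */
 out_put_task_struct:
	put_task_struct(child);
 out:
	unlock_kernel();
	return ret;
}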

View file

@ -10,6 +10,7 @@
#include <asm/delay.h>
#include <linux/pci.h>
#include <linux/reboot_fixups.h>
static void cs5530a_warm_reset(struct pci_dev *dev)
{
@ -42,7 +43,7 @@ void mach_reboot_fixups(void)
struct pci_dev *dev;
int i;
for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) {
for (i=0; i < ARRAY_SIZE(fixups_table); i++) {
cur = &(fixups_table[i]);
dev = pci_get_device(cur->vendor, cur->device, NULL);
if (!dev)

View file

@ -12,6 +12,7 @@
#include <linux/pci.h>
#include <linux/scx200.h>
#include <linux/scx200_gpio.h>
/* Verify that the configuration block really is there */
#define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base))

View file

@ -68,11 +68,9 @@ EXPORT_SYMBOL(smp_num_siblings);
/* Package ID of each logical CPU */
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(phys_proc_id);
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
EXPORT_SYMBOL(cpu_core_id);
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
@ -612,7 +610,7 @@ static inline void __inquire_remote_apic(int apicid)
printk("Inquiring remote APIC #%d...\n", apicid);
for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
for (i = 0; i < ARRAY_SIZE(regs); i++) {
printk("... APIC #%d %s: ", apicid, names[i]);
/*
