Merge 4.12-rc5 into usb-next
We want the USB fixes in here as well. Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
commit
81606aea23
864 changed files with 9843 additions and 5371 deletions
|
@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues.
|
|||
If the userspace hasn't been prepared to ignore the unreliable "opened"
|
||||
events and the unreliable initial state notification, Linux users can use
|
||||
the following kernel parameters to handle the possible issues:
|
||||
A. button.lid_init_state=open:
|
||||
A. button.lid_init_state=method:
|
||||
When this option is specified, the ACPI button driver reports the
|
||||
initial lid state using the returning value of the _LID control method
|
||||
and whether the "opened"/"closed" events are paired fully relies on the
|
||||
firmware implementation.
|
||||
This option can be used to fix some platforms where the returning value
|
||||
of the _LID control method is reliable but the initial lid state
|
||||
notification is missing.
|
||||
This option is the default behavior during the period the userspace
|
||||
isn't ready to handle the buggy AML tables.
|
||||
B. button.lid_init_state=open:
|
||||
When this option is specified, the ACPI button driver always reports the
|
||||
initial lid state as "opened" and whether the "opened"/"closed" events
|
||||
are paired fully relies on the firmware implementation.
|
||||
This may fix some platforms where the returning value of the _LID
|
||||
control method is not reliable and the initial lid state notification is
|
||||
missing.
|
||||
This option is the default behavior during the period the userspace
|
||||
isn't ready to handle the buggy AML tables.
|
||||
|
||||
If the userspace has been prepared to ignore the unreliable "opened" events
|
||||
and the unreliable initial state notification, Linux users should always
|
||||
use the following kernel parameter:
|
||||
B. button.lid_init_state=ignore:
|
||||
C. button.lid_init_state=ignore:
|
||||
When this option is specified, the ACPI button driver never reports the
|
||||
initial lid state and there is a compensation mechanism implemented to
|
||||
ensure that the reliable "closed" notifications can always be delievered
|
||||
|
|
|
@ -866,6 +866,15 @@
|
|||
|
||||
dscc4.setup= [NET]
|
||||
|
||||
dt_cpu_ftrs= [PPC]
|
||||
Format: {"off" | "known"}
|
||||
Control how the dt_cpu_ftrs device-tree binding is
|
||||
used for CPU feature discovery and setup (if it
|
||||
exists).
|
||||
off: Do not use it, fall back to legacy cpu table.
|
||||
known: Do not pass through unknown features to guests
|
||||
or userspace, only those that the kernel is aware of.
|
||||
|
||||
dump_apple_properties [X86]
|
||||
Dump name and content of EFI device properties on
|
||||
x86 Macs. Useful for driver authors to determine
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
|
||||
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
|
||||
|
||||
=======================
|
||||
CPU Performance Scaling
|
||||
|
@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware
|
|||
interface it comes from and may not be easily represented in an abstract,
|
||||
platform-independent way. For this reason, ``CPUFreq`` allows scaling drivers
|
||||
to bypass the governor layer and implement their own performance scaling
|
||||
algorithms. That is done by the ``intel_pstate`` scaling driver.
|
||||
algorithms. That is done by the |intel_pstate| scaling driver.
|
||||
|
||||
|
||||
``CPUFreq`` Policy Objects
|
||||
|
@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU
|
|||
into account. That is achieved by invoking the governor's ``->stop`` and
|
||||
``->start()`` callbacks, in this order, for the entire policy.
|
||||
|
||||
As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling
|
||||
As mentioned before, the |intel_pstate| scaling driver bypasses the scaling
|
||||
governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
|
||||
Consequently, if ``intel_pstate`` is used, scaling governors are not attached to
|
||||
Consequently, if |intel_pstate| is used, scaling governors are not attached to
|
||||
new policy objects. Instead, the driver's ``->setpolicy()`` callback is invoked
|
||||
to register per-CPU utilization update callbacks for each policy. These
|
||||
callbacks are invoked by the CPU scheduler in the same way as for scaling
|
||||
governors, but in the ``intel_pstate`` case they both determine the P-state to
|
||||
governors, but in the |intel_pstate| case they both determine the P-state to
|
||||
use and change the hardware configuration accordingly in one go from scheduler
|
||||
context.
|
||||
|
||||
|
@ -257,7 +258,7 @@ are the following:
|
|||
|
||||
``scaling_available_governors``
|
||||
List of ``CPUFreq`` scaling governors present in the kernel that can
|
||||
be attached to this policy or (if the ``intel_pstate`` scaling driver is
|
||||
be attached to this policy or (if the |intel_pstate| scaling driver is
|
||||
in use) list of scaling algorithms provided by the driver that can be
|
||||
applied to this policy.
|
||||
|
||||
|
@ -274,7 +275,7 @@ are the following:
|
|||
the CPU is actually running at (due to hardware design and other
|
||||
limitations).
|
||||
|
||||
Some scaling drivers (e.g. ``intel_pstate``) attempt to provide
|
||||
Some scaling drivers (e.g. |intel_pstate|) attempt to provide
|
||||
information more precisely reflecting the current CPU frequency through
|
||||
this attribute, but that still may not be the exact current CPU
|
||||
frequency as seen by the hardware at the moment.
|
||||
|
@ -284,13 +285,13 @@ are the following:
|
|||
|
||||
``scaling_governor``
|
||||
The scaling governor currently attached to this policy or (if the
|
||||
``intel_pstate`` scaling driver is in use) the scaling algorithm
|
||||
|intel_pstate| scaling driver is in use) the scaling algorithm
|
||||
provided by the driver that is currently applied to this policy.
|
||||
|
||||
This attribute is read-write and writing to it will cause a new scaling
|
||||
governor to be attached to this policy or a new scaling algorithm
|
||||
provided by the scaling driver to be applied to it (in the
|
||||
``intel_pstate`` case), as indicated by the string written to this
|
||||
|intel_pstate| case), as indicated by the string written to this
|
||||
attribute (which must be one of the names listed by the
|
||||
``scaling_available_governors`` attribute described above).
|
||||
|
||||
|
@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls
|
|||
the "boost" setting for the whole system. It is not present if the underlying
|
||||
scaling driver does not support the frequency boost mechanism (or supports it,
|
||||
but provides a driver-specific interface for controlling it, like
|
||||
``intel_pstate``).
|
||||
|intel_pstate|).
|
||||
|
||||
If the value in this file is 1, the frequency boost mechanism is enabled. This
|
||||
means that either the hardware can be put into states in which it is able to
|
||||
|
|
|
@ -6,6 +6,7 @@ Power Management
|
|||
:maxdepth: 2
|
||||
|
||||
cpufreq
|
||||
intel_pstate
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
|
755
Documentation/admin-guide/pm/intel_pstate.rst
Normal file
755
Documentation/admin-guide/pm/intel_pstate.rst
Normal file
|
@ -0,0 +1,755 @@
|
|||
===============================================
|
||||
``intel_pstate`` CPU Performance Scaling Driver
|
||||
===============================================
|
||||
|
||||
::
|
||||
|
||||
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
General Information
|
||||
===================
|
||||
|
||||
``intel_pstate`` is a part of the
|
||||
:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel
|
||||
(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later
|
||||
generations of Intel processors. Note, however, that some of those processors
|
||||
may not be supported. [To understand ``intel_pstate`` it is necessary to know
|
||||
how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
|
||||
you have not done that yet.]
|
||||
|
||||
For the processors supported by ``intel_pstate``, the P-state concept is broader
|
||||
than just an operating frequency or an operating performance point (see the
|
||||
`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
|
||||
information about that). For this reason, the representation of P-states used
|
||||
by ``intel_pstate`` internally follows the hardware specification (for details
|
||||
refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
|
||||
Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core
|
||||
uses frequencies for identifying operating performance points of CPUs and
|
||||
frequencies are involved in the user space interface exposed by it, so
|
||||
``intel_pstate`` maps its internal representation of P-states to frequencies too
|
||||
(fortunately, that mapping is unambiguous). At the same time, it would not be
|
||||
practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of
|
||||
available frequencies due to the possible size of it, so the driver does not do
|
||||
that. Some functionality of the core is limited by that.
|
||||
|
||||
Since the hardware P-state selection interface used by ``intel_pstate`` is
|
||||
available at the logical CPU level, the driver always works with individual
|
||||
CPUs. Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy
|
||||
object corresponds to one logical CPU and ``CPUFreq`` policies are effectively
|
||||
equivalent to CPUs. In particular, this means that they become "inactive" every
|
||||
time the corresponding CPU is taken offline and need to be re-initialized when
|
||||
it goes back online.
|
||||
|
||||
``intel_pstate`` is not modular, so it cannot be unloaded, which means that the
|
||||
only way to pass early-configuration-time parameters to it is via the kernel
|
||||
command line. However, its configuration can be adjusted via ``sysfs`` to a
|
||||
great extent. In some configurations it even is possible to unregister it via
|
||||
``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
|
||||
registered (see `below <status_attr_>`_).
|
||||
|
||||
|
||||
Operation Modes
|
||||
===============
|
||||
|
||||
``intel_pstate`` can operate in three different modes: in the active mode with
|
||||
or without hardware-managed P-states support and in the passive mode. Which of
|
||||
them will be in effect depends on what kernel command line options are used and
|
||||
on the capabilities of the processor.
|
||||
|
||||
Active Mode
|
||||
-----------
|
||||
|
||||
This is the default operation mode of ``intel_pstate``. If it works in this
|
||||
mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
|
||||
policies contains the string "intel_pstate".
|
||||
|
||||
In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
|
||||
provides its own scaling algorithms for P-state selection. Those algorithms
|
||||
can be applied to ``CPUFreq`` policies in the same way as generic scaling
|
||||
governors (that is, through the ``scaling_governor`` policy attribute in
|
||||
``sysfs``). [Note that different P-state selection algorithms may be chosen for
|
||||
different policies, but that is not recommended.]
|
||||
|
||||
They are not generic scaling governors, but their names are the same as the
|
||||
names of some of those governors. Moreover, confusingly enough, they generally
|
||||
do not work in the same way as the generic governors they share the names with.
|
||||
For example, the ``powersave`` P-state selection algorithm provided by
|
||||
``intel_pstate`` is not a counterpart of the generic ``powersave`` governor
|
||||
(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors).
|
||||
|
||||
There are two P-state selection algorithms provided by ``intel_pstate`` in the
|
||||
active mode: ``powersave`` and ``performance``. The way they both operate
|
||||
depends on whether or not the hardware-managed P-states (HWP) feature has been
|
||||
enabled in the processor and possibly on the processor model.
|
||||
|
||||
Which of the P-state selection algorithms is used by default depends on the
|
||||
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option.
|
||||
Namely, if that option is set, the ``performance`` algorithm will be used by
|
||||
default, and the other one will be used by default if it is not set.
|
||||
|
||||
Active Mode With HWP
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If the processor supports the HWP feature, it will be enabled during the
|
||||
processor initialization and cannot be disabled after that. It is possible
|
||||
to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the
|
||||
kernel in the command line.
|
||||
|
||||
If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to
|
||||
select P-states by itself, but still it can give hints to the processor's
|
||||
internal P-state selection logic. What those hints are depends on which P-state
|
||||
selection algorithm has been applied to the given policy (or to the CPU it
|
||||
corresponds to).
|
||||
|
||||
Even though the P-state selection is carried out by the processor automatically,
|
||||
``intel_pstate`` registers utilization update callbacks with the CPU scheduler
|
||||
in this mode. However, they are not used for running a P-state selection
|
||||
algorithm, but for periodic updates of the current CPU frequency information to
|
||||
be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``.
|
||||
|
||||
HWP + ``performance``
|
||||
.....................
|
||||
|
||||
In this configuration ``intel_pstate`` will write 0 to the processor's
|
||||
Energy-Performance Preference (EPP) knob (if supported) or its
|
||||
Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
|
||||
internal P-state selection logic is expected to focus entirely on performance.
|
||||
|
||||
This will override the EPP/EPB setting coming from the ``sysfs`` interface
|
||||
(see `Energy vs Performance Hints`_ below).
|
||||
|
||||
Also, in this configuration the range of P-states available to the processor's
|
||||
internal P-state selection logic is always restricted to the upper boundary
|
||||
(that is, the maximum P-state that the driver is allowed to use).
|
||||
|
||||
HWP + ``powersave``
|
||||
...................
|
||||
|
||||
In this configuration ``intel_pstate`` will set the processor's
|
||||
Energy-Performance Preference (EPP) knob (if supported) or its
|
||||
Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was
|
||||
previously set to via ``sysfs`` (or whatever default value it was
|
||||
set to by the platform firmware). This usually causes the processor's
|
||||
internal P-state selection logic to be less performance-focused.
|
||||
|
||||
Active Mode Without HWP
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
This is the default operation mode for processors that do not support the HWP
|
||||
feature. It also is used by default with the ``intel_pstate=no_hwp`` argument
|
||||
in the kernel command line. However, in this mode ``intel_pstate`` may refuse
|
||||
to work with the given processor if it does not recognize it. [Note that
|
||||
``intel_pstate`` will never refuse to work with any processor with the HWP
|
||||
feature enabled.]
|
||||
|
||||
In this mode ``intel_pstate`` registers utilization update callbacks with the
|
||||
CPU scheduler in order to run a P-state selection algorithm, either
|
||||
``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
|
||||
setting in ``sysfs``. The current CPU frequency information to be made
|
||||
available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
|
||||
periodically updated by those utilization update callbacks too.
|
||||
|
||||
``performance``
|
||||
...............
|
||||
|
||||
Without HWP, this P-state selection algorithm is always the same regardless of
|
||||
the processor model and platform configuration.
|
||||
|
||||
It selects the maximum P-state it is allowed to use, subject to limits set via
|
||||
``sysfs``, every time the P-state selection computations are carried out by the
|
||||
driver's utilization update callback for the given CPU (that does not happen
|
||||
more often than every 10 ms), but the hardware configuration will not be changed
|
||||
if the new P-state is the same as the current one.
|
||||
|
||||
This is the default P-state selection algorithm if the
|
||||
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
|
||||
is set.
|
||||
|
||||
``powersave``
|
||||
.............
|
||||
|
||||
Without HWP, this P-state selection algorithm generally depends on the
|
||||
processor model and/or the system profile setting in the ACPI tables and there
|
||||
are two variants of it.
|
||||
|
||||
One of them is used with processors from the Atom line and (regardless of the
|
||||
processor model) on platforms with the system profile in the ACPI tables set to
|
||||
"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
|
||||
"workstation". It is also used with processors supporting the HWP feature if
|
||||
that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
|
||||
argument in the kernel command line). It is similar to the algorithm
|
||||
implemented by the generic ``schedutil`` scaling governor except that the
|
||||
utilization metric used by it is based on numbers coming from feedback
|
||||
registers of the CPU. It generally selects P-states proportional to the
|
||||
current CPU utilization, so it is referred to as the "proportional" algorithm.
|
||||
|
||||
The second variant of the ``powersave`` P-state selection algorithm, used in all
|
||||
of the other cases (generally, on processors from the Core line, so it is
|
||||
referred to as the "Core" algorithm), is based on the values read from the APERF
|
||||
and MPERF feedback registers and the previously requested target P-state.
|
||||
It does not really take CPU utilization into account explicitly, but as a rule
|
||||
it causes the CPU P-state to ramp up very quickly in response to increased
|
||||
utilization which is generally desirable in server environments.
|
||||
|
||||
Regardless of the variant, this algorithm is run by the driver's utilization
|
||||
update callback for the given CPU when it is invoked by the CPU scheduler, but
|
||||
not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
|
||||
particular case <Tuning Interface in debugfs_>`_). Like in the ``performance``
|
||||
case, the hardware configuration is not touched if the new P-state turns out to
|
||||
be the same as the current one.
|
||||
|
||||
This is the default P-state selection algorithm if the
|
||||
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
|
||||
is not set.
|
||||
|
||||
Passive Mode
|
||||
------------
|
||||
|
||||
This mode is used if the ``intel_pstate=passive`` argument is passed to the
|
||||
kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
|
||||
Like in the active mode without HWP support, in this mode ``intel_pstate`` may
|
||||
refuse to work with the given processor if it does not recognize it.
|
||||
|
||||
If the driver works in this mode, the ``scaling_driver`` policy attribute in
|
||||
``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
|
||||
Then, the driver behaves like a regular ``CPUFreq`` scaling driver. That is,
|
||||
it is invoked by generic scaling governors when necessary to talk to the
|
||||
hardware in order to change the P-state of a CPU (in particular, the
|
||||
``schedutil`` governor can invoke it directly from scheduler context).
|
||||
|
||||
While in this mode, ``intel_pstate`` can be used with all of the (generic)
|
||||
scaling governors listed by the ``scaling_available_governors`` policy attribute
|
||||
in ``sysfs`` (and the P-state selection algorithms described above are not
|
||||
used). Then, it is responsible for the configuration of policy objects
|
||||
corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling
|
||||
governors attached to the policy objects) with accurate information on the
|
||||
maximum and minimum operating frequencies supported by the hardware (including
|
||||
the so-called "turbo" frequency ranges). In other words, in the passive mode
|
||||
the entire range of available P-states is exposed by ``intel_pstate`` to the
|
||||
``CPUFreq`` core. However, in this mode the driver does not register
|
||||
utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq``
|
||||
information comes from the ``CPUFreq`` core (and is the last frequency selected
|
||||
by the current scaling governor for the given policy).
|
||||
|
||||
|
||||
.. _turbo:
|
||||
|
||||
Turbo P-states Support
|
||||
======================
|
||||
|
||||
In the majority of cases, the entire range of P-states available to
|
||||
``intel_pstate`` can be divided into two sub-ranges that correspond to
|
||||
different types of processor behavior, above and below a boundary that
|
||||
will be referred to as the "turbo threshold" in what follows.
|
||||
|
||||
The P-states above the turbo threshold are referred to as "turbo P-states" and
|
||||
the whole sub-range of P-states they belong to is referred to as the "turbo
|
||||
range". These names are related to the Turbo Boost technology allowing a
|
||||
multicore processor to opportunistically increase the P-state of one or more
|
||||
cores if there is enough power to do that and if that is not going to cause the
|
||||
thermal envelope of the processor package to be exceeded.
|
||||
|
||||
Specifically, if software sets the P-state of a CPU core within the turbo range
|
||||
(that is, above the turbo threshold), the processor is permitted to take over
|
||||
performance scaling control for that core and put it into turbo P-states of its
|
||||
choice going forward. However, that permission is interpreted differently by
|
||||
different processor generations. Namely, the Sandy Bridge generation of
|
||||
processors will never use any P-states above the last one set by software for
|
||||
the given core, even if it is within the turbo range, whereas all of the later
|
||||
processor generations will take it as a license to use any P-states from the
|
||||
turbo range, even above the one set by software. In other words, on those
|
||||
processors setting any P-state from the turbo range will enable the processor
|
||||
to put the given core into all turbo P-states up to and including the maximum
|
||||
supported one as it sees fit.
|
||||
|
||||
One important property of turbo P-states is that they are not sustainable. More
|
||||
precisely, there is no guarantee that any CPUs will be able to stay in any of
|
||||
those states indefinitely, because the power distribution within the processor
|
||||
package may change over time or the thermal envelope it was designed for might
|
||||
be exceeded if a turbo P-state was used for too long.
|
||||
|
||||
In turn, the P-states below the turbo threshold generally are sustainable. In
|
||||
fact, if one of them is set by software, the processor is not expected to change
|
||||
it to a lower one unless in a thermal stress or a power limit violation
|
||||
situation (a higher P-state may still be used if it is set for another CPU in
|
||||
the same package at the same time, for example).
|
||||
|
||||
Some processors allow multiple cores to be in turbo P-states at the same time,
|
||||
but the maximum P-state that can be set for them generally depends on the number
|
||||
of cores running concurrently. The maximum turbo P-state that can be set for 3
|
||||
cores at the same time usually is lower than the analogous maximum P-state for
|
||||
2 cores, which in turn usually is lower than the maximum turbo P-state that can
|
||||
be set for 1 core. The one-core maximum turbo P-state is thus the maximum
|
||||
supported one overall.
|
||||
|
||||
The maximum supported turbo P-state, the turbo threshold (the maximum supported
|
||||
non-turbo P-state) and the minimum supported P-state are specific to the
|
||||
processor model and can be determined by reading the processor's model-specific
|
||||
registers (MSRs). Moreover, some processors support the Configurable TDP
|
||||
(Thermal Design Power) feature and, when that feature is enabled, the turbo
|
||||
threshold effectively becomes a configurable value that can be set by the
|
||||
platform firmware.
|
||||
|
||||
Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
|
||||
the entire range of available P-states, including the whole turbo range, to the
|
||||
``CPUFreq`` core and (in the passive mode) to generic scaling governors. This
|
||||
generally causes turbo P-states to be set more often when ``intel_pstate`` is
|
||||
used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
|
||||
for more information).
|
||||
|
||||
Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
|
||||
(even if the Configurable TDP feature is enabled in the processor), its
|
||||
``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
|
||||
work as expected in all cases (that is, if set to disable turbo P-states, it
|
||||
always should prevent ``intel_pstate`` from using them).
|
||||
|
||||
|
||||
Processor Support
|
||||
=================
|
||||
|
||||
To handle a given processor ``intel_pstate`` requires a number of different
|
||||
pieces of information on it to be known, including:
|
||||
|
||||
* The minimum supported P-state.
|
||||
|
||||
* The maximum supported `non-turbo P-state <turbo_>`_.
|
||||
|
||||
* Whether or not turbo P-states are supported at all.
|
||||
|
||||
* The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
|
||||
are supported).
|
||||
|
||||
* The scaling formula to translate the driver's internal representation
|
||||
of P-states into frequencies and the other way around.
|
||||
|
||||
Generally, ways to obtain that information are specific to the processor model
|
||||
or family. Although it often is possible to obtain all of it from the processor
|
||||
itself (using model-specific registers), there are cases in which hardware
|
||||
manuals need to be consulted to get to it too.
|
||||
|
||||
For this reason, there is a list of supported processors in ``intel_pstate`` and
|
||||
the driver initialization will fail if the detected processor is not in that
|
||||
list, unless it supports the `HWP feature <Active Mode_>`_. [The interface to
|
||||
obtain all of the information listed above is the same for all of the processors
|
||||
supporting the HWP feature, which is why they all are supported by
|
||||
``intel_pstate``.]
|
||||
|
||||
|
||||
User Space Interface in ``sysfs``
|
||||
=================================
|
||||
|
||||
Global Attributes
|
||||
-----------------
|
||||
|
||||
``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They are located in the
|
||||
``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all
|
||||
CPUs.
|
||||
|
||||
Some of them are not present if the ``intel_pstate=per_cpu_perf_limits``
|
||||
argument is passed to the kernel in the command line.
|
||||
|
||||
``max_perf_pct``
|
||||
Maximum P-state the driver is allowed to set in percent of the
|
||||
maximum supported performance level (the highest supported `turbo
|
||||
P-state <turbo_>`_).
|
||||
|
||||
This attribute will not be exposed if the
|
||||
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
|
||||
command line.
|
||||
|
||||
``min_perf_pct``
|
||||
Minimum P-state the driver is allowed to set in percent of the
|
||||
maximum supported performance level (the highest supported `turbo
|
||||
P-state <turbo_>`_).
|
||||
|
||||
This attribute will not be exposed if the
|
||||
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
|
||||
command line.
|
||||
|
||||
``num_pstates``
|
||||
Number of P-states supported by the processor (between 0 and 255
|
||||
inclusive) including both turbo and non-turbo P-states (see
|
||||
`Turbo P-states Support`_).
|
||||
|
||||
The value of this attribute is not affected by the ``no_turbo``
|
||||
setting described `below <no_turbo_attr_>`_.
|
||||
|
||||
This attribute is read-only.
|
||||
|
||||
``turbo_pct``
|
||||
Ratio of the `turbo range <turbo_>`_ size to the size of the entire
|
||||
range of supported P-states, in percent.
|
||||
|
||||
This attribute is read-only.
|
||||
|
||||
.. _no_turbo_attr:
|
||||
|
||||
``no_turbo``
|
||||
If set (equal to 1), the driver is not allowed to set any turbo P-states
|
||||
(see `Turbo P-states Support`_). If unset (equalt to 0, which is the
|
||||
default), turbo P-states can be set by the driver.
|
||||
[Note that ``intel_pstate`` does not support the general ``boost``
|
||||
attribute (supported by some other scaling drivers) which is replaced
|
||||
by this one.]
|
||||
|
||||
This attrubute does not affect the maximum supported frequency value
|
||||
supplied to the ``CPUFreq`` core and exposed via the policy interface,
|
||||
but it affects the maximum possible value of per-policy P-state limits
|
||||
(see `Interpretation of Policy Attributes`_ below for details).
|
||||
|
||||
.. _status_attr:
|
||||
|
||||
``status``
|
||||
Operation mode of the driver: "active", "passive" or "off".
|
||||
|
||||
"active"
|
||||
The driver is functional and in the `active mode
|
||||
<Active Mode_>`_.
|
||||
|
||||
"passive"
|
||||
The driver is functional and in the `passive mode
|
||||
<Passive Mode_>`_.
|
||||
|
||||
"off"
|
||||
The driver is not functional (it is not registered as a scaling
|
||||
driver with the ``CPUFreq`` core).
|
||||
|
||||
This attribute can be written to in order to change the driver's
|
||||
operation mode or to unregister it. The string written to it must be
|
||||
one of the possible values of it and, if successful, the write will
|
||||
cause the driver to switch over to the operation mode represented by
|
||||
that string - or to be unregistered in the "off" case. [Actually,
|
||||
switching over from the active mode to the passive mode or the other
|
||||
way around causes the driver to be unregistered and registered again
|
||||
with a different set of callbacks, so all of its settings (the global
|
||||
as well as the per-policy ones) are then reset to their default
|
||||
values, possibly depending on the target operation mode.]
|
||||
|
||||
That only is supported in some configurations, though (for example, if
|
||||
the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
|
||||
the operation mode of the driver cannot be changed), and if it is not
|
||||
supported in the current configuration, writes to this attribute with
|
||||
fail with an appropriate error.
|
||||
|
||||
Interpretation of Policy Attributes
|
||||
-----------------------------------
|
||||
|
||||
The interpretation of some ``CPUFreq`` policy attributes described in
|
||||
:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
|
||||
and it generally depends on the driver's `operation mode <Operation Modes_>`_.
|
||||
|
||||
First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
|
||||
``scaling_cur_freq`` attributes are produced by applying a processor-specific
|
||||
multiplier to the internal P-state representation used by ``intel_pstate``.
|
||||
Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
|
||||
attributes are capped by the frequency corresponding to the maximum P-state that
|
||||
the driver is allowed to set.
|
||||
|
||||
If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
|
||||
not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
|
||||
and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
|
||||
Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
|
||||
``scaling_min_freq`` to go down to that value if they were above it before.
|
||||
However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
|
||||
restored after unsetting ``no_turbo``, unless these attributes have been written
|
||||
to after ``no_turbo`` was set.
|
||||
|
||||
If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq``
|
||||
and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
|
||||
which also is the value of ``cpuinfo_max_freq`` in either case.
|
||||
|
||||
Next, the following policy attributes have special meaning if
|
||||
``intel_pstate`` works in the `active mode <Active Mode_>`_:
|
||||
|
||||
``scaling_available_governors``
|
||||
List of P-state selection algorithms provided by ``intel_pstate``.
|
||||
|
||||
``scaling_governor``
|
||||
P-state selection algorithm provided by ``intel_pstate`` currently in
|
||||
use with the given policy.
|
||||
|
||||
``scaling_cur_freq``
|
||||
Frequency of the average P-state of the CPU represented by the given
|
||||
policy for the time interval between the last two invocations of the
|
||||
driver's utilization update callback by the CPU scheduler for that CPU.
|
||||
|
||||
The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
|
||||
same as for other scaling drivers.
|
||||
|
||||
Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
|
||||
depends on the operation mode of the driver. Namely, it is either
|
||||
"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
|
||||
`passive mode <Passive Mode_>`_).
|
||||
|
||||
Coordination of P-State Limits
|
||||
------------------------------
|
||||
|
||||
``intel_pstate`` allows P-state limits to be set in two ways: with the help of
|
||||
the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
|
||||
<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
|
||||
``CPUFreq`` policy attributes. The coordination between those limits is based
|
||||
on the following rules, regardless of the current operation mode of the driver:
|
||||
|
||||
1. All CPUs are affected by the global limits (that is, none of them can be
|
||||
requested to run faster than the global maximum and none of them can be
|
||||
requested to run slower than the global minimum).
|
||||
|
||||
2. Each individual CPU is affected by its own per-policy limits (that is, it
|
||||
cannot be requested to run faster than its own per-policy maximum and it
|
||||
cannot be requested to run slower than its own per-policy minimum).
|
||||
|
||||
3. The global and per-policy limits can be set independently.
|
||||
|
||||
If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
|
||||
resulting effective values are written into its registers whenever the limits
|
||||
change in order to request its internal P-state selection logic to always set
|
||||
P-states within these limits. Otherwise, the limits are taken into account by
|
||||
scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
|
||||
every time before setting a new P-state for a CPU.
|
||||
|
||||
Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
|
||||
is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
|
||||
at all and the only way to set the limits is by using the policy attributes.
|
||||
|
||||
|
||||
Energy vs Performance Hints
|
||||
---------------------------
|
||||
|
||||
If ``intel_pstate`` works in the `active mode with the HWP feature enabled
|
||||
<Active Mode With HWP_>`_ in the processor, additional attributes are present
|
||||
in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow
|
||||
user space to help ``intel_pstate`` to adjust the processor's internal P-state
|
||||
selection logic by focusing it on performance or on energy-efficiency, or
|
||||
somewhere between the two extremes:
|
||||
|
||||
``energy_performance_preference``
|
||||
Current value of the energy vs performance hint for the given policy
|
||||
(or the CPU represented by it).
|
||||
|
||||
The hint can be changed by writing to this attribute.
|
||||
|
||||
``energy_performance_available_preferences``
|
||||
List of strings that can be written to the
|
||||
``energy_performance_preference`` attribute.
|
||||
|
||||
They represent different energy vs performance hints and should be
|
||||
self-explanatory, except that ``default`` represents whatever hint
|
||||
value was set by the platform firmware.
|
||||
|
||||
Strings written to the ``energy_performance_preference`` attribute are
|
||||
internally translated to integer values written to the processor's
|
||||
Energy-Performance Preference (EPP) knob (if supported) or its
|
||||
Energy-Performance Bias (EPB) knob.
|
||||
|
||||
[Note that tasks may by migrated from one CPU to another by the scheduler's
|
||||
load-balancing algorithm and if different energy vs performance hints are
|
||||
set for those CPUs, that may lead to undesirable outcomes. To avoid such
|
||||
issues it is better to set the same energy vs performance hint for all CPUs
|
||||
or to pin every task potentially sensitive to them to a specific CPU.]
|
||||
|
||||
.. _acpi-cpufreq:
|
||||
|
||||
``intel_pstate`` vs ``acpi-cpufreq``
|
||||
====================================
|
||||
|
||||
On the majority of systems supported by ``intel_pstate``, the ACPI tables
|
||||
provided by the platform firmware contain ``_PSS`` objects returning information
|
||||
that can be used for CPU performance scaling (refer to the `ACPI specification`_
|
||||
for details on the ``_PSS`` objects and the format of the information returned
|
||||
by them).
|
||||
|
||||
The information returned by the ACPI ``_PSS`` objects is used by the
|
||||
``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate``
|
||||
the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling
|
||||
interface, but the set of P-states it can use is limited by the ``_PSS``
|
||||
output.
|
||||
|
||||
On those systems each ``_PSS`` object returns a list of P-states supported by
|
||||
the corresponding CPU which basically is a subset of the P-states range that can
|
||||
be used by ``intel_pstate`` on the same system, with one exception: the whole
|
||||
`turbo range <turbo_>`_ is represented by one item in it (the topmost one). By
|
||||
convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
|
||||
than the frequency of the highest non-turbo P-state listed by it, but the
|
||||
corresponding P-state representation (following the hardware specification)
|
||||
returned for it matches the maximum supported turbo P-state (or is the
|
||||
special value 255 meaning essentially "go as high as you can get").
|
||||
|
||||
The list of P-states returned by ``_PSS`` is reflected by the table of
|
||||
available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and
|
||||
scaling governors and the minimum and maximum supported frequencies reported by
|
||||
it come from that list as well. In particular, given the special representation
|
||||
of the turbo range described above, this means that the maximum supported
|
||||
frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency
|
||||
of the highest supported non-turbo P-state listed by ``_PSS`` which, of course,
|
||||
affects decisions made by the scaling governors, except for ``powersave`` and
|
||||
``performance``.
|
||||
|
||||
For example, if a given governor attempts to select a frequency proportional to
|
||||
estimated CPU load and maps the load of 100% to the maximum supported frequency
|
||||
(possibly multiplied by a constant), then it will tend to choose P-states below
|
||||
the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because
|
||||
in that case the turbo range corresponds to a small fraction of the frequency
|
||||
band it can use (1 MHz vs 1 GHz or more). In consequence, it will only go to
|
||||
the turbo range for the highest loads and the other loads above 50% that might
|
||||
benefit from running at turbo frequencies will be given non-turbo P-states
|
||||
instead.
|
||||
|
||||
One more issue related to that may appear on systems supporting the
|
||||
`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
|
||||
turbo threshold. Namely, if that is not coordinated with the lists of P-states
|
||||
returned by ``_PSS`` properly, there may be more than one item corresponding to
|
||||
a turbo P-state in those lists and there may be a problem with avoiding the
|
||||
turbo range (if desirable or necessary). Usually, to avoid using turbo
|
||||
P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
|
||||
by ``_PSS``, but that is not sufficient when there are other turbo P-states in
|
||||
the list returned by it.
|
||||
|
||||
Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
|
||||
`passive mode <Passive Mode_>`_, except that the number of P-states it can set
|
||||
is limited to the ones listed by the ACPI ``_PSS`` objects.
|
||||
|
||||
|
||||
Kernel Command Line Options for ``intel_pstate``
|
||||
================================================
|
||||
|
||||
Several kernel command line options can be used to pass early-configuration-time
|
||||
parameters to ``intel_pstate`` in order to enforce specific behavior of it. All
|
||||
of them have to be prepended with the ``intel_pstate=`` prefix.
|
||||
|
||||
``disable``
|
||||
Do not register ``intel_pstate`` as the scaling driver even if the
|
||||
processor is supported by it.
|
||||
|
||||
``passive``
|
||||
Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
|
||||
start with.
|
||||
|
||||
This option implies the ``no_hwp`` one described below.
|
||||
|
||||
``force``
|
||||
Register ``intel_pstate`` as the scaling driver instead of
|
||||
``acpi-cpufreq`` even if the latter is preferred on the given system.
|
||||
|
||||
This may prevent some platform features (such as thermal controls and
|
||||
power capping) that rely on the availability of ACPI P-states
|
||||
information from functioning as expected, so it should be used with
|
||||
caution.
|
||||
|
||||
This option does not work with processors that are not supported by
|
||||
``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling
|
||||
driver is used instead of ``acpi-cpufreq``.
|
||||
|
||||
``no_hwp``
|
||||
Do not enable the `hardware-managed P-states (HWP) feature
|
||||
<Active Mode With HWP_>`_ even if it is supported by the processor.
|
||||
|
||||
``hwp_only``
|
||||
Register ``intel_pstate`` as the scaling driver only if the
|
||||
`hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
|
||||
supported by the processor.
|
||||
|
||||
``support_acpi_ppc``
|
||||
Take ACPI ``_PPC`` performance limits into account.
|
||||
|
||||
If the preferred power management profile in the FADT (Fixed ACPI
|
||||
Description Table) is set to "Enterprise Server" or "Performance
|
||||
Server", the ACPI ``_PPC`` limits are taken into account by default
|
||||
and this option has no effect.
|
||||
|
||||
``per_cpu_perf_limits``
|
||||
Use per-logical-CPU P-State limits (see `Coordination of P-state
|
||||
Limits`_ for details).
|
||||
|
||||
|
||||
Diagnostics and Tuning
|
||||
======================
|
||||
|
||||
Trace Events
|
||||
------------
|
||||
|
||||
There are two static trace events that can be used for ``intel_pstate``
|
||||
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
|
||||
by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
|
||||
to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if
|
||||
it works in the `active mode <Active Mode_>`_.
|
||||
|
||||
The following sequence of shell commands can be used to enable them and see
|
||||
their output (if the kernel is generally configured to support event tracing)::
|
||||
|
||||
# cd /sys/kernel/debug/tracing/
|
||||
# echo 1 > events/power/pstate_sample/enable
|
||||
# echo 1 > events/power/cpu_frequency/enable
|
||||
# cat trace
|
||||
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
|
||||
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
|
||||
|
||||
If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
|
||||
``cpu_frequency`` trace event will be triggered either by the ``schedutil``
|
||||
scaling governor (for the policies it is attached to), or by the ``CPUFreq``
|
||||
core (for the policies with other scaling governors).
|
||||
|
||||
``ftrace``
|
||||
----------
|
||||
|
||||
The ``ftrace`` interface can be used for low-level diagnostics of
|
||||
``intel_pstate``. For example, to check how often the function to set a
|
||||
P-state is called, the ``ftrace`` filter can be set to to
|
||||
:c:func:`intel_pstate_set_pstate`::
|
||||
|
||||
# cd /sys/kernel/debug/tracing/
|
||||
# cat available_filter_functions | grep -i pstate
|
||||
intel_pstate_set_pstate
|
||||
intel_pstate_cpu_init
|
||||
...
|
||||
# echo intel_pstate_set_pstate > set_ftrace_filter
|
||||
# echo function > current_tracer
|
||||
# cat trace | head -15
|
||||
# tracer: function
|
||||
#
|
||||
# entries-in-buffer/entries-written: 80/80 #P:4
|
||||
#
|
||||
# _-----=> irqs-off
|
||||
# / _----=> need-resched
|
||||
# | / _---=> hardirq/softirq
|
||||
# || / _--=> preempt-depth
|
||||
# ||| / delay
|
||||
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
|
||||
# | | | |||| | |
|
||||
Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
|
||||
Tuning Interface in ``debugfs``
|
||||
-------------------------------
|
||||
|
||||
The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
|
||||
processors in the active mode <powersave_>`_ is based on a `PID controller`_
|
||||
whose parameters were chosen to address a number of different use cases at the
|
||||
same time. However, it still is possible to fine-tune it to a specific workload
|
||||
and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is
|
||||
provided for this purpose. [Note that the ``pstate_snb`` directory will be
|
||||
present only if the specific P-state selection algorithm matching the interface
|
||||
in it actually is in use.]
|
||||
|
||||
The following files present in that directory can be used to modify the PID
|
||||
controller parameters at run time:
|
||||
|
||||
| ``deadband``
|
||||
| ``d_gain_pct``
|
||||
| ``i_gain_pct``
|
||||
| ``p_gain_pct``
|
||||
| ``sample_rate_ms``
|
||||
| ``setpoint``
|
||||
|
||||
Note, however, that achieving desirable results this way generally requires
|
||||
expert-level understanding of the power vs performance tradeoff, so extra care
|
||||
is recommended when attempting to do that.
|
||||
|
||||
|
||||
.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
|
||||
.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
|
||||
.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
|
||||
.. _PID controller: https://en.wikipedia.org/wiki/PID_controller
|
|
@ -1,281 +0,0 @@
|
|||
Intel P-State driver
|
||||
--------------------
|
||||
|
||||
This driver provides an interface to control the P-State selection for the
|
||||
SandyBridge+ Intel processors.
|
||||
|
||||
The following document explains P-States:
|
||||
http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
|
||||
As stated in the document, P-State doesn’t exactly mean a frequency. However, for
|
||||
the sake of the relationship with cpufreq, P-State and frequency are used
|
||||
interchangeably.
|
||||
|
||||
Understanding the cpufreq core governors and policies are important before
|
||||
discussing more details about the Intel P-State driver. Based on what callbacks
|
||||
a cpufreq driver provides to the cpufreq core, it can support two types of
|
||||
drivers:
|
||||
- with target_index() callback: In this mode, the drivers using cpufreq core
|
||||
simply provide the minimum and maximum frequency limits and an additional
|
||||
interface target_index() to set the current frequency. The cpufreq subsystem
|
||||
has a number of scaling governors ("performance", "powersave", "ondemand",
|
||||
etc.). Depending on which governor is in use, cpufreq core will call for
|
||||
transitions to a specific frequency using target_index() callback.
|
||||
- setpolicy() callback: In this mode, drivers do not provide target_index()
|
||||
callback, so cpufreq core can't request a transition to a specific frequency.
|
||||
The driver provides minimum and maximum frequency limits and callbacks to set a
|
||||
policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
|
||||
The cpufreq core can request the driver to operate in any of the two policies:
|
||||
"performance" and "powersave". The driver decides which frequency to use based
|
||||
on the above policy selection considering minimum and maximum frequency limits.
|
||||
|
||||
The Intel P-State driver falls under the latter category, which implements the
|
||||
setpolicy() callback. This driver decides what P-State to use based on the
|
||||
requested policy from the cpufreq core. If the processor is capable of
|
||||
selecting its next P-State internally, then the driver will offload this
|
||||
responsibility to the processor (aka HWP: Hardware P-States). If not, the
|
||||
driver implements algorithms to select the next P-State.
|
||||
|
||||
Since these policies are implemented in the driver, they are not same as the
|
||||
cpufreq scaling governors implementation, even if they have the same name in
|
||||
the cpufreq sysfs (scaling_governors). For example the "performance" policy is
|
||||
similar to cpufreq’s "performance" governor, but "powersave" is completely
|
||||
different than the cpufreq "powersave" governor. The strategy here is similar
|
||||
to cpufreq "ondemand", where the requested P-State is related to the system load.
|
||||
|
||||
Sysfs Interface
|
||||
|
||||
In addition to the frequency-controlling interfaces provided by the cpufreq
|
||||
core, the driver provides its own sysfs files to control the P-State selection.
|
||||
These files have been added to /sys/devices/system/cpu/intel_pstate/.
|
||||
Any changes made to these files are applicable to all CPUs (even in a
|
||||
multi-package system, Refer to later section on placing "Per-CPU limits").
|
||||
|
||||
max_perf_pct: Limits the maximum P-State that will be requested by
|
||||
the driver. It states it as a percentage of the available performance. The
|
||||
available (P-State) performance may be reduced by the no_turbo
|
||||
setting described below.
|
||||
|
||||
min_perf_pct: Limits the minimum P-State that will be requested by
|
||||
the driver. It states it as a percentage of the max (non-turbo)
|
||||
performance level.
|
||||
|
||||
no_turbo: Limits the driver to selecting P-State below the turbo
|
||||
frequency range.
|
||||
|
||||
turbo_pct: Displays the percentage of the total performance that
|
||||
is supported by hardware that is in the turbo range. This number
|
||||
is independent of whether turbo has been disabled or not.
|
||||
|
||||
num_pstates: Displays the number of P-States that are supported
|
||||
by hardware. This number is independent of whether turbo has
|
||||
been disabled or not.
|
||||
|
||||
For example, if a system has these parameters:
|
||||
Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
|
||||
Max non turbo ratio: 0x17
|
||||
Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
|
||||
|
||||
Sysfs will show :
|
||||
max_perf_pct:100, which corresponds to 1 core ratio
|
||||
min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
|
||||
no_turbo:0, turbo is not disabled
|
||||
num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
|
||||
turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
|
||||
|
||||
Refer to "Intel® 64 and IA-32 Architectures Software Developer’s Manual
|
||||
Volume 3: System Programming Guide" to understand ratios.
|
||||
|
||||
There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
|
||||
that can be used for controlling the operation mode of the driver:
|
||||
|
||||
status: Three settings are possible:
|
||||
"off" - The driver is not in use at this time.
|
||||
"active" - The driver works as a P-state governor (default).
|
||||
"passive" - The driver works as a regular cpufreq one and collaborates
|
||||
with the generic cpufreq governors (it sets P-states as
|
||||
requested by those governors).
|
||||
The current setting is returned by reads from this attribute. Writing one
|
||||
of the above strings to it changes the operation mode as indicated by that
|
||||
string, if possible. If HW-managed P-states (HWP) are enabled, it is not
|
||||
possible to change the driver's operation mode and attempts to write to
|
||||
this attribute will fail.
|
||||
|
||||
cpufreq sysfs for Intel P-State
|
||||
|
||||
Since this driver registers with cpufreq, cpufreq sysfs is also presented.
|
||||
There are some important differences, which need to be considered.
|
||||
|
||||
scaling_cur_freq: This displays the real frequency which was used during
|
||||
the last sample period instead of what is requested. Some other cpufreq driver,
|
||||
like acpi-cpufreq, displays what is requested (Some changes are on the
|
||||
way to fix this for acpi-cpufreq driver). The same is true for frequencies
|
||||
displayed at /proc/cpuinfo.
|
||||
|
||||
scaling_governor: This displays current active policy. Since each CPU has a
|
||||
cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
|
||||
is not possible with Intel P-States, as there is one common policy for all
|
||||
CPUs. Here, the last requested policy will be applicable to all CPUs. It is
|
||||
suggested that one use the cpupower utility to change policy to all CPUs at the
|
||||
same time.
|
||||
|
||||
scaling_setspeed: This attribute can never be used with Intel P-State.
|
||||
|
||||
scaling_max_freq/scaling_min_freq: This interface can be used similarly to
|
||||
the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
|
||||
are converted to nearest possible P-State, this is prone to rounding errors.
|
||||
This method is not preferred to limit performance.
|
||||
|
||||
affected_cpus: Not used
|
||||
related_cpus: Not used
|
||||
|
||||
For contemporary Intel processors, the frequency is controlled by the
|
||||
processor itself and the P-State exposed to software is related to
|
||||
performance levels. The idea that frequency can be set to a single
|
||||
frequency is fictional for Intel Core processors. Even if the scaling
|
||||
driver selects a single P-State, the actual frequency the processor
|
||||
will run at is selected by the processor itself.
|
||||
|
||||
Per-CPU limits
|
||||
|
||||
The kernel command line option "intel_pstate=per_cpu_perf_limits" forces
|
||||
the intel_pstate driver to use per-CPU performance limits. When it is set,
|
||||
the sysfs control interface described above is subject to limitations.
|
||||
- The following controls are not available for both read and write
|
||||
/sys/devices/system/cpu/intel_pstate/max_perf_pct
|
||||
/sys/devices/system/cpu/intel_pstate/min_perf_pct
|
||||
- The following controls can be used to set performance limits, as far as the
|
||||
architecture of the processor permits:
|
||||
/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
|
||||
/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
|
||||
/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
|
||||
- User can still observe turbo percent and number of P-States from
|
||||
/sys/devices/system/cpu/intel_pstate/turbo_pct
|
||||
/sys/devices/system/cpu/intel_pstate/num_pstates
|
||||
- User can read write system wide turbo status
|
||||
/sys/devices/system/cpu/no_turbo
|
||||
|
||||
Support of energy performance hints
|
||||
It is possible to provide hints to the HWP algorithms in the processor
|
||||
to be more performance centric to more energy centric. When the driver
|
||||
is using HWP, two additional cpufreq sysfs attributes are presented for
|
||||
each logical CPU.
|
||||
These attributes are:
|
||||
- energy_performance_available_preferences
|
||||
- energy_performance_preference
|
||||
|
||||
To get list of supported hints:
|
||||
$ cat energy_performance_available_preferences
|
||||
default performance balance_performance balance_power power
|
||||
|
||||
The current preference can be read or changed via cpufreq sysfs
|
||||
attribute "energy_performance_preference". Reading from this attribute
|
||||
will display current effective setting. User can write any of the valid
|
||||
preference string to this attribute. User can always restore to power-on
|
||||
default by writing "default".
|
||||
|
||||
Since threads can migrate to different CPUs, this is possible that the
|
||||
new CPU may have different energy performance preference than the previous
|
||||
one. To avoid such issues, either threads can be pinned to specific CPUs
|
||||
or set the same energy performance preference value to all CPUs.
|
||||
|
||||
Tuning Intel P-State driver
|
||||
|
||||
When the performance can be tuned using PID (Proportional Integral
|
||||
Derivative) controller, debugfs files are provided for adjusting performance.
|
||||
They are presented under:
|
||||
/sys/kernel/debug/pstate_snb/
|
||||
|
||||
The PID tunable parameters are:
|
||||
deadband
|
||||
d_gain_pct
|
||||
i_gain_pct
|
||||
p_gain_pct
|
||||
sample_rate_ms
|
||||
setpoint
|
||||
|
||||
To adjust these parameters, some understanding of driver implementation is
|
||||
necessary. There are some tweeks described here, but be very careful. Adjusting
|
||||
them requires expert level understanding of power and performance relationship.
|
||||
These limits are only useful when the "powersave" policy is active.
|
||||
|
||||
-To make the system more responsive to load changes, sample_rate_ms can
|
||||
be adjusted (current default is 10ms).
|
||||
-To make the system use higher performance, even if the load is lower, setpoint
|
||||
can be adjusted to a lower number. This will also lead to faster ramp up time
|
||||
to reach the maximum P-State.
|
||||
If there are no derivative and integral coefficients, The next P-State will be
|
||||
equal to:
|
||||
current P-State - ((setpoint - current cpu load) * p_gain_pct)
|
||||
|
||||
For example, if the current PID parameters are (Which are defaults for the core
|
||||
processors like SandyBridge):
|
||||
deadband = 0
|
||||
d_gain_pct = 0
|
||||
i_gain_pct = 0
|
||||
p_gain_pct = 20
|
||||
sample_rate_ms = 10
|
||||
setpoint = 97
|
||||
|
||||
If the current P-State = 0x08 and current load = 100, this will result in the
|
||||
next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
|
||||
goes up by only 1. If during next sample interval the current load doesn't
|
||||
change and still 100, then P-State goes up by one again. This process will
|
||||
continue as long as the load is more than the setpoint until the maximum P-State
|
||||
is reached.
|
||||
|
||||
For the same load at setpoint = 60, this will result in the next P-State
|
||||
= 0x08 - ((60 - 100) * 0.2) = 16
|
||||
So by changing the setpoint from 97 to 60, there is an increase of the
|
||||
next P-State from 9 to 16. So this will make processor execute at higher
|
||||
P-State for the same CPU load. If the load continues to be more than the
|
||||
setpoint during next sample intervals, then P-State will go up again till the
|
||||
maximum P-State is reached. But the ramp up time to reach the maximum P-State
|
||||
will be much faster when the setpoint is 60 compared to 97.
|
||||
|
||||
Debugging Intel P-State driver
|
||||
|
||||
Event tracing
|
||||
To debug P-State transition, the Linux event tracing interface can be used.
|
||||
There are two specific events, which can be enabled (Provided the kernel
|
||||
configs related to event tracing are enabled).
|
||||
|
||||
# cd /sys/kernel/debug/tracing/
|
||||
# echo 1 > events/power/pstate_sample/enable
|
||||
# echo 1 > events/power/cpu_frequency/enable
|
||||
# cat trace
|
||||
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107
|
||||
scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
|
||||
freq=2474476
|
||||
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
|
||||
|
||||
|
||||
Using ftrace
|
||||
|
||||
If function level tracing is required, the Linux ftrace interface can be used.
|
||||
For example if we want to check how often a function to set a P-State is
|
||||
called, we can set ftrace filter to intel_pstate_set_pstate.
|
||||
|
||||
# cd /sys/kernel/debug/tracing/
|
||||
# cat available_filter_functions | grep -i pstate
|
||||
intel_pstate_set_pstate
|
||||
intel_pstate_cpu_init
|
||||
...
|
||||
|
||||
# echo intel_pstate_set_pstate > set_ftrace_filter
|
||||
# echo function > current_tracer
|
||||
# cat trace | head -15
|
||||
# tracer: function
|
||||
#
|
||||
# entries-in-buffer/entries-written: 80/80 #P:4
|
||||
#
|
||||
# _-----=> irqs-off
|
||||
# / _----=> need-resched
|
||||
# | / _---=> hardirq/softirq
|
||||
# || / _--=> preempt-depth
|
||||
# ||| / delay
|
||||
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
|
||||
# | | | |||| | |
|
||||
Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
|
|
@ -36,7 +36,7 @@ Optional properties:
|
|||
control gpios
|
||||
|
||||
- threshold: allows setting the "click"-threshold in the range
|
||||
from 20 to 80.
|
||||
from 0 to 80.
|
||||
|
||||
- gain: allows setting the sensitivity in the range from 0 to
|
||||
31. Note that lower values indicate higher
|
||||
|
|
|
@ -16,6 +16,11 @@ Required properties:
|
|||
- reg: Base address of PMIC on Hi6220 SoC.
|
||||
- interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
|
||||
- pmic-gpios: The GPIO used by PMIC IRQ.
|
||||
- #clock-cells: From common clock binding; shall be set to 0
|
||||
|
||||
Optional properties:
|
||||
- clock-output-names: From common clock binding to override the
|
||||
default output clock name
|
||||
|
||||
Example:
|
||||
pmic: pmic@f8000000 {
|
||||
|
@ -24,4 +29,5 @@ Example:
|
|||
interrupt-controller;
|
||||
#interrupt-cells = <2>;
|
||||
pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
|
||||
#clock-cells = <0>;
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@ Optional properties:
|
|||
"ext_clock" (External clock provided to the card).
|
||||
- post-power-on-delay-ms : Delay in ms after powering the card and
|
||||
de-asserting the reset-gpios (if any)
|
||||
- power-off-delay-us : Delay in us after asserting the reset-gpios (if any)
|
||||
during power off of the card.
|
||||
|
||||
Example:
|
||||
|
||||
|
|
|
@ -26,6 +26,10 @@ Optional properties:
|
|||
- interrupt-controller : Indicates the switch is itself an interrupt
|
||||
controller. This is used for the PHY interrupts.
|
||||
#interrupt-cells = <2> : Controller uses two cells, number and flag
|
||||
- eeprom-length : Set to the length of an EEPROM connected to the
|
||||
switch. Must be set if the switch can not detect
|
||||
the presence and/or size of a connected EEPROM,
|
||||
otherwise optional.
|
||||
- mdio : Container of PHY and devices on the switches MDIO
|
||||
bus.
|
||||
- mdio? : Container of PHYs and devices on the external MDIO
|
||||
|
|
|
@ -15,6 +15,10 @@ Optional properties:
|
|||
- phy-reset-active-high : If present then the reset sequence using the GPIO
|
||||
specified in the "phy-reset-gpios" property is reversed (H=reset state,
|
||||
L=operation state).
|
||||
- phy-reset-post-delay : Post reset delay in milliseconds. If present then
|
||||
a delay of phy-reset-post-delay milliseconds will be observed after the
|
||||
phy-reset-gpios has been toggled. Can be omitted thus no delay is
|
||||
observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
|
||||
- phy-supply : regulator that powers the Ethernet PHY.
|
||||
- phy-handle : phandle to the PHY device connected to this device.
|
||||
- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
|
||||
|
|
|
@ -247,7 +247,6 @@ bias-bus-hold - latch weakly
|
|||
bias-pull-up - pull up the pin
|
||||
bias-pull-down - pull down the pin
|
||||
bias-pull-pin-default - use pin-default pull state
|
||||
bi-directional - pin supports simultaneous input/output operations
|
||||
drive-push-pull - drive actively high and low
|
||||
drive-open-drain - drive with open drain
|
||||
drive-open-source - drive with open source
|
||||
|
@ -260,7 +259,6 @@ input-debounce - debounce mode with debound time X
|
|||
power-source - select between different power supplies
|
||||
low-power-enable - enable low power mode
|
||||
low-power-disable - disable low power mode
|
||||
output-enable - enable output on pin regardless of output value
|
||||
output-low - set the pin to output mode with low level
|
||||
output-high - set the pin to output mode with high level
|
||||
slew-rate - set the slew rate
|
||||
|
|
|
@ -10,6 +10,7 @@ Required properties:
|
|||
- "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
|
||||
- "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs;
|
||||
- "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs;
|
||||
- "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs;
|
||||
- "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs;
|
||||
- "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs;
|
||||
- "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs;
|
||||
|
|
|
@ -15,7 +15,7 @@ It has been tested with the following devices:
|
|||
The driver allows configuration of the touch screen via a set of sysfs files:
|
||||
|
||||
/sys/class/input/eventX/device/device/threshold:
|
||||
allows setting the "click"-threshold in the range from 20 to 80.
|
||||
allows setting the "click"-threshold in the range from 0 to 80.
|
||||
|
||||
/sys/class/input/eventX/device/device/gain:
|
||||
allows setting the sensitivity in the range from 0 to 31. Note that
|
||||
|
|
194
Documentation/networking/dpaa.txt
Normal file
194
Documentation/networking/dpaa.txt
Normal file
|
@ -0,0 +1,194 @@
|
|||
The QorIQ DPAA Ethernet Driver
|
||||
==============================
|
||||
|
||||
Authors:
|
||||
Madalin Bucur <madalin.bucur@nxp.com>
|
||||
Camelia Groza <camelia.groza@nxp.com>
|
||||
|
||||
Contents
|
||||
========
|
||||
|
||||
- DPAA Ethernet Overview
|
||||
- DPAA Ethernet Supported SoCs
|
||||
- Configuring DPAA Ethernet in your kernel
|
||||
- DPAA Ethernet Frame Processing
|
||||
- DPAA Ethernet Features
|
||||
- Debugging
|
||||
|
||||
DPAA Ethernet Overview
|
||||
======================
|
||||
|
||||
DPAA stands for Data Path Acceleration Architecture and it is a
|
||||
set of networking acceleration IPs that are available on several
|
||||
generations of SoCs, both on PowerPC and ARM64.
|
||||
|
||||
The Freescale DPAA architecture consists of a series of hardware blocks
|
||||
that support Ethernet connectivity. The Ethernet driver depends upon the
|
||||
following drivers in the Linux kernel:
|
||||
|
||||
- Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
|
||||
drivers/iommu/fsl_*
|
||||
- Frame Manager (FMan)
|
||||
drivers/net/ethernet/freescale/fman
|
||||
- Queue Manager (QMan), Buffer Manager (BMan)
|
||||
drivers/soc/fsl/qbman
|
||||
|
||||
A simplified view of the dpaa_eth interfaces mapped to FMan MACs:
|
||||
|
||||
dpaa_eth /eth0\ ... /ethN\
|
||||
driver | | | |
|
||||
------------- ---- ----------- ---- -------------
|
||||
-Ports / Tx Rx \ ... / Tx Rx \
|
||||
FMan | | | |
|
||||
-MACs | MAC0 | | MACN |
|
||||
/ dtsec0 \ ... / dtsecN \ (or tgec)
|
||||
/ \ / \(or memac)
|
||||
--------- -------------- --- -------------- ---------
|
||||
FMan, FMan Port, FMan SP, FMan MURAM drivers
|
||||
---------------------------------------------------------
|
||||
FMan HW blocks: MURAM, MACs, Ports, SP
|
||||
---------------------------------------------------------
|
||||
|
||||
The dpaa_eth relation to the QMan, BMan and FMan:
|
||||
________________________________
|
||||
dpaa_eth / eth0 \
|
||||
driver / \
|
||||
--------- -^- -^- -^- --- ---------
|
||||
QMan driver / \ / \ / \ \ / | BMan |
|
||||
|Rx | |Rx | |Tx | |Tx | | driver |
|
||||
--------- |Dfl| |Err| |Cnf| |FQs| | |
|
||||
QMan HW |FQ | |FQ | |FQs| | | | |
|
||||
/ \ / \ / \ \ / | |
|
||||
--------- --- --- --- -v- ---------
|
||||
| FMan QMI | |
|
||||
| FMan HW FMan BMI | BMan HW |
|
||||
----------------------- --------
|
||||
|
||||
where the acronyms used above (and in the code) are:
|
||||
DPAA = Data Path Acceleration Architecture
|
||||
FMan = DPAA Frame Manager
|
||||
QMan = DPAA Queue Manager
|
||||
BMan = DPAA Buffers Manager
|
||||
QMI = QMan interface in FMan
|
||||
BMI = BMan interface in FMan
|
||||
FMan SP = FMan Storage Profiles
|
||||
MURAM = Multi-user RAM in FMan
|
||||
FQ = QMan Frame Queue
|
||||
Rx Dfl FQ = default reception FQ
|
||||
Rx Err FQ = Rx error frames FQ
|
||||
Tx Cnf FQ = Tx confirmation FQs
|
||||
Tx FQs = transmission frame queues
|
||||
dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps)
|
||||
tgec = ten gigabit Ethernet controller (10 Gbps)
|
||||
memac = multirate Ethernet MAC (10/100/1000/10000)
|
||||
|
||||
DPAA Ethernet Supported SoCs
|
||||
============================
|
||||
|
||||
The DPAA drivers enable the Ethernet controllers present on the following SoCs:
|
||||
|
||||
# PPC
|
||||
P1023
|
||||
P2041
|
||||
P3041
|
||||
P4080
|
||||
P5020
|
||||
P5040
|
||||
T1023
|
||||
T1024
|
||||
T1040
|
||||
T1042
|
||||
T2080
|
||||
T4240
|
||||
B4860
|
||||
|
||||
# ARM
|
||||
LS1043A
|
||||
LS1046A
|
||||
|
||||
Configuring DPAA Ethernet in your kernel
|
||||
========================================
|
||||
|
||||
To enable the DPAA Ethernet driver, the following Kconfig options are required:
|
||||
|
||||
# common for arch/arm64 and arch/powerpc platforms
|
||||
CONFIG_FSL_DPAA=y
|
||||
CONFIG_FSL_FMAN=y
|
||||
CONFIG_FSL_DPAA_ETH=y
|
||||
CONFIG_FSL_XGMAC_MDIO=y
|
||||
|
||||
# for arch/powerpc only
|
||||
CONFIG_FSL_PAMU=y
|
||||
|
||||
# common options needed for the PHYs used on the RDBs
|
||||
CONFIG_VITESSE_PHY=y
|
||||
CONFIG_REALTEK_PHY=y
|
||||
CONFIG_AQUANTIA_PHY=y
|
||||
|
||||
DPAA Ethernet Frame Processing
|
||||
==============================
|
||||
|
||||
On Rx, buffers for the incoming frames are retrieved from one of the three
|
||||
existing buffers pools. The driver initializes and seeds these, each with
|
||||
buffers of different sizes: 1KB, 2KB and 4KB.
|
||||
|
||||
On Tx, all transmitted frames are returned to the driver through Tx
|
||||
confirmation frame queues. The driver is then responsible for freeing the
|
||||
buffers. In order to do this properly, a backpointer is added to the buffer
|
||||
before transmission that points to the skb. When the buffer returns to the
|
||||
driver on a confirmation FQ, the skb can be correctly consumed.
|
||||
|
||||
DPAA Ethernet Features
|
||||
======================
|
||||
|
||||
Currently the DPAA Ethernet driver enables the basic features required for
|
||||
a Linux Ethernet driver. The support for advanced features will be added
|
||||
gradually.
|
||||
|
||||
The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
|
||||
checksum offload feature is enabled by default and cannot be controlled through
|
||||
ethtool.
|
||||
|
||||
The driver has support for multiple prioritized Tx traffic classes. Priorities
|
||||
range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
|
||||
strict priority levels. Each traffic class contains NR_CPU TX queues. By
|
||||
default, only one traffic class is enabled and the lowest priority Tx queues
|
||||
are used. Higher priority traffic classes can be enabled with the mqprio
|
||||
qdisc. For example, all four traffic classes are enabled on an interface with
|
||||
the following command. Furthermore, skb priority levels are mapped to traffic
|
||||
classes as follows:
|
||||
|
||||
* priorities 0 to 3 - traffic class 0 (low priority)
|
||||
* priorities 4 to 7 - traffic class 1 (medium-low priority)
|
||||
* priorities 8 to 11 - traffic class 2 (medium-high priority)
|
||||
* priorities 12 to 15 - traffic class 3 (high priority)
|
||||
|
||||
tc qdisc add dev <int> root handle 1: \
|
||||
mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
|
||||
|
||||
Debugging
|
||||
=========
|
||||
|
||||
The following statistics are exported for each interface through ethtool:
|
||||
|
||||
- interrupt count per CPU
|
||||
- Rx packets count per CPU
|
||||
- Tx packets count per CPU
|
||||
- Tx confirmed packets count per CPU
|
||||
- Tx S/G frames count per CPU
|
||||
- Tx error count per CPU
|
||||
- Rx error count per CPU
|
||||
- Rx error count per type
|
||||
- congestion related statistics:
|
||||
- congestion status
|
||||
- time spent in congestion
|
||||
- number of time the device entered congestion
|
||||
- dropped packets count per cause
|
||||
|
||||
The driver also exports the following information in sysfs:
|
||||
|
||||
- the FQ IDs for each FQ type
|
||||
/sys/devices/platform/dpaa-ethernet.0/net/<int>/fqids
|
||||
|
||||
- the IDs of the buffer pools in use
|
||||
/sys/devices/platform/dpaa-ethernet.0/net/<int>/bpids
|
|
@ -1,7 +1,7 @@
|
|||
TCP protocol
|
||||
============
|
||||
|
||||
Last updated: 9 February 2008
|
||||
Last updated: 3 June 2017
|
||||
|
||||
Contents
|
||||
========
|
||||
|
@ -29,18 +29,19 @@ As of 2.6.13, Linux supports pluggable congestion control algorithms.
|
|||
A congestion control mechanism can be registered through functions in
|
||||
tcp_cong.c. The functions used by the congestion control mechanism are
|
||||
registered via passing a tcp_congestion_ops struct to
|
||||
tcp_register_congestion_control. As a minimum name, ssthresh,
|
||||
cong_avoid must be valid.
|
||||
tcp_register_congestion_control. As a minimum, the congestion control
|
||||
mechanism must provide a valid name and must implement either ssthresh,
|
||||
cong_avoid and undo_cwnd hooks or the "omnipotent" cong_control hook.
|
||||
|
||||
Private data for a congestion control mechanism is stored in tp->ca_priv.
|
||||
tcp_ca(tp) returns a pointer to this space. This is preallocated space - it
|
||||
is important to check the size of your private data will fit this space, or
|
||||
alternatively space could be allocated elsewhere and a pointer to it could
|
||||
alternatively, space could be allocated elsewhere and a pointer to it could
|
||||
be stored here.
|
||||
|
||||
There are three kinds of congestion control algorithms currently: The
|
||||
simplest ones are derived from TCP reno (highspeed, scalable) and just
|
||||
provide an alternative the congestion window calculation. More complex
|
||||
provide an alternative congestion window calculation. More complex
|
||||
ones like BIC try to look at other events to provide better
|
||||
heuristics. There are also round trip time based algorithms like
|
||||
Vegas and Westwood+.
|
||||
|
@ -49,21 +50,15 @@ Good TCP congestion control is a complex problem because the algorithm
|
|||
needs to maintain fairness and performance. Please review current
|
||||
research and RFC's before developing new modules.
|
||||
|
||||
The method that is used to determine which congestion control mechanism is
|
||||
determined by the setting of the sysctl net.ipv4.tcp_congestion_control.
|
||||
The default congestion control will be the last one registered (LIFO);
|
||||
so if you built everything as modules, the default will be reno. If you
|
||||
build with the defaults from Kconfig, then CUBIC will be builtin (not a
|
||||
module) and it will end up the default.
|
||||
The default congestion control mechanism is chosen based on the
|
||||
DEFAULT_TCP_CONG Kconfig parameter. If you really want a particular default
|
||||
value then you can set it using sysctl net.ipv4.tcp_congestion_control. The
|
||||
module will be autoloaded if needed and you will get the expected protocol. If
|
||||
you ask for an unknown congestion method, then the sysctl attempt will fail.
|
||||
|
||||
If you really want a particular default value then you will need
|
||||
to set it with the sysctl. If you use a sysctl, the module will be autoloaded
|
||||
if needed and you will get the expected protocol. If you ask for an
|
||||
unknown congestion method, then the sysctl attempt will fail.
|
||||
|
||||
If you remove a tcp congestion control module, then you will get the next
|
||||
If you remove a TCP congestion control module, then you will get the next
|
||||
available one. Since reno cannot be built as a module, and cannot be
|
||||
deleted, it will always be available.
|
||||
removed, it will always be available.
|
||||
|
||||
How the new TCP output machine [nyi] works.
|
||||
===========================================
|
||||
|
|
|
@ -16,6 +16,8 @@ ALC880
|
|||
6-jack in back, 2-jack in front
|
||||
6stack-digout
|
||||
6-jack with a SPDIF out
|
||||
6stack-automute
|
||||
6-jack with headphone jack detection
|
||||
|
||||
ALC260
|
||||
======
|
||||
|
@ -62,6 +64,8 @@ lenovo-dock
|
|||
Enables docking station I/O for some Lenovos
|
||||
hp-gpio-led
|
||||
GPIO LED support on HP laptops
|
||||
hp-dock-gpio-mic1-led
|
||||
HP dock with mic LED support
|
||||
dell-headset-multi
|
||||
Headset jack, which can also be used as mic-in
|
||||
dell-headset-dock
|
||||
|
@ -72,6 +76,12 @@ alc283-sense-combo
|
|||
Combo jack sensing on ALC283
|
||||
tpt440-dock
|
||||
Pin configs for Lenovo Thinkpad Dock support
|
||||
tpt440
|
||||
Lenovo Thinkpad T440s setup
|
||||
tpt460
|
||||
Lenovo Thinkpad T460/560 setup
|
||||
dual-codecs
|
||||
Lenovo laptops with dual codecs
|
||||
|
||||
ALC66x/67x/892
|
||||
==============
|
||||
|
@ -97,6 +107,8 @@ inv-dmic
|
|||
Inverted internal mic workaround
|
||||
dell-headset-multi
|
||||
Headset jack, which can also be used as mic-in
|
||||
dual-codecs
|
||||
Lenovo laptops with dual codecs
|
||||
|
||||
ALC680
|
||||
======
|
||||
|
@ -114,6 +126,8 @@ inv-dmic
|
|||
Inverted internal mic workaround
|
||||
no-primary-hp
|
||||
VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
|
||||
dual-codecs
|
||||
ALC1220 dual codecs for Gaming mobos
|
||||
|
||||
ALC861/660
|
||||
==========
|
||||
|
@ -206,65 +220,47 @@ auto
|
|||
|
||||
Conexant 5045
|
||||
=============
|
||||
laptop-hpsense
|
||||
Laptop with HP sense (old model laptop)
|
||||
laptop-micsense
|
||||
Laptop with Mic sense (old model fujitsu)
|
||||
laptop-hpmicsense
|
||||
Laptop with HP and Mic senses
|
||||
benq
|
||||
Benq R55E
|
||||
laptop-hp530
|
||||
HP 530 laptop
|
||||
test
|
||||
for testing/debugging purpose, almost all controls can be
|
||||
adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y
|
||||
cap-mix-amp
|
||||
Fix max input level on mixer widget
|
||||
toshiba-p105
|
||||
Toshiba P105 quirk
|
||||
hp-530
|
||||
HP 530 quirk
|
||||
|
||||
Conexant 5047
|
||||
=============
|
||||
laptop
|
||||
Basic Laptop config
|
||||
laptop-hp
|
||||
Laptop config for some HP models (subdevice 30A5)
|
||||
laptop-eapd
|
||||
Laptop config with EAPD support
|
||||
test
|
||||
for testing/debugging purpose, almost all controls can be
|
||||
adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y
|
||||
cap-mix-amp
|
||||
Fix max input level on mixer widget
|
||||
|
||||
Conexant 5051
|
||||
=============
|
||||
laptop
|
||||
Basic Laptop config (default)
|
||||
hp
|
||||
HP Spartan laptop
|
||||
hp-dv6736
|
||||
HP dv6736
|
||||
hp-f700
|
||||
HP Compaq Presario F700
|
||||
ideapad
|
||||
Lenovo IdeaPad laptop
|
||||
toshiba
|
||||
Toshiba Satellite M300
|
||||
lenovo-x200
|
||||
Lenovo X200 quirk
|
||||
|
||||
Conexant 5066
|
||||
=============
|
||||
laptop
|
||||
Basic Laptop config (default)
|
||||
hp-laptop
|
||||
HP laptops, e g G60
|
||||
asus
|
||||
Asus K52JU, Lenovo G560
|
||||
dell-laptop
|
||||
Dell laptops
|
||||
dell-vostro
|
||||
Dell Vostro
|
||||
olpc-xo-1_5
|
||||
OLPC XO 1.5
|
||||
ideapad
|
||||
Lenovo IdeaPad U150
|
||||
stereo-dmic
|
||||
Workaround for inverted stereo digital mic
|
||||
gpio1
|
||||
Enable GPIO1 pin
|
||||
headphone-mic-pin
|
||||
Enable headphone mic NID 0x18 without detection
|
||||
tp410
|
||||
Thinkpad T400 & co quirks
|
||||
thinkpad
|
||||
Lenovo Thinkpad
|
||||
Thinkpad mute/mic LED quirk
|
||||
lemote-a1004
|
||||
Lemote A1004 quirk
|
||||
lemote-a1205
|
||||
Lemote A1205 quirk
|
||||
olpc-xo
|
||||
OLPC XO quirk
|
||||
mute-led-eapd
|
||||
Mute LED control via EAPD
|
||||
hp-dock
|
||||
HP dock support
|
||||
mute-led-gpio
|
||||
Mute LED control via GPIO
|
||||
|
||||
STAC9200
|
||||
========
|
||||
|
@ -444,6 +440,8 @@ dell-eq
|
|||
Dell desktops/laptops
|
||||
alienware
|
||||
Alienware M17x
|
||||
asus-mobo
|
||||
Pin configs for ASUS mobo with 5.1/SPDIF out
|
||||
auto
|
||||
BIOS setup (default)
|
||||
|
||||
|
@ -477,6 +475,8 @@ hp-envy-ts-bass
|
|||
Pin fixup for HP Envy TS bass speaker (NID 0x10)
|
||||
hp-bnb13-eq
|
||||
Hardware equalizer setup for HP laptops
|
||||
hp-envy-ts-bass
|
||||
HP Envy TS bass support
|
||||
auto
|
||||
BIOS setup (default)
|
||||
|
||||
|
@ -496,10 +496,22 @@ auto
|
|||
|
||||
Cirrus Logic CS4206/4207
|
||||
========================
|
||||
mbp53
|
||||
MacBook Pro 5,3
|
||||
mbp55
|
||||
MacBook Pro 5,5
|
||||
imac27
|
||||
IMac 27 Inch
|
||||
imac27_122
|
||||
iMac 12,2
|
||||
apple
|
||||
Generic Apple quirk
|
||||
mbp101
|
||||
MacBookPro 10,1
|
||||
mbp81
|
||||
MacBookPro 8,1
|
||||
mba42
|
||||
MacBookAir 4,2
|
||||
auto
|
||||
BIOS setup (default)
|
||||
|
||||
|
@ -509,6 +521,10 @@ mba6
|
|||
MacBook Air 6,1 and 6,2
|
||||
gpio0
|
||||
Enable GPIO 0 amp
|
||||
mbp11
|
||||
MacBookPro 11,2
|
||||
macmini
|
||||
MacMini 7,1
|
||||
auto
|
||||
BIOS setup (default)
|
||||
|
||||
|
|
35
MAINTAINERS
35
MAINTAINERS
|
@ -1172,7 +1172,7 @@ N: clps711x
|
|||
|
||||
ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
|
||||
M: Hartley Sweeten <hsweeten@visionengravers.com>
|
||||
M: Ryan Mallon <rmallon@gmail.com>
|
||||
M: Alexander Sverdlin <alexander.sverdlin@gmail.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
F: arch/arm/mach-ep93xx/
|
||||
|
@ -1489,13 +1489,15 @@ M: Gregory Clement <gregory.clement@free-electrons.com>
|
|||
M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
S: Maintained
|
||||
F: arch/arm/mach-mvebu/
|
||||
F: drivers/rtc/rtc-armada38x.c
|
||||
F: arch/arm/boot/dts/armada*
|
||||
F: arch/arm/boot/dts/kirkwood*
|
||||
F: arch/arm/configs/mvebu_*_defconfig
|
||||
F: arch/arm/mach-mvebu/
|
||||
F: arch/arm64/boot/dts/marvell/armada*
|
||||
F: drivers/cpufreq/mvebu-cpufreq.c
|
||||
F: arch/arm/configs/mvebu_*_defconfig
|
||||
F: drivers/irqchip/irq-armada-370-xp.c
|
||||
F: drivers/irqchip/irq-mvebu-*
|
||||
F: drivers/rtc/rtc-armada38x.c
|
||||
|
||||
ARM/Marvell Berlin SoC support
|
||||
M: Jisheng Zhang <jszhang@marvell.com>
|
||||
|
@ -1721,7 +1723,6 @@ N: rockchip
|
|||
ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
|
||||
M: Kukjin Kim <kgene@kernel.org>
|
||||
M: Krzysztof Kozlowski <krzk@kernel.org>
|
||||
R: Javier Martinez Canillas <javier@osg.samsung.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
|
||||
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
|
||||
|
@ -1829,7 +1830,6 @@ F: drivers/edac/altera_edac.
|
|||
ARM/STI ARCHITECTURE
|
||||
M: Patrice Chotard <patrice.chotard@st.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: kernel@stlinux.com
|
||||
W: http://www.stlinux.com
|
||||
S: Maintained
|
||||
F: arch/arm/mach-sti/
|
||||
|
@ -5622,7 +5622,7 @@ F: scripts/get_maintainer.pl
|
|||
|
||||
GENWQE (IBM Generic Workqueue Card)
|
||||
M: Frank Haverkamp <haver@linux.vnet.ibm.com>
|
||||
M: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
|
||||
M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
|
||||
S: Supported
|
||||
F: drivers/misc/genwqe/
|
||||
|
||||
|
@ -5667,7 +5667,6 @@ F: tools/testing/selftests/gpio/
|
|||
|
||||
GPIO SUBSYSTEM
|
||||
M: Linus Walleij <linus.walleij@linaro.org>
|
||||
M: Alexandre Courbot <gnurou@gmail.com>
|
||||
L: linux-gpio@vger.kernel.org
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
|
||||
S: Maintained
|
||||
|
@ -7143,7 +7142,7 @@ S: Maintained
|
|||
F: drivers/media/platform/rcar_jpu.c
|
||||
|
||||
JSM Neo PCI based serial card
|
||||
M: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
|
||||
M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
|
||||
L: linux-serial@vger.kernel.org
|
||||
S: Maintained
|
||||
F: drivers/tty/serial/jsm/
|
||||
|
@ -7707,7 +7706,7 @@ F: drivers/platform/x86/hp_accel.c
|
|||
|
||||
LIVE PATCHING
|
||||
M: Josh Poimboeuf <jpoimboe@redhat.com>
|
||||
M: Jessica Yu <jeyu@redhat.com>
|
||||
M: Jessica Yu <jeyu@kernel.org>
|
||||
M: Jiri Kosina <jikos@kernel.org>
|
||||
M: Miroslav Benes <mbenes@suse.cz>
|
||||
R: Petr Mladek <pmladek@suse.com>
|
||||
|
@ -8508,7 +8507,7 @@ S: Odd Fixes
|
|||
F: drivers/media/radio/radio-miropcm20*
|
||||
|
||||
MELLANOX MLX4 core VPI driver
|
||||
M: Yishai Hadas <yishaih@mellanox.com>
|
||||
M: Tariq Toukan <tariqt@mellanox.com>
|
||||
L: netdev@vger.kernel.org
|
||||
L: linux-rdma@vger.kernel.org
|
||||
W: http://www.mellanox.com
|
||||
|
@ -8516,7 +8515,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
|
|||
S: Supported
|
||||
F: drivers/net/ethernet/mellanox/mlx4/
|
||||
F: include/linux/mlx4/
|
||||
F: include/uapi/rdma/mlx4-abi.h
|
||||
|
||||
MELLANOX MLX4 IB driver
|
||||
M: Yishai Hadas <yishaih@mellanox.com>
|
||||
|
@ -8526,6 +8524,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
|
|||
S: Supported
|
||||
F: drivers/infiniband/hw/mlx4/
|
||||
F: include/linux/mlx4/
|
||||
F: include/uapi/rdma/mlx4-abi.h
|
||||
|
||||
MELLANOX MLX5 core VPI driver
|
||||
M: Saeed Mahameed <saeedm@mellanox.com>
|
||||
|
@ -8538,7 +8537,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
|
|||
S: Supported
|
||||
F: drivers/net/ethernet/mellanox/mlx5/core/
|
||||
F: include/linux/mlx5/
|
||||
F: include/uapi/rdma/mlx5-abi.h
|
||||
|
||||
MELLANOX MLX5 IB driver
|
||||
M: Matan Barak <matanb@mellanox.com>
|
||||
|
@ -8549,6 +8547,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
|
|||
S: Supported
|
||||
F: drivers/infiniband/hw/mlx5/
|
||||
F: include/linux/mlx5/
|
||||
F: include/uapi/rdma/mlx5-abi.h
|
||||
|
||||
MELEXIS MLX90614 DRIVER
|
||||
M: Crt Mori <cmo@melexis.com>
|
||||
|
@ -8588,7 +8587,7 @@ S: Maintained
|
|||
F: drivers/media/dvb-frontends/mn88473*
|
||||
|
||||
MODULE SUPPORT
|
||||
M: Jessica Yu <jeyu@redhat.com>
|
||||
M: Jessica Yu <jeyu@kernel.org>
|
||||
M: Rusty Russell <rusty@rustcorp.com.au>
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
|
||||
S: Maintained
|
||||
|
@ -10450,7 +10449,7 @@ S: Orphan
|
|||
|
||||
PXA RTC DRIVER
|
||||
M: Robert Jarzmik <robert.jarzmik@free.fr>
|
||||
L: rtc-linux@googlegroups.com
|
||||
L: linux-rtc@vger.kernel.org
|
||||
S: Maintained
|
||||
|
||||
QAT DRIVER
|
||||
|
@ -10757,7 +10756,7 @@ X: kernel/torture.c
|
|||
REAL TIME CLOCK (RTC) SUBSYSTEM
|
||||
M: Alessandro Zummo <a.zummo@towertech.it>
|
||||
M: Alexandre Belloni <alexandre.belloni@free-electrons.com>
|
||||
L: rtc-linux@googlegroups.com
|
||||
L: linux-rtc@vger.kernel.org
|
||||
Q: http://patchwork.ozlabs.org/project/rtc-linux/list/
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
|
||||
S: Maintained
|
||||
|
@ -11268,7 +11267,6 @@ F: drivers/media/rc/serial_ir.c
|
|||
|
||||
STI CEC DRIVER
|
||||
M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
|
||||
L: kernel@stlinux.com
|
||||
S: Maintained
|
||||
F: drivers/staging/media/st-cec/
|
||||
F: Documentation/devicetree/bindings/media/stih-cec.txt
|
||||
|
@ -11778,6 +11776,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
|
|||
S: Supported
|
||||
F: arch/arm/mach-davinci/
|
||||
F: drivers/i2c/busses/i2c-davinci.c
|
||||
F: arch/arm/boot/dts/da850*
|
||||
|
||||
TI DAVINCI SERIES MEDIA DRIVER
|
||||
M: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
|
||||
|
@ -13861,7 +13860,7 @@ S: Odd fixes
|
|||
F: drivers/net/wireless/wl3501*
|
||||
|
||||
WOLFSON MICROELECTRONICS DRIVERS
|
||||
L: patches@opensource.wolfsonmicro.com
|
||||
L: patches@opensource.cirrus.com
|
||||
T: git https://github.com/CirrusLogic/linux-drivers.git
|
||||
W: https://github.com/CirrusLogic/linux-drivers/wiki
|
||||
S: Supported
|
||||
|
|
2
Makefile
2
Makefile
|
@ -1,7 +1,7 @@
|
|||
VERSION = 4
|
||||
PATCHLEVEL = 12
|
||||
SUBLEVEL = 0
|
||||
EXTRAVERSION = -rc2
|
||||
EXTRAVERSION = -rc5
|
||||
NAME = Fearless Coyote
|
||||
|
||||
# *DOCUMENTATION*
|
||||
|
|
|
@ -17,14 +17,12 @@
|
|||
@ there.
|
||||
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
|
||||
#else
|
||||
mov r0, r0
|
||||
W(mov) r0, r0
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro __EFI_HEADER
|
||||
#ifdef CONFIG_EFI_STUB
|
||||
b __efi_start
|
||||
|
||||
.set start_offset, __efi_start - start
|
||||
.org start + 0x3c
|
||||
@
|
||||
|
|
|
@ -130,19 +130,22 @@ start:
|
|||
.rept 7
|
||||
__nop
|
||||
.endr
|
||||
ARM( mov r0, r0 )
|
||||
ARM( b 1f )
|
||||
THUMB( badr r12, 1f )
|
||||
THUMB( bx r12 )
|
||||
#ifndef CONFIG_THUMB2_KERNEL
|
||||
mov r0, r0
|
||||
#else
|
||||
AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode
|
||||
M_CLASS( nop.w ) @ M: already in Thumb2 mode
|
||||
.thumb
|
||||
#endif
|
||||
W(b) 1f
|
||||
|
||||
.word _magic_sig @ Magic numbers to help the loader
|
||||
.word _magic_start @ absolute load/run zImage address
|
||||
.word _magic_end @ zImage end address
|
||||
.word 0x04030201 @ endianness flag
|
||||
|
||||
THUMB( .thumb )
|
||||
1: __EFI_HEADER
|
||||
|
||||
__EFI_HEADER
|
||||
1:
|
||||
ARM_BE8( setend be ) @ go BE8 if compiled for BE8
|
||||
AR_CLASS( mrs r9, cpsr )
|
||||
#ifdef CONFIG_ARM_VIRT_EXT
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
#include <dt-bindings/clock/bcm2835-aux.h>
|
||||
#include <dt-bindings/gpio/gpio.h>
|
||||
|
||||
/* firmware-provided startup stubs live here, where the secondary CPUs are
|
||||
* spinning.
|
||||
*/
|
||||
/memreserve/ 0x00000000 0x00001000;
|
||||
|
||||
/* This include file covers the common peripherals and configuration between
|
||||
* bcm2835 and bcm2836 implementations, leaving the CPU configuration to
|
||||
* bcm2835.dtsi and bcm2836.dtsi.
|
||||
|
|
|
@ -120,10 +120,16 @@
|
|||
|
||||
ethphy0: ethernet-phy@2 {
|
||||
reg = <2>;
|
||||
micrel,led-mode = <1>;
|
||||
clocks = <&clks IMX6UL_CLK_ENET_REF>;
|
||||
clock-names = "rmii-ref";
|
||||
};
|
||||
|
||||
ethphy1: ethernet-phy@1 {
|
||||
reg = <1>;
|
||||
micrel,led-mode = <1>;
|
||||
clocks = <&clks IMX6UL_CLK_ENET2_REF>;
|
||||
clock-names = "rmii-ref";
|
||||
};
|
||||
};
|
||||
};
|
||||
|
|
|
@ -137,8 +137,8 @@ netcp: netcp@26000000 {
|
|||
/* NetCP address range */
|
||||
ranges = <0 0x26000000 0x1000000>;
|
||||
|
||||
clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>, <&clkosr>;
|
||||
clock-names = "pa_clk", "ethss_clk", "cpts", "osr_clk";
|
||||
clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
|
||||
clock-names = "pa_clk", "ethss_clk", "cpts";
|
||||
dma-coherent;
|
||||
|
||||
ti,navigator-dmas = <&dma_gbe 0>,
|
||||
|
|
|
@ -232,6 +232,14 @@
|
|||
};
|
||||
};
|
||||
|
||||
osr: sram@70000000 {
|
||||
compatible = "mmio-sram";
|
||||
reg = <0x70000000 0x10000>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
clocks = <&clkosr>;
|
||||
};
|
||||
|
||||
dspgpio0: keystone_dsp_gpio@02620240 {
|
||||
compatible = "ti,keystone-dsp-gpio";
|
||||
gpio-controller;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
#include <versatile-ab.dts>
|
||||
#include "versatile-ab.dts"
|
||||
|
||||
/ {
|
||||
model = "ARM Versatile PB";
|
||||
|
|
|
@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
|
|||
return ret;
|
||||
}
|
||||
|
||||
typedef void (*phys_reset_t)(unsigned long);
|
||||
typedef typeof(cpu_reset) phys_reset_t;
|
||||
|
||||
void mcpm_cpu_power_down(void)
|
||||
{
|
||||
|
@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void)
|
|||
* on the CPU.
|
||||
*/
|
||||
phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
|
||||
phys_reset(__pa_symbol(mcpm_entry_point));
|
||||
phys_reset(__pa_symbol(mcpm_entry_point), false);
|
||||
|
||||
/* should never get here */
|
||||
BUG();
|
||||
|
@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg)
|
|||
__mcpm_cpu_down(cpu, cluster);
|
||||
|
||||
phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
|
||||
phys_reset(__pa_symbol(mcpm_entry_point));
|
||||
phys_reset(__pa_symbol(mcpm_entry_point), false);
|
||||
BUG();
|
||||
}
|
||||
|
||||
|
|
|
@ -19,7 +19,8 @@ struct dev_archdata {
|
|||
#ifdef CONFIG_XEN
|
||||
const struct dma_map_ops *dev_dma_ops;
|
||||
#endif
|
||||
bool dma_coherent;
|
||||
unsigned int dma_coherent:1;
|
||||
unsigned int dma_ops_setup:1;
|
||||
};
|
||||
|
||||
struct omap_device;
|
||||
|
|
|
@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t;
|
|||
#define pgprot_noncached(prot) (prot)
|
||||
#define pgprot_writecombine(prot) (prot)
|
||||
#define pgprot_dmacoherent(prot) (prot)
|
||||
#define pgprot_device(prot) (prot)
|
||||
|
||||
|
||||
/*
|
||||
|
|
|
@ -104,7 +104,6 @@ __do_hyp_init:
|
|||
@ - Write permission implies XN: disabled
|
||||
@ - Instruction cache: enabled
|
||||
@ - Data/Unified cache: enabled
|
||||
@ - Memory alignment checks: enabled
|
||||
@ - MMU: enabled (this code must be run from an identity mapping)
|
||||
mrc p15, 4, r0, c1, c0, 0 @ HSCR
|
||||
ldr r2, =HSCTLR_MASK
|
||||
|
@ -112,8 +111,8 @@ __do_hyp_init:
|
|||
mrc p15, 0, r1, c1, c0, 0 @ SCTLR
|
||||
ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
|
||||
and r1, r1, r2
|
||||
ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
|
||||
THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
|
||||
ARM( ldr r2, =(HSCTLR_M) )
|
||||
THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) )
|
||||
orr r1, r1, r2
|
||||
orr r0, r0, r1
|
||||
mcr p15, 4, r0, c1, c0, 0 @ HSCR
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
menuconfig ARCH_AT91
|
||||
bool "Atmel SoCs"
|
||||
depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7
|
||||
select ARM_CPU_SUSPEND if PM
|
||||
select COMMON_CLK_AT91
|
||||
select GPIOLIB
|
||||
select PINCTRL
|
||||
|
|
|
@ -153,7 +153,8 @@ int __init davinci_pm_init(void)
|
|||
davinci_sram_suspend = sram_alloc(davinci_cpu_suspend_sz, NULL);
|
||||
if (!davinci_sram_suspend) {
|
||||
pr_err("PM: cannot allocate SRAM memory\n");
|
||||
return -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto no_sram_mem;
|
||||
}
|
||||
|
||||
davinci_sram_push(davinci_sram_suspend, davinci_cpu_suspend,
|
||||
|
@ -161,6 +162,10 @@ int __init davinci_pm_init(void)
|
|||
|
||||
suspend_set_ops(&davinci_pm_ops);
|
||||
|
||||
return 0;
|
||||
|
||||
no_sram_mem:
|
||||
iounmap(pm_config.ddrpsc_reg_base);
|
||||
no_ddrpsc_mem:
|
||||
iounmap(pm_config.ddrpll_reg_base);
|
||||
no_ddrpll_mem:
|
||||
|
|
|
@ -2311,7 +2311,14 @@ int arm_iommu_attach_device(struct device *dev,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
|
||||
|
||||
static void __arm_iommu_detach_device(struct device *dev)
|
||||
/**
|
||||
* arm_iommu_detach_device
|
||||
* @dev: valid struct device pointer
|
||||
*
|
||||
* Detaches the provided device from a previously attached map.
|
||||
* This voids the dma operations (dma_map_ops pointer)
|
||||
*/
|
||||
void arm_iommu_detach_device(struct device *dev)
|
||||
{
|
||||
struct dma_iommu_mapping *mapping;
|
||||
|
||||
|
@ -2324,22 +2331,10 @@ static void __arm_iommu_detach_device(struct device *dev)
|
|||
iommu_detach_device(mapping->domain, dev);
|
||||
kref_put(&mapping->kref, release_iommu_mapping);
|
||||
to_dma_iommu_mapping(dev) = NULL;
|
||||
set_dma_ops(dev, NULL);
|
||||
|
||||
pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
|
||||
}
|
||||
|
||||
/**
|
||||
* arm_iommu_detach_device
|
||||
* @dev: valid struct device pointer
|
||||
*
|
||||
* Detaches the provided device from a previously attached map.
|
||||
* This voids the dma operations (dma_map_ops pointer)
|
||||
*/
|
||||
void arm_iommu_detach_device(struct device *dev)
|
||||
{
|
||||
__arm_iommu_detach_device(dev);
|
||||
set_dma_ops(dev, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
|
||||
|
||||
static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
|
||||
|
@ -2379,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
|
|||
if (!mapping)
|
||||
return;
|
||||
|
||||
__arm_iommu_detach_device(dev);
|
||||
arm_iommu_detach_device(dev);
|
||||
arm_iommu_release_mapping(mapping);
|
||||
}
|
||||
|
||||
|
@ -2430,9 +2425,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
|
|||
dev->dma_ops = xen_dma_ops;
|
||||
}
|
||||
#endif
|
||||
dev->archdata.dma_ops_setup = true;
|
||||
}
|
||||
|
||||
void arch_teardown_dma_ops(struct device *dev)
|
||||
{
|
||||
if (!dev->archdata.dma_ops_setup)
|
||||
return;
|
||||
|
||||
arm_teardown_iommu_dma_ops(dev);
|
||||
}
|
||||
|
|
|
@ -1084,10 +1084,6 @@ config SYSVIPC_COMPAT
|
|||
def_bool y
|
||||
depends on COMPAT && SYSVIPC
|
||||
|
||||
config KEYS_COMPAT
|
||||
def_bool y
|
||||
depends on COMPAT && KEYS
|
||||
|
||||
endmenu
|
||||
|
||||
menu "Power management options"
|
||||
|
|
|
@ -81,6 +81,45 @@
|
|||
};
|
||||
};
|
||||
|
||||
reg_sys_5v: regulator@0 {
|
||||
compatible = "regulator-fixed";
|
||||
regulator-name = "SYS_5V";
|
||||
regulator-min-microvolt = <5000000>;
|
||||
regulator-max-microvolt = <5000000>;
|
||||
regulator-boot-on;
|
||||
regulator-always-on;
|
||||
};
|
||||
|
||||
reg_vdd_3v3: regulator@1 {
|
||||
compatible = "regulator-fixed";
|
||||
regulator-name = "VDD_3V3";
|
||||
regulator-min-microvolt = <3300000>;
|
||||
regulator-max-microvolt = <3300000>;
|
||||
regulator-boot-on;
|
||||
regulator-always-on;
|
||||
vin-supply = <®_sys_5v>;
|
||||
};
|
||||
|
||||
reg_5v_hub: regulator@2 {
|
||||
compatible = "regulator-fixed";
|
||||
regulator-name = "5V_HUB";
|
||||
regulator-min-microvolt = <5000000>;
|
||||
regulator-max-microvolt = <5000000>;
|
||||
regulator-boot-on;
|
||||
gpio = <&gpio0 7 0>;
|
||||
regulator-always-on;
|
||||
vin-supply = <®_sys_5v>;
|
||||
};
|
||||
|
||||
wl1835_pwrseq: wl1835-pwrseq {
|
||||
compatible = "mmc-pwrseq-simple";
|
||||
/* WLAN_EN GPIO */
|
||||
reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
|
||||
clocks = <&pmic>;
|
||||
clock-names = "ext_clock";
|
||||
power-off-delay-us = <10>;
|
||||
};
|
||||
|
||||
soc {
|
||||
spi0: spi@f7106000 {
|
||||
status = "ok";
|
||||
|
@ -256,11 +295,31 @@
|
|||
|
||||
/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
|
||||
|
||||
dwmmc_2: dwmmc2@f723f000 {
|
||||
ti,non-removable;
|
||||
dwmmc_0: dwmmc0@f723d000 {
|
||||
cap-mmc-highspeed;
|
||||
non-removable;
|
||||
/* WL_EN */
|
||||
vmmc-supply = <&wlan_en_reg>;
|
||||
bus-width = <0x8>;
|
||||
vmmc-supply = <&ldo19>;
|
||||
};
|
||||
|
||||
dwmmc_1: dwmmc1@f723e000 {
|
||||
card-detect-delay = <200>;
|
||||
cap-sd-highspeed;
|
||||
sd-uhs-sdr12;
|
||||
sd-uhs-sdr25;
|
||||
sd-uhs-sdr50;
|
||||
vqmmc-supply = <&ldo7>;
|
||||
vmmc-supply = <&ldo10>;
|
||||
bus-width = <0x4>;
|
||||
disable-wp;
|
||||
cd-gpios = <&gpio1 0 1>;
|
||||
};
|
||||
|
||||
dwmmc_2: dwmmc2@f723f000 {
|
||||
bus-width = <0x4>;
|
||||
non-removable;
|
||||
vmmc-supply = <®_vdd_3v3>;
|
||||
mmc-pwrseq = <&wl1835_pwrseq>;
|
||||
|
||||
#address-cells = <0x1>;
|
||||
#size-cells = <0x0>;
|
||||
|
@ -272,18 +331,6 @@
|
|||
interrupts = <3 IRQ_TYPE_EDGE_RISING>;
|
||||
};
|
||||
};
|
||||
|
||||
wlan_en_reg: regulator@1 {
|
||||
compatible = "regulator-fixed";
|
||||
regulator-name = "wlan-en-regulator";
|
||||
regulator-min-microvolt = <1800000>;
|
||||
regulator-max-microvolt = <1800000>;
|
||||
/* WLAN_EN GPIO */
|
||||
gpio = <&gpio0 5 0>;
|
||||
/* WLAN card specific delay */
|
||||
startup-delay-us = <70000>;
|
||||
enable-active-high;
|
||||
};
|
||||
};
|
||||
|
||||
leds {
|
||||
|
@ -330,6 +377,7 @@
|
|||
pmic: pmic@f8000000 {
|
||||
compatible = "hisilicon,hi655x-pmic";
|
||||
reg = <0x0 0xf8000000 0x0 0x1000>;
|
||||
#clock-cells = <0>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <2>;
|
||||
pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
|
||||
|
|
|
@ -725,20 +725,10 @@
|
|||
status = "disabled";
|
||||
};
|
||||
|
||||
fixed_5v_hub: regulator@0 {
|
||||
compatible = "regulator-fixed";
|
||||
regulator-name = "fixed_5v_hub";
|
||||
regulator-min-microvolt = <5000000>;
|
||||
regulator-max-microvolt = <5000000>;
|
||||
regulator-boot-on;
|
||||
gpio = <&gpio0 7 0>;
|
||||
regulator-always-on;
|
||||
};
|
||||
|
||||
usb_phy: usbphy {
|
||||
compatible = "hisilicon,hi6220-usb-phy";
|
||||
#phy-cells = <0>;
|
||||
phy-supply = <&fixed_5v_hub>;
|
||||
phy-supply = <®_5v_hub>;
|
||||
hisilicon,peripheral-syscon = <&sys_ctrl>;
|
||||
};
|
||||
|
||||
|
@ -766,17 +756,12 @@
|
|||
|
||||
dwmmc_0: dwmmc0@f723d000 {
|
||||
compatible = "hisilicon,hi6220-dw-mshc";
|
||||
num-slots = <0x1>;
|
||||
cap-mmc-highspeed;
|
||||
non-removable;
|
||||
reg = <0x0 0xf723d000 0x0 0x1000>;
|
||||
interrupts = <0x0 0x48 0x4>;
|
||||
clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
|
||||
clock-names = "ciu", "biu";
|
||||
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
|
||||
reset-names = "reset";
|
||||
bus-width = <0x8>;
|
||||
vmmc-supply = <&ldo19>;
|
||||
pinctrl-names = "default";
|
||||
pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
|
||||
&emmc_cfg_func &emmc_rst_cfg_func>;
|
||||
|
@ -784,13 +769,7 @@
|
|||
|
||||
dwmmc_1: dwmmc1@f723e000 {
|
||||
compatible = "hisilicon,hi6220-dw-mshc";
|
||||
num-slots = <0x1>;
|
||||
card-detect-delay = <200>;
|
||||
hisilicon,peripheral-syscon = <&ao_ctrl>;
|
||||
cap-sd-highspeed;
|
||||
sd-uhs-sdr12;
|
||||
sd-uhs-sdr25;
|
||||
sd-uhs-sdr50;
|
||||
reg = <0x0 0xf723e000 0x0 0x1000>;
|
||||
interrupts = <0x0 0x49 0x4>;
|
||||
#address-cells = <0x1>;
|
||||
|
@ -799,11 +778,6 @@
|
|||
clock-names = "ciu", "biu";
|
||||
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
|
||||
reset-names = "reset";
|
||||
vqmmc-supply = <&ldo7>;
|
||||
vmmc-supply = <&ldo10>;
|
||||
bus-width = <0x4>;
|
||||
disable-wp;
|
||||
cd-gpios = <&gpio1 0 1>;
|
||||
pinctrl-names = "default", "idle";
|
||||
pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
|
||||
pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
|
||||
|
@ -811,15 +785,12 @@
|
|||
|
||||
dwmmc_2: dwmmc2@f723f000 {
|
||||
compatible = "hisilicon,hi6220-dw-mshc";
|
||||
num-slots = <0x1>;
|
||||
reg = <0x0 0xf723f000 0x0 0x1000>;
|
||||
interrupts = <0x0 0x4a 0x4>;
|
||||
clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
|
||||
clock-names = "ciu", "biu";
|
||||
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
|
||||
reset-names = "reset";
|
||||
bus-width = <0x4>;
|
||||
broken-cd;
|
||||
pinctrl-names = "default", "idle";
|
||||
pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
|
||||
pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;
|
||||
|
|
|
@ -231,8 +231,7 @@
|
|||
cpm_crypto: crypto@800000 {
|
||||
compatible = "inside-secure,safexcel-eip197";
|
||||
reg = <0x800000 0x200000>;
|
||||
interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
|
||||
| IRQ_TYPE_LEVEL_HIGH)>,
|
||||
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
|
||||
|
|
|
@ -221,8 +221,7 @@
|
|||
cps_crypto: crypto@800000 {
|
||||
compatible = "inside-secure,safexcel-eip197";
|
||||
reg = <0x800000 0x200000>;
|
||||
interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
|
||||
| IRQ_TYPE_LEVEL_HIGH)>,
|
||||
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>,
|
||||
|
|
|
@ -68,6 +68,7 @@ CONFIG_PCIE_QCOM=y
|
|||
CONFIG_PCIE_ARMADA_8K=y
|
||||
CONFIG_PCI_AARDVARK=y
|
||||
CONFIG_PCIE_RCAR=y
|
||||
CONFIG_PCIE_ROCKCHIP=m
|
||||
CONFIG_PCI_HOST_GENERIC=y
|
||||
CONFIG_PCI_XGENE=y
|
||||
CONFIG_ARM64_VA_BITS_48=y
|
||||
|
@ -208,6 +209,8 @@ CONFIG_BRCMFMAC=m
|
|||
CONFIG_WL18XX=m
|
||||
CONFIG_WLCORE_SDIO=m
|
||||
CONFIG_INPUT_EVDEV=y
|
||||
CONFIG_KEYBOARD_ADC=m
|
||||
CONFIG_KEYBOARD_CROS_EC=y
|
||||
CONFIG_KEYBOARD_GPIO=y
|
||||
CONFIG_INPUT_MISC=y
|
||||
CONFIG_INPUT_PM8941_PWRKEY=y
|
||||
|
@ -263,6 +266,7 @@ CONFIG_SPI_MESON_SPIFC=m
|
|||
CONFIG_SPI_ORION=y
|
||||
CONFIG_SPI_PL022=y
|
||||
CONFIG_SPI_QUP=y
|
||||
CONFIG_SPI_ROCKCHIP=y
|
||||
CONFIG_SPI_S3C64XX=y
|
||||
CONFIG_SPI_SPIDEV=m
|
||||
CONFIG_SPMI=y
|
||||
|
@ -292,6 +296,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
|
|||
CONFIG_CPU_THERMAL=y
|
||||
CONFIG_THERMAL_EMULATION=y
|
||||
CONFIG_EXYNOS_THERMAL=y
|
||||
CONFIG_ROCKCHIP_THERMAL=m
|
||||
CONFIG_WATCHDOG=y
|
||||
CONFIG_S3C2410_WATCHDOG=y
|
||||
CONFIG_MESON_GXBB_WATCHDOG=m
|
||||
|
@ -300,12 +305,14 @@ CONFIG_RENESAS_WDT=y
|
|||
CONFIG_BCM2835_WDT=y
|
||||
CONFIG_MFD_CROS_EC=y
|
||||
CONFIG_MFD_CROS_EC_I2C=y
|
||||
CONFIG_MFD_CROS_EC_SPI=y
|
||||
CONFIG_MFD_EXYNOS_LPASS=m
|
||||
CONFIG_MFD_HI655X_PMIC=y
|
||||
CONFIG_MFD_MAX77620=y
|
||||
CONFIG_MFD_SPMI_PMIC=y
|
||||
CONFIG_MFD_RK808=y
|
||||
CONFIG_MFD_SEC_CORE=y
|
||||
CONFIG_REGULATOR_FAN53555=y
|
||||
CONFIG_REGULATOR_FIXED_VOLTAGE=y
|
||||
CONFIG_REGULATOR_GPIO=y
|
||||
CONFIG_REGULATOR_HI655X=y
|
||||
|
@ -473,8 +480,10 @@ CONFIG_ARCH_TEGRA_186_SOC=y
|
|||
CONFIG_EXTCON_USB_GPIO=y
|
||||
CONFIG_IIO=y
|
||||
CONFIG_EXYNOS_ADC=y
|
||||
CONFIG_ROCKCHIP_SARADC=m
|
||||
CONFIG_PWM=y
|
||||
CONFIG_PWM_BCM2835=m
|
||||
CONFIG_PWM_CROS_EC=m
|
||||
CONFIG_PWM_MESON=m
|
||||
CONFIG_PWM_ROCKCHIP=y
|
||||
CONFIG_PWM_SAMSUNG=y
|
||||
|
@ -484,6 +493,7 @@ CONFIG_PHY_HI6220_USB=y
|
|||
CONFIG_PHY_SUN4I_USB=y
|
||||
CONFIG_PHY_ROCKCHIP_INNO_USB2=y
|
||||
CONFIG_PHY_ROCKCHIP_EMMC=y
|
||||
CONFIG_PHY_ROCKCHIP_PCIE=m
|
||||
CONFIG_PHY_XGENE=y
|
||||
CONFIG_PHY_TEGRA_XUSB=y
|
||||
CONFIG_ARM_SCPI_PROTOCOL=y
|
||||
|
|
|
@ -23,9 +23,9 @@
|
|||
#define ACPI_MADT_GICC_LENGTH \
|
||||
(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
|
||||
|
||||
#define BAD_MADT_GICC_ENTRY(entry, end) \
|
||||
(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \
|
||||
(entry)->header.length != ACPI_MADT_GICC_LENGTH)
|
||||
#define BAD_MADT_GICC_ENTRY(entry, end) \
|
||||
(!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \
|
||||
(unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
|
||||
|
||||
/* Basic configuration for ACPI */
|
||||
#ifdef CONFIG_ACPI
|
||||
|
|
|
@ -286,6 +286,10 @@
|
|||
#define SCTLR_ELx_A (1 << 1)
|
||||
#define SCTLR_ELx_M 1
|
||||
|
||||
#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
|
||||
(1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \
|
||||
(1 << 28) | (1 << 29))
|
||||
|
||||
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
|
||||
SCTLR_ELx_SA | SCTLR_ELx_I)
|
||||
|
||||
|
|
|
@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
|
|||
return NULL;
|
||||
|
||||
root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
|
||||
if (!root_ops)
|
||||
if (!root_ops) {
|
||||
kfree(ri);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ri->cfg = pci_acpi_setup_ecam_mapping(root);
|
||||
if (!ri->cfg) {
|
||||
|
|
|
@ -106,10 +106,13 @@ __do_hyp_init:
|
|||
tlbi alle2
|
||||
dsb sy
|
||||
|
||||
mrs x4, sctlr_el2
|
||||
and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2
|
||||
ldr x5, =SCTLR_ELx_FLAGS
|
||||
orr x4, x4, x5
|
||||
/*
|
||||
* Preserve all the RES1 bits while setting the default flags,
|
||||
* as well as the EE bit on BE. Drop the A flag since the compiler
|
||||
* is allowed to generate unaligned accesses.
|
||||
*/
|
||||
ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
|
||||
CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
|
||||
msr sctlr_el2, x4
|
||||
isb
|
||||
|
||||
|
|
|
@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
|
||||
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
|
||||
*/
|
||||
vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
|
||||
vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
|
||||
vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
|
||||
vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
|
||||
vgic_set_vmcr(vcpu, &vmcr);
|
||||
} else {
|
||||
val = 0;
|
||||
|
@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
|
||||
* Extract it directly using ICC_CTLR_EL1 reg definitions.
|
||||
*/
|
||||
val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
|
||||
val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
|
||||
val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
|
||||
val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
|
||||
|
||||
p->regval = val;
|
||||
}
|
||||
|
@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
|
|||
p->regval = 0;
|
||||
|
||||
vgic_get_vmcr(vcpu, &vmcr);
|
||||
if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
|
||||
if (!vmcr.cbpr) {
|
||||
if (p->is_write) {
|
||||
vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
|
||||
ICC_BPR1_EL1_SHIFT;
|
||||
|
|
|
@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
|
|||
#define vxtime_lock() do {} while (0)
|
||||
#define vxtime_unlock() do {} while (0)
|
||||
|
||||
/* This attribute is used in include/linux/jiffies.h alongside with
|
||||
* __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
|
||||
* for frv does not contain another section specification.
|
||||
*/
|
||||
#define __jiffy_arch_data __attribute__((__section__(".data")))
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -37,15 +37,14 @@ __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count)
|
|||
long uncleared;
|
||||
|
||||
while (count > PAGE_SIZE) {
|
||||
uncleared = __copy_to_user_hexagon(dest, &empty_zero_page,
|
||||
PAGE_SIZE);
|
||||
uncleared = raw_copy_to_user(dest, &empty_zero_page, PAGE_SIZE);
|
||||
if (uncleared)
|
||||
return count - (PAGE_SIZE - uncleared);
|
||||
count -= PAGE_SIZE;
|
||||
dest += PAGE_SIZE;
|
||||
}
|
||||
if (count)
|
||||
count = __copy_to_user_hexagon(dest, &empty_zero_page, count);
|
||||
count = raw_copy_to_user(dest, &empty_zero_page, count);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
|
|
@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
|
|||
struct thread_info *ti = task_thread_info(p);
|
||||
struct pt_regs *childregs, *regs = current_pt_regs();
|
||||
unsigned long childksp;
|
||||
p->set_child_tid = p->clear_child_tid = NULL;
|
||||
|
||||
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
|
||||
|
||||
|
|
|
@ -167,8 +167,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
|
|||
|
||||
top_of_kernel_stack = sp;
|
||||
|
||||
p->set_child_tid = p->clear_child_tid = NULL;
|
||||
|
||||
/* Locate userspace context on stack... */
|
||||
sp -= STACK_FRAME_OVERHEAD; /* redzone */
|
||||
sp -= sizeof(struct pt_regs);
|
||||
|
|
|
@ -380,22 +380,6 @@ source "arch/powerpc/platforms/Kconfig"
|
|||
|
||||
menu "Kernel options"
|
||||
|
||||
config PPC_DT_CPU_FTRS
|
||||
bool "Device-tree based CPU feature discovery & setup"
|
||||
depends on PPC_BOOK3S_64
|
||||
default n
|
||||
help
|
||||
This enables code to use a new device tree binding for describing CPU
|
||||
compatibility and features. Saying Y here will attempt to use the new
|
||||
binding if the firmware provides it. Currently only the skiboot
|
||||
firmware provides this binding.
|
||||
If you're not sure say Y.
|
||||
|
||||
config PPC_CPUFEATURES_ENABLE_UNKNOWN
|
||||
bool "cpufeatures pass through unknown features to guest/userspace"
|
||||
depends on PPC_DT_CPU_FTRS
|
||||
default y
|
||||
|
||||
config HIGHMEM
|
||||
bool "High memory support"
|
||||
depends on PPC32
|
||||
|
@ -1215,11 +1199,6 @@ source "arch/powerpc/Kconfig.debug"
|
|||
|
||||
source "security/Kconfig"
|
||||
|
||||
config KEYS_COMPAT
|
||||
bool
|
||||
depends on COMPAT && KEYS
|
||||
default y
|
||||
|
||||
source "crypto/Kconfig"
|
||||
|
||||
config PPC_LIB_RHEAP
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#define H_PTE_INDEX_SIZE 9
|
||||
#define H_PMD_INDEX_SIZE 7
|
||||
#define H_PUD_INDEX_SIZE 9
|
||||
#define H_PGD_INDEX_SIZE 12
|
||||
#define H_PGD_INDEX_SIZE 9
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
|
||||
|
|
|
@ -214,7 +214,6 @@ enum {
|
|||
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
|
||||
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
|
||||
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
|
||||
#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
|
||||
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
@ -463,7 +462,7 @@ enum {
|
|||
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
|
||||
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
|
||||
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
|
||||
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
|
||||
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
|
||||
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
|
||||
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
|
||||
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
|
||||
|
|
|
@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
|
|||
#define TASK_SIZE_128TB (0x0000800000000000UL)
|
||||
#define TASK_SIZE_512TB (0x0002000000000000UL)
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/*
|
||||
* For now 512TB is only supported with book3s and 64K linux page size.
|
||||
*/
|
||||
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
|
||||
/*
|
||||
* Max value currently used:
|
||||
*/
|
||||
#define TASK_SIZE_USER64 TASK_SIZE_512TB
|
||||
#define TASK_SIZE_USER64 TASK_SIZE_512TB
|
||||
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
|
||||
#else
|
||||
#define TASK_SIZE_USER64 TASK_SIZE_64TB
|
||||
#define TASK_SIZE_USER64 TASK_SIZE_64TB
|
||||
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
|
|||
* space during mmap's.
|
||||
*/
|
||||
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
|
||||
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
|
||||
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
|
||||
|
||||
#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
|
||||
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
|
||||
|
@ -143,21 +148,15 @@ void release_thread(struct task_struct *);
|
|||
* with 128TB and conditionally enable upto 512TB
|
||||
*/
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
|
||||
TASK_SIZE_USER32 : TASK_SIZE_128TB)
|
||||
#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
|
||||
TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
|
||||
#else
|
||||
#define DEFAULT_MAP_WINDOW TASK_SIZE
|
||||
#endif
|
||||
|
||||
#ifdef __powerpc64__
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3S_64
|
||||
/* Limit stack to 128TB */
|
||||
#define STACK_TOP_USER64 TASK_SIZE_128TB
|
||||
#else
|
||||
#define STACK_TOP_USER64 TASK_SIZE_USER64
|
||||
#endif
|
||||
|
||||
#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
|
||||
#define STACK_TOP_USER32 TASK_SIZE_USER32
|
||||
|
||||
#define STACK_TOP (is_32bit_task() ? \
|
||||
|
|
|
@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
|
|||
extern int sysfs_add_device_to_node(struct device *dev, int nid);
|
||||
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
|
||||
|
||||
static inline int early_cpu_to_node(int cpu)
|
||||
{
|
||||
int nid;
|
||||
|
||||
nid = numa_cpu_lookup_table[cpu];
|
||||
|
||||
/*
|
||||
* Fall back to node 0 if nid is unset (it should be, except bugs).
|
||||
* This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
|
||||
*/
|
||||
return (nid < 0) ? 0 : nid;
|
||||
}
|
||||
#else
|
||||
|
||||
static inline int early_cpu_to_node(int cpu) { return 0; }
|
||||
|
||||
static inline void dump_numa_cpu_topology(void) {}
|
||||
|
||||
static inline int sysfs_add_device_to_node(struct device *dev, int nid)
|
||||
|
|
|
@ -46,6 +46,8 @@
|
|||
#define PPC_FEATURE2_HTM_NOSC 0x01000000
|
||||
#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */
|
||||
#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
|
||||
#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
|
||||
#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
|
||||
|
||||
/*
|
||||
* IMPORTANT!
|
||||
|
|
|
@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void);
|
|||
#define COMMON_USER_POWER9 COMMON_USER_POWER8
|
||||
#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
|
||||
PPC_FEATURE2_ARCH_3_00 | \
|
||||
PPC_FEATURE2_HAS_IEEE128)
|
||||
PPC_FEATURE2_HAS_IEEE128 | \
|
||||
PPC_FEATURE2_DARN )
|
||||
|
||||
#ifdef CONFIG_PPC_BOOK3E_64
|
||||
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/export.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/libfdt.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sched.h>
|
||||
|
@ -642,7 +643,6 @@ static struct dt_cpu_feature_match __initdata
|
|||
{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
|
||||
{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
|
||||
{"processor-utilization-of-resources-register", feat_enable_purr, 0},
|
||||
{"subcore", feat_enable, CPU_FTR_SUBCORE},
|
||||
{"no-execute", feat_enable, 0},
|
||||
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
|
||||
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
|
||||
|
@ -671,12 +671,24 @@ static struct dt_cpu_feature_match __initdata
|
|||
{"wait-v3", feat_enable, 0},
|
||||
};
|
||||
|
||||
/* XXX: how to configure this? Default + boot time? */
|
||||
#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
|
||||
#define CPU_FEATURE_ENABLE_UNKNOWN 1
|
||||
#else
|
||||
#define CPU_FEATURE_ENABLE_UNKNOWN 0
|
||||
#endif
|
||||
static bool __initdata using_dt_cpu_ftrs;
|
||||
static bool __initdata enable_unknown = true;
|
||||
|
||||
static int __init dt_cpu_ftrs_parse(char *str)
|
||||
{
|
||||
if (!str)
|
||||
return 0;
|
||||
|
||||
if (!strcmp(str, "off"))
|
||||
using_dt_cpu_ftrs = false;
|
||||
else if (!strcmp(str, "known"))
|
||||
enable_unknown = false;
|
||||
else
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
|
||||
|
||||
static void __init cpufeatures_setup_start(u32 isa)
|
||||
{
|
||||
|
@ -707,7 +719,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
|
|||
}
|
||||
}
|
||||
|
||||
if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
|
||||
if (!known && enable_unknown) {
|
||||
if (!feat_try_enable_unknown(f)) {
|
||||
pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
|
||||
f->name);
|
||||
|
@ -756,6 +768,26 @@ static void __init cpufeatures_setup_finished(void)
|
|||
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
|
||||
}
|
||||
|
||||
static int __init disabled_on_cmdline(void)
|
||||
{
|
||||
unsigned long root, chosen;
|
||||
const char *p;
|
||||
|
||||
root = of_get_flat_dt_root();
|
||||
chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
|
||||
if (chosen == -FDT_ERR_NOTFOUND)
|
||||
return false;
|
||||
|
||||
p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
|
||||
if (!p)
|
||||
return false;
|
||||
|
||||
if (strstr(p, "dt_cpu_ftrs=off"))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
|
||||
int depth, void *data)
|
||||
{
|
||||
|
@ -766,8 +798,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool __initdata using_dt_cpu_ftrs = false;
|
||||
|
||||
bool __init dt_cpu_ftrs_in_use(void)
|
||||
{
|
||||
return using_dt_cpu_ftrs;
|
||||
|
@ -775,6 +805,8 @@ bool __init dt_cpu_ftrs_in_use(void)
|
|||
|
||||
bool __init dt_cpu_ftrs_init(void *fdt)
|
||||
{
|
||||
using_dt_cpu_ftrs = false;
|
||||
|
||||
/* Setup and verify the FDT, if it fails we just bail */
|
||||
if (!early_init_dt_verify(fdt))
|
||||
return false;
|
||||
|
@ -782,6 +814,9 @@ bool __init dt_cpu_ftrs_init(void *fdt)
|
|||
if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
|
||||
return false;
|
||||
|
||||
if (disabled_on_cmdline())
|
||||
return false;
|
||||
|
||||
cpufeatures_setup_cpu();
|
||||
|
||||
using_dt_cpu_ftrs = true;
|
||||
|
@ -1027,5 +1062,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
|
|||
|
||||
void __init dt_cpu_ftrs_scan(void)
|
||||
{
|
||||
if (!using_dt_cpu_ftrs)
|
||||
return;
|
||||
|
||||
of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
|
||||
}
|
||||
|
|
|
@ -1666,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
|
|||
#ifdef CONFIG_VSX
|
||||
current->thread.used_vsr = 0;
|
||||
#endif
|
||||
current->thread.load_fp = 0;
|
||||
memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state));
|
||||
current->thread.fp_save_area = NULL;
|
||||
#ifdef CONFIG_ALTIVEC
|
||||
|
@ -1674,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
|
|||
current->thread.vr_save_area = NULL;
|
||||
current->thread.vrsave = 0;
|
||||
current->thread.used_vr = 0;
|
||||
current->thread.load_vec = 0;
|
||||
#endif /* CONFIG_ALTIVEC */
|
||||
#ifdef CONFIG_SPE
|
||||
memset(current->thread.evr, 0, sizeof(current->thread.evr));
|
||||
|
@ -1685,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
|
|||
current->thread.tm_tfhar = 0;
|
||||
current->thread.tm_texasr = 0;
|
||||
current->thread.tm_tfiar = 0;
|
||||
current->thread.load_tm = 0;
|
||||
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
|
||||
}
|
||||
EXPORT_SYMBOL(start_thread);
|
||||
|
|
|
@ -161,7 +161,9 @@ static struct ibm_pa_feature {
|
|||
{ .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
|
||||
{ .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
|
||||
{ .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
|
||||
#ifdef CONFIG_PPC_RADIX_MMU
|
||||
{ .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX },
|
||||
#endif
|
||||
{ .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
|
||||
{ .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
|
||||
.cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
|
||||
|
|
|
@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p)
|
|||
|
||||
#ifdef CONFIG_PPC_MM_SLICES
|
||||
#ifdef CONFIG_PPC64
|
||||
init_mm.context.addr_limit = TASK_SIZE_128TB;
|
||||
init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
|
||||
#else
|
||||
#error "context.addr_limit not initialized."
|
||||
#endif
|
||||
|
|
|
@ -661,7 +661,7 @@ void __init emergency_stack_init(void)
|
|||
|
||||
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
|
||||
{
|
||||
return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
|
||||
return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
|
||||
__pa(MAX_DMA_ADDRESS));
|
||||
}
|
||||
|
||||
|
@ -672,7 +672,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
|
|||
|
||||
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
|
||||
{
|
||||
if (cpu_to_node(from) == cpu_to_node(to))
|
||||
if (early_cpu_to_node(from) == early_cpu_to_node(to))
|
||||
return LOCAL_DISTANCE;
|
||||
else
|
||||
return REMOTE_DISTANCE;
|
||||
|
|
|
@ -99,7 +99,7 @@ static int hash__init_new_context(struct mm_struct *mm)
|
|||
* mm->context.addr_limit. Default to max task size so that we copy the
|
||||
* default values to paca which will help us to handle slb miss early.
|
||||
*/
|
||||
mm->context.addr_limit = TASK_SIZE_128TB;
|
||||
mm->context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
|
||||
|
||||
/*
|
||||
* The old code would re-promote on fork, we don't do that when using
|
||||
|
|
|
@ -402,7 +402,7 @@ static struct power_pmu power9_isa207_pmu = {
|
|||
.name = "POWER9",
|
||||
.n_counter = MAX_PMU_COUNTERS,
|
||||
.add_fields = ISA207_ADD_FIELDS,
|
||||
.test_adder = ISA207_TEST_ADDER,
|
||||
.test_adder = P9_DD1_TEST_ADDER,
|
||||
.compute_mmcr = isa207_compute_mmcr,
|
||||
.config_bhrb = power9_config_bhrb,
|
||||
.bhrb_filter_map = power9_bhrb_filter_map,
|
||||
|
@ -421,7 +421,7 @@ static struct power_pmu power9_pmu = {
|
|||
.name = "POWER9",
|
||||
.n_counter = MAX_PMU_COUNTERS,
|
||||
.add_fields = ISA207_ADD_FIELDS,
|
||||
.test_adder = P9_DD1_TEST_ADDER,
|
||||
.test_adder = ISA207_TEST_ADDER,
|
||||
.compute_mmcr = isa207_compute_mmcr,
|
||||
.config_bhrb = power9_config_bhrb,
|
||||
.bhrb_filter_map = power9_bhrb_filter_map,
|
||||
|
|
|
@ -59,6 +59,17 @@ config PPC_OF_BOOT_TRAMPOLINE
|
|||
|
||||
In case of doubt, say Y
|
||||
|
||||
config PPC_DT_CPU_FTRS
|
||||
bool "Device-tree based CPU feature discovery & setup"
|
||||
depends on PPC_BOOK3S_64
|
||||
default y
|
||||
help
|
||||
This enables code to use a new device tree binding for describing CPU
|
||||
compatibility and features. Saying Y here will attempt to use the new
|
||||
binding if the firmware provides it. Currently only the skiboot
|
||||
firmware provides this binding.
|
||||
If you're not sure say Y.
|
||||
|
||||
config UDBG_RTAS_CONSOLE
|
||||
bool "RTAS based debug console"
|
||||
depends on PPC_RTAS
|
||||
|
|
|
@ -197,7 +197,9 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
|
|||
(REGION_ID(ea) != USER_REGION_ID)) {
|
||||
|
||||
spin_unlock(&spu->register_lock);
|
||||
ret = hash_page(ea, _PAGE_PRESENT | _PAGE_READ, 0x300, dsisr);
|
||||
ret = hash_page(ea,
|
||||
_PAGE_PRESENT | _PAGE_READ | _PAGE_PRIVILEGED,
|
||||
0x300, dsisr);
|
||||
spin_lock(&spu->register_lock);
|
||||
|
||||
if (!ret) {
|
||||
|
|
|
@ -175,6 +175,8 @@ static int spufs_arch_write_note(struct spu_context *ctx, int i,
|
|||
skip = roundup(cprm->pos - total + sz, 4) - cprm->pos;
|
||||
if (!dump_skip(cprm, skip))
|
||||
goto Eio;
|
||||
|
||||
rc = 0;
|
||||
out:
|
||||
free_page((unsigned long)buf);
|
||||
return rc;
|
||||
|
|
|
@ -714,7 +714,7 @@ static void pnv_npu2_release_context(struct kref *kref)
|
|||
void pnv_npu2_destroy_context(struct npu_context *npu_context,
|
||||
struct pci_dev *gpdev)
|
||||
{
|
||||
struct pnv_phb *nphb, *phb;
|
||||
struct pnv_phb *nphb;
|
||||
struct npu *npu;
|
||||
struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
|
||||
struct device_node *nvlink_dn;
|
||||
|
@ -728,13 +728,12 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
|
|||
|
||||
nphb = pci_bus_to_host(npdev->bus)->private_data;
|
||||
npu = &nphb->npu;
|
||||
phb = pci_bus_to_host(gpdev->bus)->private_data;
|
||||
nvlink_dn = of_parse_phandle(npdev->dev.of_node, "ibm,nvlink", 0);
|
||||
if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
|
||||
&nvlink_index)))
|
||||
return;
|
||||
npu_context->npdev[npu->index][nvlink_index] = NULL;
|
||||
opal_npu_destroy_context(phb->opal_id, npu_context->mm->context.id,
|
||||
opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
|
||||
PCI_DEVID(gpdev->bus->number, gpdev->devfn));
|
||||
kref_put(&npu_context->kref, pnv_npu2_release_context);
|
||||
}
|
||||
|
|
|
@ -407,7 +407,13 @@ static DEVICE_ATTR(subcores_per_core, 0644,
|
|||
|
||||
static int subcore_init(void)
|
||||
{
|
||||
if (!cpu_has_feature(CPU_FTR_SUBCORE))
|
||||
unsigned pvr_ver;
|
||||
|
||||
pvr_ver = PVR_VER(mfspr(SPRN_PVR));
|
||||
|
||||
if (pvr_ver != PVR_POWER8 &&
|
||||
pvr_ver != PVR_POWER8E &&
|
||||
pvr_ver != PVR_POWER8NVL)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
|
|
|
@ -124,6 +124,7 @@ static struct property *dlpar_clone_drconf_property(struct device_node *dn)
|
|||
for (i = 0; i < num_lmbs; i++) {
|
||||
lmbs[i].base_addr = be64_to_cpu(lmbs[i].base_addr);
|
||||
lmbs[i].drc_index = be32_to_cpu(lmbs[i].drc_index);
|
||||
lmbs[i].aa_index = be32_to_cpu(lmbs[i].aa_index);
|
||||
lmbs[i].flags = be32_to_cpu(lmbs[i].flags);
|
||||
}
|
||||
|
||||
|
@ -147,6 +148,7 @@ static void dlpar_update_drconf_property(struct device_node *dn,
|
|||
for (i = 0; i < num_lmbs; i++) {
|
||||
lmbs[i].base_addr = cpu_to_be64(lmbs[i].base_addr);
|
||||
lmbs[i].drc_index = cpu_to_be32(lmbs[i].drc_index);
|
||||
lmbs[i].aa_index = cpu_to_be32(lmbs[i].aa_index);
|
||||
lmbs[i].flags = cpu_to_be32(lmbs[i].flags);
|
||||
}
|
||||
|
||||
|
|
|
@ -75,7 +75,8 @@ static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
|
|||
|
||||
static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
|
||||
{
|
||||
struct u8_gpio_chip *u8_gc = gpiochip_get_data(&mm_gc->gc);
|
||||
struct u8_gpio_chip *u8_gc =
|
||||
container_of(mm_gc, struct u8_gpio_chip, mm_gc);
|
||||
|
||||
u8_gc->data = in_8(mm_gc->regs);
|
||||
}
|
||||
|
|
|
@ -363,9 +363,6 @@ config COMPAT
|
|||
config SYSVIPC_COMPAT
|
||||
def_bool y if COMPAT && SYSVIPC
|
||||
|
||||
config KEYS_COMPAT
|
||||
def_bool y if COMPAT && KEYS
|
||||
|
||||
config SMP
|
||||
def_bool y
|
||||
prompt "Symmetric multi-processing support"
|
||||
|
|
|
@ -541,7 +541,6 @@ struct kvm_s390_float_interrupt {
|
|||
struct mutex ais_lock;
|
||||
u8 simm;
|
||||
u8 nimm;
|
||||
int ais_enabled;
|
||||
};
|
||||
|
||||
struct kvm_hw_wp_info_arch {
|
||||
|
|
|
@ -2160,7 +2160,7 @@ static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
|
|||
struct kvm_s390_ais_req req;
|
||||
int ret = 0;
|
||||
|
||||
if (!fi->ais_enabled)
|
||||
if (!test_kvm_facility(kvm, 72))
|
||||
return -ENOTSUPP;
|
||||
|
||||
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
|
||||
|
@ -2204,7 +2204,7 @@ static int kvm_s390_inject_airq(struct kvm *kvm,
|
|||
};
|
||||
int ret = 0;
|
||||
|
||||
if (!fi->ais_enabled || !adapter->suppressible)
|
||||
if (!test_kvm_facility(kvm, 72) || !adapter->suppressible)
|
||||
return kvm_s390_inject_vm(kvm, &s390int);
|
||||
|
||||
mutex_lock(&fi->ais_lock);
|
||||
|
|
|
@ -558,7 +558,6 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
|
|||
} else {
|
||||
set_kvm_facility(kvm->arch.model.fac_mask, 72);
|
||||
set_kvm_facility(kvm->arch.model.fac_list, 72);
|
||||
kvm->arch.float_int.ais_enabled = 1;
|
||||
r = 0;
|
||||
}
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
@ -1533,7 +1532,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
|
|||
mutex_init(&kvm->arch.float_int.ais_lock);
|
||||
kvm->arch.float_int.simm = 0;
|
||||
kvm->arch.float_int.nimm = 0;
|
||||
kvm->arch.float_int.ais_enabled = 0;
|
||||
spin_lock_init(&kvm->arch.float_int.lock);
|
||||
for (i = 0; i < FIRQ_LIST_COUNT; i++)
|
||||
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
|
||||
|
|
|
@ -192,9 +192,9 @@ config NR_CPUS
|
|||
int "Maximum number of CPUs"
|
||||
depends on SMP
|
||||
range 2 32 if SPARC32
|
||||
range 2 1024 if SPARC64
|
||||
range 2 4096 if SPARC64
|
||||
default 32 if SPARC32
|
||||
default 64 if SPARC64
|
||||
default 4096 if SPARC64
|
||||
|
||||
source kernel/Kconfig.hz
|
||||
|
||||
|
@ -295,9 +295,13 @@ config NUMA
|
|||
depends on SPARC64 && SMP
|
||||
|
||||
config NODES_SHIFT
|
||||
int
|
||||
default "4"
|
||||
int "Maximum NUMA Nodes (as a power of 2)"
|
||||
range 4 5 if SPARC64
|
||||
default "5"
|
||||
depends on NEED_MULTIPLE_NODES
|
||||
help
|
||||
Specify the maximum number of NUMA Nodes available on the target
|
||||
system. Increases memory reserved to accommodate various tables.
|
||||
|
||||
# Some NUMA nodes have memory ranges that span
|
||||
# other nodes. Even though a pfn is valid and
|
||||
|
@ -573,9 +577,6 @@ config SYSVIPC_COMPAT
|
|||
depends on COMPAT && SYSVIPC
|
||||
default y
|
||||
|
||||
config KEYS_COMPAT
|
||||
def_bool y if COMPAT && KEYS
|
||||
|
||||
endmenu
|
||||
|
||||
source "net/Kconfig"
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
#define CTX_NR_MASK TAG_CONTEXT_BITS
|
||||
#define CTX_HW_MASK (CTX_NR_MASK | CTX_PGSZ_MASK)
|
||||
|
||||
#define CTX_FIRST_VERSION ((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
|
||||
#define CTX_FIRST_VERSION BIT(CTX_VERSION_SHIFT)
|
||||
#define CTX_VALID(__ctx) \
|
||||
(!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
|
||||
#define CTX_HWBITS(__ctx) ((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
|
||||
|
|
|
@ -19,13 +19,8 @@ extern spinlock_t ctx_alloc_lock;
|
|||
extern unsigned long tlb_context_cache;
|
||||
extern unsigned long mmu_context_bmap[];
|
||||
|
||||
DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
|
||||
void get_new_mmu_context(struct mm_struct *mm);
|
||||
#ifdef CONFIG_SMP
|
||||
void smp_new_mmu_context_version(void);
|
||||
#else
|
||||
#define smp_new_mmu_context_version() do { } while (0)
|
||||
#endif
|
||||
|
||||
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
||||
void destroy_context(struct mm_struct *mm);
|
||||
|
||||
|
@ -76,8 +71,9 @@ void __flush_tlb_mm(unsigned long, unsigned long);
|
|||
static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
|
||||
{
|
||||
unsigned long ctx_valid, flags;
|
||||
int cpu;
|
||||
int cpu = smp_processor_id();
|
||||
|
||||
per_cpu(per_cpu_secondary_mm, cpu) = mm;
|
||||
if (unlikely(mm == &init_mm))
|
||||
return;
|
||||
|
||||
|
@ -123,7 +119,6 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
|
|||
* for the first time, we must flush that context out of the
|
||||
* local TLB.
|
||||
*/
|
||||
cpu = smp_processor_id();
|
||||
if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
|
||||
cpumask_set_cpu(cpu, mm_cpumask(mm));
|
||||
__flush_tlb_mm(CTX_HWBITS(mm->context),
|
||||
|
@ -133,26 +128,7 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
|
|||
}
|
||||
|
||||
#define deactivate_mm(tsk,mm) do { } while (0)
|
||||
|
||||
/* Activate a new MM instance for the current task. */
|
||||
static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
|
||||
{
|
||||
unsigned long flags;
|
||||
int cpu;
|
||||
|
||||
spin_lock_irqsave(&mm->context.lock, flags);
|
||||
if (!CTX_VALID(mm->context))
|
||||
get_new_mmu_context(mm);
|
||||
cpu = smp_processor_id();
|
||||
if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
|
||||
cpumask_set_cpu(cpu, mm_cpumask(mm));
|
||||
|
||||
load_secondary_context(mm);
|
||||
__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
|
||||
tsb_context_switch(mm);
|
||||
spin_unlock_irqrestore(&mm->context.lock, flags);
|
||||
}
|
||||
|
||||
#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
|
||||
#endif /* !(__ASSEMBLY__) */
|
||||
|
||||
#endif /* !(__SPARC64_MMU_CONTEXT_H) */
|
||||
|
|
|
@ -20,7 +20,6 @@
|
|||
#define PIL_SMP_CALL_FUNC 1
|
||||
#define PIL_SMP_RECEIVE_SIGNAL 2
|
||||
#define PIL_SMP_CAPTURE 3
|
||||
#define PIL_SMP_CTX_NEW_VERSION 4
|
||||
#define PIL_DEVICE_IRQ 5
|
||||
#define PIL_SMP_CALL_FUNC_SNGL 6
|
||||
#define PIL_DEFERRED_PCR_WORK 7
|
||||
|
|
|
@ -327,6 +327,7 @@ struct vio_dev {
|
|||
int compat_len;
|
||||
|
||||
u64 dev_no;
|
||||
u64 id;
|
||||
|
||||
unsigned long channel_id;
|
||||
|
||||
|
|
|
@ -909,7 +909,7 @@ static int register_services(struct ds_info *dp)
|
|||
pbuf.req.handle = cp->handle;
|
||||
pbuf.req.major = 1;
|
||||
pbuf.req.minor = 0;
|
||||
strcpy(pbuf.req.svc_id, cp->service_id);
|
||||
strcpy(pbuf.id_buf, cp->service_id);
|
||||
|
||||
err = __ds_send(lp, &pbuf, msg_len);
|
||||
if (err > 0)
|
||||
|
|
|
@ -1034,17 +1034,26 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
|
|||
{
|
||||
#ifdef CONFIG_SMP
|
||||
unsigned long page;
|
||||
void *mondo, *p;
|
||||
|
||||
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
|
||||
BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > PAGE_SIZE);
|
||||
|
||||
/* Make sure mondo block is 64byte aligned */
|
||||
p = kzalloc(127, GFP_KERNEL);
|
||||
if (!p) {
|
||||
prom_printf("SUN4V: Error, cannot allocate mondo block.\n");
|
||||
prom_halt();
|
||||
}
|
||||
mondo = (void *)(((unsigned long)p + 63) & ~0x3f);
|
||||
tb->cpu_mondo_block_pa = __pa(mondo);
|
||||
|
||||
page = get_zeroed_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
|
||||
prom_printf("SUN4V: Error, cannot allocate cpu list page.\n");
|
||||
prom_halt();
|
||||
}
|
||||
|
||||
tb->cpu_mondo_block_pa = __pa(page);
|
||||
tb->cpu_list_pa = __pa(page + 64);
|
||||
tb->cpu_list_pa = __pa(page);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -37,7 +37,6 @@ void handle_stdfmna(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr
|
|||
/* smp_64.c */
|
||||
void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
|
||||
void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
|
||||
void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs);
|
||||
void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
|
||||
void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
|
||||
|
||||
|
|
|
@ -964,37 +964,6 @@ void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
|
|||
preempt_enable();
|
||||
}
|
||||
|
||||
void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
|
||||
{
|
||||
struct mm_struct *mm;
|
||||
unsigned long flags;
|
||||
|
||||
clear_softint(1 << irq);
|
||||
|
||||
/* See if we need to allocate a new TLB context because
|
||||
* the version of the one we are using is now out of date.
|
||||
*/
|
||||
mm = current->active_mm;
|
||||
if (unlikely(!mm || (mm == &init_mm)))
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&mm->context.lock, flags);
|
||||
|
||||
if (unlikely(!CTX_VALID(mm->context)))
|
||||
get_new_mmu_context(mm);
|
||||
|
||||
spin_unlock_irqrestore(&mm->context.lock, flags);
|
||||
|
||||
load_secondary_context(mm);
|
||||
__flush_tlb_mm(CTX_HWBITS(mm->context),
|
||||
SECONDARY_CONTEXT);
|
||||
}
|
||||
|
||||
void smp_new_mmu_context_version(void)
|
||||
{
|
||||
smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KGDB
|
||||
void kgdb_roundup_cpus(unsigned long flags)
|
||||
{
|
||||
|
|
|
@ -455,13 +455,16 @@ __tsb_context_switch:
|
|||
.type copy_tsb,#function
|
||||
copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
|
||||
* %o2=new_tsb_base, %o3=new_tsb_size
|
||||
* %o4=page_size_shift
|
||||
*/
|
||||
sethi %uhi(TSB_PASS_BITS), %g7
|
||||
srlx %o3, 4, %o3
|
||||
add %o0, %o1, %g1 /* end of old tsb */
|
||||
add %o0, %o1, %o1 /* end of old tsb */
|
||||
sllx %g7, 32, %g7
|
||||
sub %o3, 1, %o3 /* %o3 == new tsb hash mask */
|
||||
|
||||
mov %o4, %g1 /* page_size_shift */
|
||||
|
||||
661: prefetcha [%o0] ASI_N, #one_read
|
||||
.section .tsb_phys_patch, "ax"
|
||||
.word 661b
|
||||
|
@ -486,9 +489,9 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
|
|||
/* This can definitely be computed faster... */
|
||||
srlx %o0, 4, %o5 /* Build index */
|
||||
and %o5, 511, %o5 /* Mask index */
|
||||
sllx %o5, PAGE_SHIFT, %o5 /* Put into vaddr position */
|
||||
sllx %o5, %g1, %o5 /* Put into vaddr position */
|
||||
or %o4, %o5, %o4 /* Full VADDR. */
|
||||
srlx %o4, PAGE_SHIFT, %o4 /* Shift down to create index */
|
||||
srlx %o4, %g1, %o4 /* Shift down to create index */
|
||||
and %o4, %o3, %o4 /* Mask with new_tsb_nents-1 */
|
||||
sllx %o4, 4, %o4 /* Shift back up into tsb ent offset */
|
||||
TSB_STORE(%o2 + %o4, %g2) /* Store TAG */
|
||||
|
@ -496,7 +499,7 @@ copy_tsb: /* %o0=old_tsb_base, %o1=old_tsb_size
|
|||
TSB_STORE(%o2 + %o4, %g3) /* Store TTE */
|
||||
|
||||
80: add %o0, 16, %o0
|
||||
cmp %o0, %g1
|
||||
cmp %o0, %o1
|
||||
bne,pt %xcc, 90b
|
||||
nop
|
||||
|
||||
|
|
|
@ -50,7 +50,7 @@ tl0_resv03e: BTRAP(0x3e) BTRAP(0x3f) BTRAP(0x40)
|
|||
tl0_irq1: TRAP_IRQ(smp_call_function_client, 1)
|
||||
tl0_irq2: TRAP_IRQ(smp_receive_signal_client, 2)
|
||||
tl0_irq3: TRAP_IRQ(smp_penguin_jailcell, 3)
|
||||
tl0_irq4: TRAP_IRQ(smp_new_mmu_context_version_client, 4)
|
||||
tl0_irq4: BTRAP(0x44)
|
||||
#else
|
||||
tl0_irq1: BTRAP(0x41)
|
||||
tl0_irq2: BTRAP(0x42)
|
||||
|
|
|
@ -302,13 +302,16 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
|
|||
if (!id) {
|
||||
dev_set_name(&vdev->dev, "%s", bus_id_name);
|
||||
vdev->dev_no = ~(u64)0;
|
||||
vdev->id = ~(u64)0;
|
||||
} else if (!cfg_handle) {
|
||||
dev_set_name(&vdev->dev, "%s-%llu", bus_id_name, *id);
|
||||
vdev->dev_no = *id;
|
||||
vdev->id = ~(u64)0;
|
||||
} else {
|
||||
dev_set_name(&vdev->dev, "%s-%llu-%llu", bus_id_name,
|
||||
*cfg_handle, *id);
|
||||
vdev->dev_no = *cfg_handle;
|
||||
vdev->id = *id;
|
||||
}
|
||||
|
||||
vdev->dev.parent = parent;
|
||||
|
@ -351,27 +354,84 @@ static void vio_add(struct mdesc_handle *hp, u64 node)
|
|||
(void) vio_create_one(hp, node, &root_vdev->dev);
|
||||
}
|
||||
|
||||
struct vio_md_node_query {
|
||||
const char *type;
|
||||
u64 dev_no;
|
||||
u64 id;
|
||||
};
|
||||
|
||||
static int vio_md_node_match(struct device *dev, void *arg)
|
||||
{
|
||||
struct vio_md_node_query *query = (struct vio_md_node_query *) arg;
|
||||
struct vio_dev *vdev = to_vio_dev(dev);
|
||||
|
||||
if (vdev->mp == (u64) arg)
|
||||
return 1;
|
||||
if (vdev->dev_no != query->dev_no)
|
||||
return 0;
|
||||
if (vdev->id != query->id)
|
||||
return 0;
|
||||
if (strcmp(vdev->type, query->type))
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void vio_remove(struct mdesc_handle *hp, u64 node)
|
||||
{
|
||||
const char *type;
|
||||
const u64 *id, *cfg_handle;
|
||||
u64 a;
|
||||
struct vio_md_node_query query;
|
||||
struct device *dev;
|
||||
|
||||
dev = device_find_child(&root_vdev->dev, (void *) node,
|
||||
type = mdesc_get_property(hp, node, "device-type", NULL);
|
||||
if (!type) {
|
||||
type = mdesc_get_property(hp, node, "name", NULL);
|
||||
if (!type)
|
||||
type = mdesc_node_name(hp, node);
|
||||
}
|
||||
|
||||
query.type = type;
|
||||
|
||||
id = mdesc_get_property(hp, node, "id", NULL);
|
||||
cfg_handle = NULL;
|
||||
mdesc_for_each_arc(a, hp, node, MDESC_ARC_TYPE_BACK) {
|
||||
u64 target;
|
||||
|
||||
target = mdesc_arc_target(hp, a);
|
||||
cfg_handle = mdesc_get_property(hp, target,
|
||||
"cfg-handle", NULL);
|
||||
if (cfg_handle)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!id) {
|
||||
query.dev_no = ~(u64)0;
|
||||
query.id = ~(u64)0;
|
||||
} else if (!cfg_handle) {
|
||||
query.dev_no = *id;
|
||||
query.id = ~(u64)0;
|
||||
} else {
|
||||
query.dev_no = *cfg_handle;
|
||||
query.id = *id;
|
||||
}
|
||||
|
||||
dev = device_find_child(&root_vdev->dev, &query,
|
||||
vio_md_node_match);
|
||||
if (dev) {
|
||||
printk(KERN_INFO "VIO: Removing device %s\n", dev_name(dev));
|
||||
|
||||
device_unregister(dev);
|
||||
put_device(dev);
|
||||
} else {
|
||||
if (!id)
|
||||
printk(KERN_ERR "VIO: Removed unknown %s node.\n",
|
||||
type);
|
||||
else if (!cfg_handle)
|
||||
printk(KERN_ERR "VIO: Removed unknown %s node %llu.\n",
|
||||
type, *id);
|
||||
else
|
||||
printk(KERN_ERR "VIO: Removed unknown %s node %llu-%llu.\n",
|
||||
type, *cfg_handle, *id);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ lib-$(CONFIG_SPARC32) += copy_user.o locks.o
|
|||
lib-$(CONFIG_SPARC64) += atomic_64.o
|
||||
lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o
|
||||
lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o
|
||||
lib-$(CONFIG_SPARC64) += multi3.o
|
||||
|
||||
lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o
|
||||
lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o
|
||||
|
|
35
arch/sparc/lib/multi3.S
Normal file
35
arch/sparc/lib/multi3.S
Normal file
|
@ -0,0 +1,35 @@
|
|||
#include <linux/linkage.h>
|
||||
#include <asm/export.h>
|
||||
|
||||
.text
|
||||
.align 4
|
||||
ENTRY(__multi3) /* %o0 = u, %o1 = v */
|
||||
mov %o1, %g1
|
||||
srl %o3, 0, %g4
|
||||
mulx %g4, %g1, %o1
|
||||
srlx %g1, 0x20, %g3
|
||||
mulx %g3, %g4, %g5
|
||||
sllx %g5, 0x20, %o5
|
||||
srl %g1, 0, %g4
|
||||
sub %o1, %o5, %o5
|
||||
srlx %o5, 0x20, %o5
|
||||
addcc %g5, %o5, %g5
|
||||
srlx %o3, 0x20, %o5
|
||||
mulx %g4, %o5, %g4
|
||||
mulx %g3, %o5, %o5
|
||||
sethi %hi(0x80000000), %g3
|
||||
addcc %g5, %g4, %g5
|
||||
srlx %g5, 0x20, %g5
|
||||
add %g3, %g3, %g3
|
||||
movcc %xcc, %g0, %g3
|
||||
addcc %o5, %g5, %o5
|
||||
sllx %g4, 0x20, %g4
|
||||
add %o1, %g4, %o1
|
||||
add %o5, %g3, %g2
|
||||
mulx %g1, %o2, %g1
|
||||
add %g1, %g2, %g1
|
||||
mulx %o0, %o3, %o0
|
||||
retl
|
||||
add %g1, %o0, %o0
|
||||
ENDPROC(__multi3)
|
||||
EXPORT_SYMBOL(__multi3)
|
|
@ -358,7 +358,8 @@ static int __init setup_hugepagesz(char *string)
|
|||
}
|
||||
|
||||
if ((hv_pgsz_mask & cpu_pgsz_mask) == 0U) {
|
||||
pr_warn("hugepagesz=%llu not supported by MMU.\n",
|
||||
hugetlb_bad_size();
|
||||
pr_err("hugepagesz=%llu not supported by MMU.\n",
|
||||
hugepage_size);
|
||||
goto out;
|
||||
}
|
||||
|
@ -706,10 +707,58 @@ EXPORT_SYMBOL(__flush_dcache_range);
|
|||
|
||||
/* get_new_mmu_context() uses "cache + 1". */
|
||||
DEFINE_SPINLOCK(ctx_alloc_lock);
|
||||
unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
|
||||
unsigned long tlb_context_cache = CTX_FIRST_VERSION;
|
||||
#define MAX_CTX_NR (1UL << CTX_NR_BITS)
|
||||
#define CTX_BMAP_SLOTS BITS_TO_LONGS(MAX_CTX_NR)
|
||||
DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
|
||||
DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
|
||||
|
||||
static void mmu_context_wrap(void)
|
||||
{
|
||||
unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
|
||||
unsigned long new_ver, new_ctx, old_ctx;
|
||||
struct mm_struct *mm;
|
||||
int cpu;
|
||||
|
||||
bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
|
||||
|
||||
/* Reserve kernel context */
|
||||
set_bit(0, mmu_context_bmap);
|
||||
|
||||
new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
|
||||
if (unlikely(new_ver == 0))
|
||||
new_ver = CTX_FIRST_VERSION;
|
||||
tlb_context_cache = new_ver;
|
||||
|
||||
/*
|
||||
* Make sure that any new mm that are added into per_cpu_secondary_mm,
|
||||
* are going to go through get_new_mmu_context() path.
|
||||
*/
|
||||
mb();
|
||||
|
||||
/*
|
||||
* Updated versions to current on those CPUs that had valid secondary
|
||||
* contexts
|
||||
*/
|
||||
for_each_online_cpu(cpu) {
|
||||
/*
|
||||
* If a new mm is stored after we took this mm from the array,
|
||||
* it will go into get_new_mmu_context() path, because we
|
||||
* already bumped the version in tlb_context_cache.
|
||||
*/
|
||||
mm = per_cpu(per_cpu_secondary_mm, cpu);
|
||||
|
||||
if (unlikely(!mm || mm == &init_mm))
|
||||
continue;
|
||||
|
||||
old_ctx = mm->context.sparc64_ctx_val;
|
||||
if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
|
||||
new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
|
||||
set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
|
||||
mm->context.sparc64_ctx_val = new_ctx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Caller does TLB context flushing on local CPU if necessary.
|
||||
* The caller also ensures that CTX_VALID(mm->context) is false.
|
||||
|
@ -725,48 +774,30 @@ void get_new_mmu_context(struct mm_struct *mm)
|
|||
{
|
||||
unsigned long ctx, new_ctx;
|
||||
unsigned long orig_pgsz_bits;
|
||||
int new_version;
|
||||
|
||||
spin_lock(&ctx_alloc_lock);
|
||||
retry:
|
||||
/* wrap might have happened, test again if our context became valid */
|
||||
if (unlikely(CTX_VALID(mm->context)))
|
||||
goto out;
|
||||
orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
|
||||
ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
|
||||
new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
|
||||
new_version = 0;
|
||||
if (new_ctx >= (1 << CTX_NR_BITS)) {
|
||||
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
|
||||
if (new_ctx >= ctx) {
|
||||
int i;
|
||||
new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
|
||||
CTX_FIRST_VERSION;
|
||||
if (new_ctx == 1)
|
||||
new_ctx = CTX_FIRST_VERSION;
|
||||
|
||||
/* Don't call memset, for 16 entries that's just
|
||||
* plain silly...
|
||||
*/
|
||||
mmu_context_bmap[0] = 3;
|
||||
mmu_context_bmap[1] = 0;
|
||||
mmu_context_bmap[2] = 0;
|
||||
mmu_context_bmap[3] = 0;
|
||||
for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
|
||||
mmu_context_bmap[i + 0] = 0;
|
||||
mmu_context_bmap[i + 1] = 0;
|
||||
mmu_context_bmap[i + 2] = 0;
|
||||
mmu_context_bmap[i + 3] = 0;
|
||||
}
|
||||
new_version = 1;
|
||||
goto out;
|
||||
mmu_context_wrap();
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
if (mm->context.sparc64_ctx_val)
|
||||
cpumask_clear(mm_cpumask(mm));
|
||||
mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
|
||||
new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
|
||||
out:
|
||||
tlb_context_cache = new_ctx;
|
||||
mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
|
||||
out:
|
||||
spin_unlock(&ctx_alloc_lock);
|
||||
|
||||
if (unlikely(new_version))
|
||||
smp_new_mmu_context_version();
|
||||
}
|
||||
|
||||
static int numa_enabled = 1;
|
||||
|
|
|
@ -496,7 +496,8 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
|
|||
extern void copy_tsb(unsigned long old_tsb_base,
|
||||
unsigned long old_tsb_size,
|
||||
unsigned long new_tsb_base,
|
||||
unsigned long new_tsb_size);
|
||||
unsigned long new_tsb_size,
|
||||
unsigned long page_size_shift);
|
||||
unsigned long old_tsb_base = (unsigned long) old_tsb;
|
||||
unsigned long new_tsb_base = (unsigned long) new_tsb;
|
||||
|
||||
|
@ -504,7 +505,9 @@ void tsb_grow(struct mm_struct *mm, unsigned long tsb_index, unsigned long rss)
|
|||
old_tsb_base = __pa(old_tsb_base);
|
||||
new_tsb_base = __pa(new_tsb_base);
|
||||
}
|
||||
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size);
|
||||
copy_tsb(old_tsb_base, old_size, new_tsb_base, new_size,
|
||||
tsb_index == MM_TSB_BASE ?
|
||||
PAGE_SHIFT : REAL_HPAGE_SHIFT);
|
||||
}
|
||||
|
||||
mm->context.tsb_block[tsb_index].tsb = new_tsb;
|
||||
|
|
|
@ -971,11 +971,6 @@ xcall_capture:
|
|||
wr %g0, (1 << PIL_SMP_CAPTURE), %set_softint
|
||||
retry
|
||||
|
||||
.globl xcall_new_mmu_context_version
|
||||
xcall_new_mmu_context_version:
|
||||
wr %g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
|
||||
retry
|
||||
|
||||
#ifdef CONFIG_KGDB
|
||||
.globl xcall_kgdb_capture
|
||||
xcall_kgdb_capture:
|
||||
|
|
|
@ -360,7 +360,7 @@ config SMP
|
|||
Management" code will be disabled if you say Y here.
|
||||
|
||||
See also <file:Documentation/x86/i386/IO-APIC.txt>,
|
||||
<file:Documentation/nmi_watchdog.txt> and the SMP-HOWTO available at
|
||||
<file:Documentation/lockup-watchdogs.txt> and the SMP-HOWTO available at
|
||||
<http://www.tldp.org/docs.html#howto>.
|
||||
|
||||
If you don't know what to do here, say N.
|
||||
|
@ -2776,10 +2776,6 @@ config COMPAT_FOR_U64_ALIGNMENT
|
|||
config SYSVIPC_COMPAT
|
||||
def_bool y
|
||||
depends on SYSVIPC
|
||||
|
||||
config KEYS_COMPAT
|
||||
def_bool y
|
||||
depends on KEYS
|
||||
endif
|
||||
|
||||
endmenu
|
||||
|
|
|
@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|||
# If '-Os' is enabled, disable it and print a warning.
|
||||
ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
undefine CONFIG_CC_OPTIMIZE_FOR_SIZE
|
||||
$(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
|
||||
$(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
|
|
@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
|
|||
quiet_cmd_check_data_rel = DATAREL $@
|
||||
define cmd_check_data_rel
|
||||
for obj in $(filter %.o,$^); do \
|
||||
readelf -S $$obj | grep -qF .rel.local && { \
|
||||
${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
|
||||
echo "error: $$obj has data relocations!" >&2; \
|
||||
exit 1; \
|
||||
} || true; \
|
||||
|
|
|
@ -251,6 +251,23 @@ ENTRY(__switch_to_asm)
|
|||
jmp __switch_to
|
||||
END(__switch_to_asm)
|
||||
|
||||
/*
|
||||
* The unwinder expects the last frame on the stack to always be at the same
|
||||
* offset from the end of the page, which allows it to validate the stack.
|
||||
* Calling schedule_tail() directly would break that convention because its an
|
||||
* asmlinkage function so its argument has to be pushed on the stack. This
|
||||
* wrapper creates a proper "end of stack" frame header before the call.
|
||||
*/
|
||||
ENTRY(schedule_tail_wrapper)
|
||||
FRAME_BEGIN
|
||||
|
||||
pushl %eax
|
||||
call schedule_tail
|
||||
popl %eax
|
||||
|
||||
FRAME_END
|
||||
ret
|
||||
ENDPROC(schedule_tail_wrapper)
|
||||
/*
|
||||
* A newly forked process directly context switches into this address.
|
||||
*
|
||||
|
@ -259,24 +276,15 @@ END(__switch_to_asm)
|
|||
* edi: kernel thread arg
|
||||
*/
|
||||
ENTRY(ret_from_fork)
|
||||
FRAME_BEGIN /* help unwinder find end of stack */
|
||||
|
||||
/*
|
||||
* schedule_tail() is asmlinkage so we have to put its 'prev' argument
|
||||
* on the stack.
|
||||
*/
|
||||
pushl %eax
|
||||
call schedule_tail
|
||||
popl %eax
|
||||
call schedule_tail_wrapper
|
||||
|
||||
testl %ebx, %ebx
|
||||
jnz 1f /* kernel threads are uncommon */
|
||||
|
||||
2:
|
||||
/* When we fork, we trace the syscall return in the child, too. */
|
||||
leal FRAME_OFFSET(%esp), %eax
|
||||
movl %esp, %eax
|
||||
call syscall_return_slowpath
|
||||
FRAME_END
|
||||
jmp restore_all
|
||||
|
||||
/* kernel thread */
|
||||
|
|
|
@ -36,7 +36,6 @@
|
|||
#include <asm/smap.h>
|
||||
#include <asm/pgtable_types.h>
|
||||
#include <asm/export.h>
|
||||
#include <asm/frame.h>
|
||||
#include <linux/err.h>
|
||||
|
||||
.code64
|
||||
|
@ -406,19 +405,17 @@ END(__switch_to_asm)
|
|||
* r12: kernel thread arg
|
||||
*/
|
||||
ENTRY(ret_from_fork)
|
||||
FRAME_BEGIN /* help unwinder find end of stack */
|
||||
movq %rax, %rdi
|
||||
call schedule_tail /* rdi: 'prev' task parameter */
|
||||
call schedule_tail /* rdi: 'prev' task parameter */
|
||||
|
||||
testq %rbx, %rbx /* from kernel_thread? */
|
||||
jnz 1f /* kernel threads are uncommon */
|
||||
testq %rbx, %rbx /* from kernel_thread? */
|
||||
jnz 1f /* kernel threads are uncommon */
|
||||
|
||||
2:
|
||||
leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
||||
SWAPGS
|
||||
FRAME_END
|
||||
jmp restore_regs_and_iret
|
||||
|
||||
1:
|
||||
|
|
|
@ -266,6 +266,7 @@ static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *s
|
|||
#endif
|
||||
|
||||
int mce_available(struct cpuinfo_x86 *c);
|
||||
bool mce_is_memory_error(struct mce *m);
|
||||
|
||||
DECLARE_PER_CPU(unsigned, mce_exception_count);
|
||||
DECLARE_PER_CPU(unsigned, mce_poll_count);
|
||||
|
|
|
@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
|
|||
memcpy(insnbuf, replacement, a->replacementlen);
|
||||
insnbuf_sz = a->replacementlen;
|
||||
|
||||
/* 0xe8 is a relative jump; fix the offset. */
|
||||
if (*insnbuf == 0xe8 && a->replacementlen == 5) {
|
||||
/*
|
||||
* 0xe8 is a relative jump; fix the offset.
|
||||
*
|
||||
* Instruction length is checked before the opcode to avoid
|
||||
* accessing uninitialized bytes for zero-length replacements.
|
||||
*/
|
||||
if (a->replacementlen == 5 && *insnbuf == 0xe8) {
|
||||
*(s32 *)(insnbuf + 1) += replacement - instr;
|
||||
DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
|
||||
*(s32 *)(insnbuf + 1),
|
||||
|
|
|
@ -255,6 +255,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
|
|||
break;
|
||||
|
||||
case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */
|
||||
case 11: /* GX1 with inverted Device ID */
|
||||
#ifdef CONFIG_PCI
|
||||
{
|
||||
u32 vendor, device;
|
||||
|
|
|
@ -499,16 +499,14 @@ static int mce_usable_address(struct mce *m)
|
|||
return 1;
|
||||
}
|
||||
|
||||
static bool memory_error(struct mce *m)
|
||||
bool mce_is_memory_error(struct mce *m)
|
||||
{
|
||||
struct cpuinfo_x86 *c = &boot_cpu_data;
|
||||
|
||||
if (c->x86_vendor == X86_VENDOR_AMD) {
|
||||
if (m->cpuvendor == X86_VENDOR_AMD) {
|
||||
/* ErrCodeExt[20:16] */
|
||||
u8 xec = (m->status >> 16) & 0x1f;
|
||||
|
||||
return (xec == 0x0 || xec == 0x8);
|
||||
} else if (c->x86_vendor == X86_VENDOR_INTEL) {
|
||||
} else if (m->cpuvendor == X86_VENDOR_INTEL) {
|
||||
/*
|
||||
* Intel SDM Volume 3B - 15.9.2 Compound Error Codes
|
||||
*
|
||||
|
@ -529,6 +527,7 @@ static bool memory_error(struct mce *m)
|
|||
|
||||
return false;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mce_is_memory_error);
|
||||
|
||||
static bool cec_add_mce(struct mce *m)
|
||||
{
|
||||
|
@ -536,7 +535,7 @@ static bool cec_add_mce(struct mce *m)
|
|||
return false;
|
||||
|
||||
/* We eat only correctable DRAM errors with usable addresses. */
|
||||
if (memory_error(m) &&
|
||||
if (mce_is_memory_error(m) &&
|
||||
!(m->status & MCI_STATUS_UC) &&
|
||||
mce_usable_address(m))
|
||||
if (!cec_add_elem(m->addr >> PAGE_SHIFT))
|
||||
|
@ -713,7 +712,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
|
|||
|
||||
severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
|
||||
|
||||
if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m))
|
||||
if (severity == MCE_DEFERRED_SEVERITY && mce_is_memory_error(&m))
|
||||
if (m.status & MCI_STATUS_ADDRV)
|
||||
m.severity = severity;
|
||||
|
||||
|
|
|
@ -320,7 +320,7 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax)
|
|||
}
|
||||
|
||||
static enum ucode_state
|
||||
load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size);
|
||||
load_microcode_amd(bool save, u8 family, const u8 *data, size_t size);
|
||||
|
||||
int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
|
||||
{
|
||||
|
@ -338,8 +338,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax)
|
|||
if (!desc.mc)
|
||||
return -EINVAL;
|
||||
|
||||
ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax),
|
||||
desc.data, desc.size);
|
||||
ret = load_microcode_amd(true, x86_family(cpuid_1_eax), desc.data, desc.size);
|
||||
if (ret != UCODE_OK)
|
||||
return -EINVAL;
|
||||
|
||||
|
@ -675,7 +674,7 @@ static enum ucode_state __load_microcode_amd(u8 family, const u8 *data,
|
|||
}
|
||||
|
||||
static enum ucode_state
|
||||
load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
|
||||
load_microcode_amd(bool save, u8 family, const u8 *data, size_t size)
|
||||
{
|
||||
enum ucode_state ret;
|
||||
|
||||
|
@ -689,8 +688,8 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size)
|
|||
|
||||
#ifdef CONFIG_X86_32
|
||||
/* save BSP's matching patch for early load */
|
||||
if (cpu_data(cpu).cpu_index == boot_cpu_data.cpu_index) {
|
||||
struct ucode_patch *p = find_patch(cpu);
|
||||
if (save) {
|
||||
struct ucode_patch *p = find_patch(0);
|
||||
if (p) {
|
||||
memset(amd_ucode_patch, 0, PATCH_MAX_SIZE);
|
||||
memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data),
|
||||
|
@ -722,11 +721,12 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
|
|||
{
|
||||
char fw_name[36] = "amd-ucode/microcode_amd.bin";
|
||||
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
||||
bool bsp = c->cpu_index == boot_cpu_data.cpu_index;
|
||||
enum ucode_state ret = UCODE_NFOUND;
|
||||
const struct firmware *fw;
|
||||
|
||||
/* reload ucode container only on the boot cpu */
|
||||
if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
|
||||
if (!refresh_fw || !bsp)
|
||||
return UCODE_OK;
|
||||
|
||||
if (c->x86 >= 0x15)
|
||||
|
@ -743,7 +743,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
|
|||
goto fw_release;
|
||||
}
|
||||
|
||||
ret = load_microcode_amd(cpu, c->x86, fw->data, fw->size);
|
||||
ret = load_microcode_amd(bsp, c->x86, fw->data, fw->size);
|
||||
|
||||
fw_release:
|
||||
release_firmware(fw);
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue