[IA64] Add Variable Page Size and IA64 Support in Intel IOMMU

The patch contains Intel IOMMU IA64 specific code. It defines new
machvec dig_vtd, hooks for IOMMU, DMAR table detection, cache line flush
function, etc.

For a generic kernel with CONFIG_DMAR=y, if Intel IOMMU is detected,
dig_vtd is used for machinve vector. Otherwise, kernel falls back to
dig machine vector. Kernel parameter "machvec=dig" or "intel_iommu=off"
can be used to force kernel to boot dig machine vector.

Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
Fenghua Yu 2008-10-17 12:14:13 -07:00 committed by Tony Luck
parent 6bb7a93548
commit 62fdd7678a
23 changed files with 620 additions and 13 deletions

View file

@ -117,6 +117,7 @@ config IA64_GENERIC
select NUMA
select ACPI_NUMA
select SWIOTLB
select PCI_MSI
help
This selects the system type of your hardware. A "generic" kernel
will run on any supported IA-64 system. However, if you configure
@ -124,6 +125,7 @@ config IA64_GENERIC
generic For any supported IA-64 system
DIG-compliant For DIG ("Developer's Interface Guide") compliant systems
DIG+Intel+IOMMU For DIG systems with Intel IOMMU
HP-zx1/sx1000 For HP systems
HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.
SGI-SN2 For SGI Altix systems
@ -136,6 +138,11 @@ config IA64_DIG
bool "DIG-compliant"
select SWIOTLB
config IA64_DIG_VTD
bool "DIG+Intel+IOMMU"
select DMAR
select PCI_MSI
config IA64_HP_ZX1
bool "HP-zx1/sx1000"
help
@ -581,6 +588,16 @@ source "drivers/pci/hotplug/Kconfig"
source "drivers/pcmcia/Kconfig"
config DMAR
bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
depends on IA64_GENERIC && ACPI && EXPERIMENTAL
help
DMA remapping (DMAR) devices support enables independent address
translations for Direct Memory Access (DMA) from devices.
These DMA remapping devices are reported via ACPI tables
and include PCI device scope covered by these DMA
remapping devices.
endmenu
endif

View file

@ -53,6 +53,7 @@ libs-y += arch/ia64/lib/
core-y += arch/ia64/kernel/ arch/ia64/mm/
core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/
core-$(CONFIG_IA64_DIG) += arch/ia64/dig/
core-$(CONFIG_IA64_DIG_VTD) += arch/ia64/dig/
core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/

View file

@ -233,6 +233,8 @@ CONFIG_DMIID=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
# CONFIG_DMAR is not set
#
# Power management and ACPI
#

View file

@ -172,6 +172,8 @@ CONFIG_DMIID=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_MISC=m
# CONFIG_DMAR is not set
#
# Power management and ACPI
#

View file

@ -6,4 +6,9 @@
#
obj-y := setup.o
ifeq ($(CONFIG_DMAR), y)
obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o
else
obj-$(CONFIG_IA64_GENERIC) += machvec.o
endif
obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o

View file

@ -0,0 +1,59 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/intel-iommu.h>
void *
vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
gfp_t flags)
{
return intel_alloc_coherent(dev, size, dma_handle, flags);
}
EXPORT_SYMBOL_GPL(vtd_alloc_coherent);
void
vtd_free_coherent(struct device *dev, size_t size, void *vaddr,
dma_addr_t dma_handle)
{
intel_free_coherent(dev, size, vaddr, dma_handle);
}
EXPORT_SYMBOL_GPL(vtd_free_coherent);
dma_addr_t
vtd_map_single_attrs(struct device *dev, void *addr, size_t size,
int dir, struct dma_attrs *attrs)
{
return intel_map_single(dev, (phys_addr_t)addr, size, dir);
}
EXPORT_SYMBOL_GPL(vtd_map_single_attrs);
void
vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size,
int dir, struct dma_attrs *attrs)
{
intel_unmap_single(dev, iova, size, dir);
}
EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs);
int
vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents,
int dir, struct dma_attrs *attrs)
{
return intel_map_sg(dev, sglist, nents, dir);
}
EXPORT_SYMBOL_GPL(vtd_map_sg_attrs);
void
vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
int nents, int dir, struct dma_attrs *attrs)
{
intel_unmap_sg(dev, sglist, nents, dir);
}
EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs);
int
vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
return 0;
}
EXPORT_SYMBOL_GPL(vtd_dma_mapping_error);

View file

@ -0,0 +1,3 @@
#define MACHVEC_PLATFORM_NAME dig_vtd
#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig_vtd.h>
#include <asm/machvec_init.h>

View file

@ -34,6 +34,8 @@ do { \
#define flush_dcache_mmap_unlock(mapping) do { } while (0)
extern void flush_icache_range (unsigned long start, unsigned long end);
extern void clflush_cache_range(void *addr, int size);
#define flush_icache_user_range(vma, page, user_addr, len) \
do { \

View file

@ -10,6 +10,9 @@ struct dev_archdata {
#ifdef CONFIG_ACPI
void *acpi_handle;
#endif
#ifdef CONFIG_DMAR
void *iommu; /* hook for IOMMU specific extension */
#endif
};
#endif /* _ASM_IA64_DEVICE_H */

View file

@ -7,6 +7,49 @@
*/
#include <asm/machvec.h>
#include <linux/scatterlist.h>
#include <asm/swiotlb.h>
struct dma_mapping_ops {
int (*mapping_error)(struct device *dev,
dma_addr_t dma_addr);
void* (*alloc_coherent)(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp);
void (*free_coherent)(struct device *dev, size_t size,
void *vaddr, dma_addr_t dma_handle);
dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr,
size_t size, int direction);
void (*unmap_single)(struct device *dev, dma_addr_t addr,
size_t size, int direction);
void (*sync_single_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
int direction);
void (*sync_single_for_device)(struct device *hwdev,
dma_addr_t dma_handle, size_t size,
int direction);
void (*sync_single_range_for_cpu)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
size_t size, int direction);
void (*sync_single_range_for_device)(struct device *hwdev,
dma_addr_t dma_handle, unsigned long offset,
size_t size, int direction);
void (*sync_sg_for_cpu)(struct device *hwdev,
struct scatterlist *sg, int nelems,
int direction);
void (*sync_sg_for_device)(struct device *hwdev,
struct scatterlist *sg, int nelems,
int direction);
int (*map_sg)(struct device *hwdev, struct scatterlist *sg,
int nents, int direction);
void (*unmap_sg)(struct device *hwdev,
struct scatterlist *sg, int nents,
int direction);
int (*dma_supported_op)(struct device *hwdev, u64 mask);
int is_phys;
};
extern struct dma_mapping_ops *dma_ops;
extern struct ia64_machine_vector ia64_mv;
extern void set_iommu_machvec(void);
#define dma_alloc_coherent(dev, size, handle, gfp) \
platform_dma_alloc_coherent(dev, size, handle, (gfp) | GFP_DMA)
@ -96,4 +139,11 @@ dma_cache_sync (struct device *dev, void *vaddr, size_t size,
#define dma_is_consistent(d, h) (1) /* all we do is coherent memory... */
static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
{
return dma_ops;
}
#endif /* _ASM_IA64_DMA_MAPPING_H */

View file

@ -0,0 +1,16 @@
#ifndef _ASM_IA64_IOMMU_H
#define _ASM_IA64_IOMMU_H 1
#define cpu_has_x2apic 0
/* 10 seconds */
#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
extern void pci_iommu_shutdown(void);
extern void no_iommu_init(void);
extern int force_iommu, no_iommu;
extern int iommu_detected;
extern void iommu_dma_init(void);
extern void machvec_init(const char *name);
extern int forbid_dac;
#endif

View file

@ -120,6 +120,8 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# include <asm/machvec_hpsim.h>
# elif defined (CONFIG_IA64_DIG)
# include <asm/machvec_dig.h>
# elif defined(CONFIG_IA64_DIG_VTD)
# include <asm/machvec_dig_vtd.h>
# elif defined (CONFIG_IA64_HP_ZX1)
# include <asm/machvec_hpzx1.h>
# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)

View file

@ -0,0 +1,38 @@
#ifndef _ASM_IA64_MACHVEC_DIG_VTD_h
#define _ASM_IA64_MACHVEC_DIG_VTD_h
extern ia64_mv_setup_t dig_setup;
extern ia64_mv_dma_alloc_coherent vtd_alloc_coherent;
extern ia64_mv_dma_free_coherent vtd_free_coherent;
extern ia64_mv_dma_map_single_attrs vtd_map_single_attrs;
extern ia64_mv_dma_unmap_single_attrs vtd_unmap_single_attrs;
extern ia64_mv_dma_map_sg_attrs vtd_map_sg_attrs;
extern ia64_mv_dma_unmap_sg_attrs vtd_unmap_sg_attrs;
extern ia64_mv_dma_supported iommu_dma_supported;
extern ia64_mv_dma_mapping_error vtd_dma_mapping_error;
extern ia64_mv_dma_init pci_iommu_alloc;
/*
* This stuff has dual use!
*
* For a generic kernel, the macros are used to initialize the
* platform's machvec structure. When compiling a non-generic kernel,
* the macros are used directly.
*/
#define platform_name "dig_vtd"
#define platform_setup dig_setup
#define platform_dma_init pci_iommu_alloc
#define platform_dma_alloc_coherent vtd_alloc_coherent
#define platform_dma_free_coherent vtd_free_coherent
#define platform_dma_map_single_attrs vtd_map_single_attrs
#define platform_dma_unmap_single_attrs vtd_unmap_single_attrs
#define platform_dma_map_sg_attrs vtd_map_sg_attrs
#define platform_dma_unmap_sg_attrs vtd_unmap_sg_attrs
#define platform_dma_sync_single_for_cpu machvec_dma_sync_single
#define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg
#define platform_dma_sync_single_for_device machvec_dma_sync_single
#define platform_dma_sync_sg_for_device machvec_dma_sync_sg
#define platform_dma_supported iommu_dma_supported
#define platform_dma_mapping_error vtd_dma_mapping_error
#endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */

View file

@ -1,3 +1,4 @@
#include <asm/iommu.h>
#include <asm/machvec.h>
extern ia64_mv_send_ipi_t ia64_send_ipi;

View file

@ -164,4 +164,7 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel)
return channel ? isa_irq_to_vector(15) : isa_irq_to_vector(14);
}
#ifdef CONFIG_DMAR
extern void pci_iommu_alloc(void);
#endif
#endif /* _ASM_IA64_PCI_H */

View file

@ -0,0 +1,56 @@
#ifndef ASM_IA64__SWIOTLB_H
#define ASM_IA64__SWIOTLB_H
#include <linux/dma-mapping.h>
/* SWIOTLB interface */
extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr,
size_t size, int dir);
extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags);
extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir);
extern void swiotlb_sync_single_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
size_t size, int dir);
extern void swiotlb_sync_single_for_device(struct device *hwdev,
dma_addr_t dev_addr,
size_t size, int dir);
extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
size_t size, int dir);
extern void swiotlb_sync_single_range_for_device(struct device *hwdev,
dma_addr_t dev_addr,
unsigned long offset,
size_t size, int dir);
extern void swiotlb_sync_sg_for_cpu(struct device *hwdev,
struct scatterlist *sg, int nelems,
int dir);
extern void swiotlb_sync_sg_for_device(struct device *hwdev,
struct scatterlist *sg, int nelems,
int dir);
extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction);
extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction);
extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
extern void swiotlb_free_coherent(struct device *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle);
extern int swiotlb_dma_supported(struct device *hwdev, u64 mask);
extern void swiotlb_init(void);
extern int swiotlb_force;
#ifdef CONFIG_SWIOTLB
extern int swiotlb;
extern void pci_swiotlb_init(void);
#else
#define swiotlb 0
static inline void pci_swiotlb_init(void)
{
}
#endif
#endif /* ASM_IA64__SWIOTLB_H */

View file

@ -42,6 +42,10 @@ obj-$(CONFIG_IA64_ESI) += esi.o
ifneq ($(CONFIG_IA64_ESI),)
obj-y += esi_stub.o # must be in kernel proper
endif
obj-$(CONFIG_DMAR) += pci-dma.o
ifeq ($(CONFIG_DMAR), y)
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
endif
# The gate DSO image is built using a special linker script.
targets += gate.so gate-syms.o

View file

@ -91,6 +91,9 @@ acpi_get_sysname(void)
struct acpi_table_rsdp *rsdp;
struct acpi_table_xsdt *xsdt;
struct acpi_table_header *hdr;
#ifdef CONFIG_DMAR
u64 i, nentries;
#endif
rsdp_phys = acpi_find_rsdp();
if (!rsdp_phys) {
@ -123,6 +126,18 @@ acpi_get_sysname(void)
return "sn2";
}
#ifdef CONFIG_DMAR
/* Look for Intel IOMMU */
nentries = (hdr->length - sizeof(*hdr)) /
sizeof(xsdt->table_offset_entry[0]);
for (i = 0; i < nentries; i++) {
hdr = __va(xsdt->table_offset_entry[i]);
if (strncmp(hdr->signature, ACPI_SIG_DMAR,
sizeof(ACPI_SIG_DMAR) - 1) == 0)
return "dig_vtd";
}
#endif
return "dig";
#else
# if defined (CONFIG_IA64_HP_SIM)
@ -137,6 +152,8 @@ acpi_get_sysname(void)
return "uv";
# elif defined (CONFIG_IA64_DIG)
return "dig";
# elif defined(CONFIG_IA64_DIG_VTD)
return "dig_vtd";
# else
# error Unknown platform. Fix acpi.c.
# endif

View file

@ -5,6 +5,7 @@
#include <linux/pci.h>
#include <linux/irq.h>
#include <linux/msi.h>
#include <linux/dmar.h>
#include <asm/smp.h>
/*
@ -162,3 +163,82 @@ void arch_teardown_msi_irq(unsigned int irq)
return ia64_teardown_msi_irq(irq);
}
#ifdef CONFIG_DMAR
#ifdef CONFIG_SMP
static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
{
struct irq_cfg *cfg = irq_cfg + irq;
struct msi_msg msg;
int cpu = first_cpu(mask);
if (!cpu_online(cpu))
return;
if (irq_prepare_move(irq, cpu))
return;
dmar_msi_read(irq, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DESTID_MASK;
msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu));
dmar_msi_write(irq, &msg);
irq_desc[irq].affinity = mask;
}
#endif /* CONFIG_SMP */
struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.unmask = dmar_msi_unmask,
.mask = dmar_msi_mask,
.ack = ia64_ack_msi_irq,
#ifdef CONFIG_SMP
.set_affinity = dmar_msi_set_affinity,
#endif
.retrigger = ia64_msi_retrigger_irq,
};
static int
msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
{
struct irq_cfg *cfg = irq_cfg + irq;
unsigned dest;
cpumask_t mask;
cpus_and(mask, irq_to_domain(irq), cpu_online_map);
dest = cpu_physical_id(first_cpu(mask));
msg->address_hi = 0;
msg->address_lo =
MSI_ADDR_HEADER |
MSI_ADDR_DESTMODE_PHYS |
MSI_ADDR_REDIRECTION_CPU |
MSI_ADDR_DESTID_CPU(dest);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
MSI_DATA_DELIVERY_FIXED |
MSI_DATA_VECTOR(cfg->vector);
return 0;
}
int arch_setup_dmar_msi(unsigned int irq)
{
int ret;
struct msi_msg msg;
ret = msi_compose_msg(NULL, irq, &msg);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
"edge");
return 0;
}
#endif /* CONFIG_DMAR */

129
arch/ia64/kernel/pci-dma.c Normal file
View file

@ -0,0 +1,129 @@
/*
* Dynamic DMA mapping support.
*/
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/dmar.h>
#include <asm/iommu.h>
#include <asm/machvec.h>
#include <linux/dma-mapping.h>
#include <asm/machvec.h>
#include <asm/system.h>
#ifdef CONFIG_DMAR
#include <linux/kernel.h>
#include <linux/string.h>
#include <asm/page.h>
#include <asm/iommu.h>
dma_addr_t bad_dma_address __read_mostly;
EXPORT_SYMBOL(bad_dma_address);
static int iommu_sac_force __read_mostly;
int no_iommu __read_mostly;
#ifdef CONFIG_IOMMU_DEBUG
int force_iommu __read_mostly = 1;
#else
int force_iommu __read_mostly;
#endif
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly;
/* Dummy device used for NULL arguments (normally ISA). Better would
be probably a smaller DMA mask, but this is bug-to-bug compatible
to i386. */
struct device fallback_dev = {
.bus_id = "fallback device",
.coherent_dma_mask = DMA_32BIT_MASK,
.dma_mask = &fallback_dev.coherent_dma_mask,
};
void __init pci_iommu_alloc(void)
{
/*
* The order of these functions is important for
* fall-back/fail-over reasons
*/
detect_intel_iommu();
#ifdef CONFIG_SWIOTLB
pci_swiotlb_init();
#endif
}
static int __init pci_iommu_init(void)
{
if (iommu_detected)
intel_iommu_init();
return 0;
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
void pci_iommu_shutdown(void)
{
return;
}
void __init
iommu_dma_init(void)
{
return;
}
struct dma_mapping_ops *dma_ops;
EXPORT_SYMBOL(dma_ops);
int iommu_dma_supported(struct device *dev, u64 mask)
{
struct dma_mapping_ops *ops = get_dma_ops(dev);
#ifdef CONFIG_PCI
if (mask > 0xffffffff && forbid_dac > 0) {
dev_info(dev, "Disallowing DAC for device\n");
return 0;
}
#endif
if (ops->dma_supported_op)
return ops->dma_supported_op(dev, mask);
/* Copied from i386. Doesn't make much sense, because it will
only work for pci_alloc_coherent.
The caller just has to use GFP_DMA in this case. */
if (mask < DMA_24BIT_MASK)
return 0;
/* Tell the device to use SAC when IOMMU force is on. This
allows the driver to use cheaper accesses in some cases.
Problem with this is that if we overflow the IOMMU area and
return DAC as fallback address the device may not handle it
correctly.
As a special case some controllers have a 39bit address
mode that is as efficient as 32bit (aic79xx). Don't force
SAC for these. Assume all masks <= 40 bits are of this
type. Normally this doesn't make any difference, but gives
more gentle handling of IOMMU overflow. */
if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
dev_info(dev, "Force SAC with mask %lx\n", mask);
return 0;
}
return 1;
}
EXPORT_SYMBOL(iommu_dma_supported);
#endif

View file

@ -0,0 +1,46 @@
/* Glue code to lib/swiotlb.c */
#include <linux/pci.h>
#include <linux/cache.h>
#include <linux/module.h>
#include <linux/dma-mapping.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/machvec.h>
int swiotlb __read_mostly;
EXPORT_SYMBOL(swiotlb);
struct dma_mapping_ops swiotlb_dma_ops = {
.mapping_error = swiotlb_dma_mapping_error,
.alloc_coherent = swiotlb_alloc_coherent,
.free_coherent = swiotlb_free_coherent,
.map_single = swiotlb_map_single,
.unmap_single = swiotlb_unmap_single,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
.map_sg = swiotlb_map_sg,
.unmap_sg = swiotlb_unmap_sg,
.dma_supported_op = swiotlb_dma_supported,
};
void __init pci_swiotlb_init(void)
{
if (!iommu_detected) {
#ifdef CONFIG_IA64_GENERIC
swiotlb = 1;
printk(KERN_INFO "PCI-DMA: Re-initialize machine vector.\n");
machvec_init("dig");
swiotlb_init();
dma_ops = &swiotlb_dma_ops;
#else
panic("Unable to find Intel IOMMU");
#endif
}
}

View file

@ -116,6 +116,13 @@ unsigned int num_io_spaces;
*/
#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */
unsigned long ia64_i_cache_stride_shift = ~0;
/*
* "clflush_cache_range()" needs to know what processor dependent stride size to
* use when it flushes cache lines including both d-cache and i-cache.
*/
/* Safest way to go: 32 bytes by 32 bytes */
#define CACHE_STRIDE_SHIFT 5
unsigned long ia64_cache_stride_shift = ~0;
/*
* The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
@ -847,13 +854,14 @@ setup_per_cpu_areas (void)
}
/*
* Calculate the max. cache line size.
* Do the following calculations:
*
* In addition, the minimum of the i-cache stride sizes is calculated for
* "flush_icache_range()".
* 1. the max. cache line size.
* 2. the minimum of the i-cache stride sizes for "flush_icache_range()".
* 3. the minimum of the cache stride sizes for "clflush_cache_range()".
*/
static void __cpuinit
get_max_cacheline_size (void)
get_cache_info(void)
{
unsigned long line_size, max = 1;
u64 l, levels, unique_caches;
@ -867,12 +875,14 @@ get_max_cacheline_size (void)
max = SMP_CACHE_BYTES;
/* Safest setup for "flush_icache_range()" */
ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT;
/* Safest setup for "clflush_cache_range()" */
ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
goto out;
}
for (l = 0; l < levels; ++l) {
status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
&cci);
/* cache_type (data_or_unified)=2 */
status = ia64_pal_cache_config_info(l, 2, &cci);
if (status != 0) {
printk(KERN_ERR
"%s: ia64_pal_cache_config_info(l=%lu, 2) failed (status=%ld)\n",
@ -880,15 +890,21 @@ get_max_cacheline_size (void)
max = SMP_CACHE_BYTES;
/* The safest setup for "flush_icache_range()" */
cci.pcci_stride = I_CACHE_STRIDE_SHIFT;
/* The safest setup for "clflush_cache_range()" */
ia64_cache_stride_shift = CACHE_STRIDE_SHIFT;
cci.pcci_unified = 1;
} else {
if (cci.pcci_stride < ia64_cache_stride_shift)
ia64_cache_stride_shift = cci.pcci_stride;
line_size = 1 << cci.pcci_line_size;
if (line_size > max)
max = line_size;
}
line_size = 1 << cci.pcci_line_size;
if (line_size > max)
max = line_size;
if (!cci.pcci_unified) {
status = ia64_pal_cache_config_info(l,
/* cache_type (instruction)= */ 1,
&cci);
/* cache_type (instruction)=1*/
status = ia64_pal_cache_config_info(l, 1, &cci);
if (status != 0) {
printk(KERN_ERR
"%s: ia64_pal_cache_config_info(l=%lu, 1) failed (status=%ld)\n",
@ -942,7 +958,7 @@ cpu_init (void)
}
#endif
get_max_cacheline_size();
get_cache_info();
/*
* We can't pass "local_cpu_data" to identify_cpu() because we haven't called

View file

@ -60,3 +60,58 @@ GLOBAL_ENTRY(flush_icache_range)
mov ar.lc=r3 // restore ar.lc
br.ret.sptk.many rp
END(flush_icache_range)
/*
* clflush_cache_range(start,size)
*
* Flush cache lines from start to start+size-1.
*
* Must deal with range from start to start+size-1 but nothing else
* (need to be careful not to touch addresses that may be
* unmapped).
*
* Note: "in0" and "in1" are preserved for debugging purposes.
*/
.section .kprobes.text,"ax"
GLOBAL_ENTRY(clflush_cache_range)
.prologue
alloc r2=ar.pfs,2,0,0,0
movl r3=ia64_cache_stride_shift
mov r21=1
add r22=in1,in0
;;
ld8 r20=[r3] // r20: stride shift
sub r22=r22,r0,1 // last byte address
;;
shr.u r23=in0,r20 // start / (stride size)
shr.u r22=r22,r20 // (last byte address) / (stride size)
shl r21=r21,r20 // r21: stride size of the i-cache(s)
;;
sub r8=r22,r23 // number of strides - 1
shl r24=r23,r20 // r24: addresses for "fc" =
// "start" rounded down to stride
// boundary
.save ar.lc,r3
mov r3=ar.lc // save ar.lc
;;
.body
mov ar.lc=r8
;;
/*
* 32 byte aligned loop, even number of (actually 2) bundles
*/
.Loop_fc:
fc r24 // issuable on M0 only
add r24=r21,r24 // we flush "stride size" bytes per iteration
nop.i 0
br.cloop.sptk.few .Loop_fc
;;
sync.i
;;
srlz.i
;;
mov ar.lc=r3 // restore ar.lc
br.ret.sptk.many rp
END(clflush_cache_range)