iommu/vt-d: Add basic SVM PASID support

This provides basic PASID support for endpoint devices, tested with a
version of the i915 driver.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
This commit is contained in:
David Woodhouse 2015-09-09 11:40:47 +01:00
parent b16d0cb9e2
commit 2f26e0a9c9
6 changed files with 548 additions and 5 deletions

View file

@ -139,6 +139,7 @@ config INTEL_IOMMU_SVM
bool "Support for Shared Virtual Memory with Intel IOMMU"
depends on INTEL_IOMMU && X86
select PCI_PASID
select MMU_NOTIFIER
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by

View file

@ -4929,6 +4929,110 @@ static void intel_iommu_remove_device(struct device *dev)
iommu_device_unlink(iommu->iommu_dev, dev);
}
#ifdef CONFIG_INTEL_IOMMU_SVM
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev)
{
struct device_domain_info *info;
struct context_entry *context;
struct dmar_domain *domain;
unsigned long flags;
u64 ctx_lo;
int ret;
domain = get_valid_domain_for_dev(sdev->dev);
if (!domain)
return -EINVAL;
spin_lock_irqsave(&device_domain_lock, flags);
spin_lock(&iommu->lock);
ret = -EINVAL;
info = sdev->dev->archdata.iommu;
if (!info || !info->pasid_supported)
goto out;
context = iommu_context_addr(iommu, info->bus, info->devfn, 0);
if (WARN_ON(!context))
goto out;
ctx_lo = context[0].lo;
sdev->did = domain->iommu_did[iommu->seq_id];
sdev->sid = PCI_DEVID(info->bus, info->devfn);
if (!(ctx_lo & CONTEXT_PASIDE)) {
context[1].hi = (u64)virt_to_phys(iommu->pasid_state_table);
context[1].lo = (u64)virt_to_phys(iommu->pasid_table) | ecap_pss(iommu->ecap);
wmb();
/* CONTEXT_TT_MULTI_LEVEL and CONTEXT_TT_DEV_IOTLB are both
* extended to permit requests-with-PASID if the PASIDE bit
* is set. which makes sense. For CONTEXT_TT_PASS_THROUGH,
* however, the PASIDE bit is ignored and requests-with-PASID
* are unconditionally blocked. Which makes less sense.
* So convert from CONTEXT_TT_PASS_THROUGH to one of the new
* "guest mode" translation types depending on whether ATS
* is available or not. Annoyingly, we can't use the new
* modes *unless* PASIDE is set. */
if ((ctx_lo & CONTEXT_TT_MASK) == (CONTEXT_TT_PASS_THROUGH << 2)) {
ctx_lo &= ~CONTEXT_TT_MASK;
if (info->ats_supported)
ctx_lo |= CONTEXT_TT_PT_PASID_DEV_IOTLB << 2;
else
ctx_lo |= CONTEXT_TT_PT_PASID << 2;
}
ctx_lo |= CONTEXT_PASIDE;
context[0].lo = ctx_lo;
wmb();
iommu->flush.flush_context(iommu, sdev->did, sdev->sid,
DMA_CCMD_MASK_NOBIT,
DMA_CCMD_DEVICE_INVL);
}
/* Enable PASID support in the device, if it wasn't already */
if (!info->pasid_enabled)
iommu_enable_dev_iotlb(info);
if (info->ats_enabled) {
sdev->dev_iotlb = 1;
sdev->qdep = info->ats_qdep;
if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
sdev->qdep = 0;
}
ret = 0;
out:
spin_unlock(&iommu->lock);
spin_unlock_irqrestore(&device_domain_lock, flags);
return ret;
}
struct intel_iommu *intel_svm_device_to_iommu(struct device *dev)
{
struct intel_iommu *iommu;
u8 bus, devfn;
if (iommu_dummy(dev)) {
dev_warn(dev,
"No IOMMU translation for device; cannot enable SVM\n");
return NULL;
}
iommu = device_to_iommu(dev, &bus, &devfn);
if ((!iommu)) {
dev_dbg(dev, "No IOMMU for device; cannot enable SVM\n");
return NULL;
}
if (!iommu->pasid_table) {
dev_dbg(dev, "PASID not enabled on IOMMU; cannot enable SVM\n");
return NULL;
}
return iommu;
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
static const struct iommu_ops intel_iommu_ops = {
.capable = intel_iommu_capable,
.domain_alloc = intel_iommu_domain_alloc,

View file

@ -14,6 +14,17 @@
*/
#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
struct pasid_entry {
u64 val;
};
int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
{
@ -42,6 +53,8 @@ int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
iommu->name);
}
idr_init(&iommu->pasid_idr);
return 0;
}
@ -61,5 +74,283 @@ int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
free_pages((unsigned long)iommu->pasid_state_table, order);
iommu->pasid_state_table = NULL;
}
idr_destroy(&iommu->pasid_idr);
return 0;
}
static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
unsigned long address, int pages, int ih)
{
struct qi_desc desc;
int mask = ilog2(__roundup_pow_of_two(pages));
if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap) ||
mask > cap_max_amask_val(svm->iommu->cap)) {
desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
desc.high = 0;
} else {
desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(1) |
QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
}
qi_submit_sync(&desc, svm->iommu);
if (sdev->dev_iotlb) {
desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
if (mask) {
unsigned long adr, delta;
/* Least significant zero bits in the address indicate the
* range of the request. So mask them out according to the
* size. */
adr = address & ((1<<(VTD_PAGE_SHIFT + mask)) - 1);
/* Now ensure that we round down further if the original
* request was not aligned w.r.t. its size */
delta = address - adr;
if (delta + (pages << VTD_PAGE_SHIFT) >= (1 << (VTD_PAGE_SHIFT + mask)))
adr &= ~(1 << (VTD_PAGE_SHIFT + mask));
desc.high = QI_DEV_EIOTLB_ADDR(adr) | QI_DEV_EIOTLB_SIZE;
} else {
desc.high = QI_DEV_EIOTLB_ADDR(address);
}
qi_submit_sync(&desc, svm->iommu);
}
}
static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
int pages, int ih)
{
struct intel_svm_dev *sdev;
rcu_read_lock();
list_for_each_entry_rcu(sdev, &svm->devs, list)
intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
rcu_read_unlock();
}
static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
unsigned long address, pte_t pte)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
intel_flush_svm_range(svm, address, 1, 1);
}
static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
unsigned long address)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
intel_flush_svm_range(svm, address, 1, 1);
}
/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
intel_flush_svm_range(svm, start,
(end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT , 0);
}
static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev)
{
struct qi_desc desc;
desc.high = 0;
desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(svm->pasid);
qi_submit_sync(&desc, svm->iommu);
}
static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
svm->iommu->pasid_table[svm->pasid].val = 0;
/* There's no need to do any flush because we can't get here if there
* are any devices left anyway. */
WARN_ON(!list_empty(&svm->devs));
}
static const struct mmu_notifier_ops intel_mmuops = {
.release = intel_mm_release,
.change_pte = intel_change_pte,
.invalidate_page = intel_invalidate_page,
.invalidate_range = intel_invalidate_range,
};
static DEFINE_MUTEX(pasid_mutex);
int intel_svm_bind_mm(struct device *dev, int *pasid)
{
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL;
int pasid_max;
int ret;
BUG_ON(pasid && !current->mm);
if (WARN_ON(!iommu))
return -EINVAL;
if (dev_is_pci(dev)) {
pasid_max = pci_max_pasids(to_pci_dev(dev));
if (pasid_max < 0)
return -EINVAL;
} else
pasid_max = 1 << 20;
mutex_lock(&pasid_mutex);
if (pasid) {
int i;
idr_for_each_entry(&iommu->pasid_idr, svm, i) {
if (svm->mm != current->mm)
continue;
if (svm->pasid >= pasid_max) {
dev_warn(dev,
"Limited PASID width. Cannot use existing PASID %d\n",
svm->pasid);
ret = -ENOSPC;
goto out;
}
list_for_each_entry(sdev, &svm->devs, list) {
if (dev == sdev->dev) {
sdev->users++;
goto success;
}
}
break;
}
}
sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
if (!sdev) {
ret = -ENOMEM;
goto out;
}
sdev->dev = dev;
ret = intel_iommu_enable_pasid(iommu, sdev);
if (ret || !pasid) {
/* If they don't actually want to assign a PASID, this is
* just an enabling check/preparation. */
kfree(sdev);
goto out;
}
/* Finish the setup now we know we're keeping it */
sdev->users = 1;
init_rcu_head(&sdev->rcu);
if (!svm) {
svm = kzalloc(sizeof(*svm), GFP_KERNEL);
if (!svm) {
ret = -ENOMEM;
kfree(sdev);
goto out;
}
svm->iommu = iommu;
if (pasid_max > 2 << ecap_pss(iommu->ecap))
pasid_max = 2 << ecap_pss(iommu->ecap);
ret = idr_alloc(&iommu->pasid_idr, svm, 0, pasid_max - 1,
GFP_KERNEL);
if (ret < 0) {
kfree(svm);
goto out;
}
svm->pasid = ret;
svm->notifier.ops = &intel_mmuops;
svm->mm = get_task_mm(current);
INIT_LIST_HEAD_RCU(&svm->devs);
ret = -ENOMEM;
if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) {
idr_remove(&svm->iommu->pasid_idr, svm->pasid);
kfree(svm);
kfree(sdev);
goto out;
}
iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1;
wmb();
}
list_add_rcu(&sdev->list, &svm->devs);
success:
*pasid = svm->pasid;
ret = 0;
out:
mutex_unlock(&pasid_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
int intel_svm_unbind_mm(struct device *dev, int pasid)
{
struct intel_svm_dev *sdev;
struct intel_iommu *iommu;
struct intel_svm *svm;
int ret = -EINVAL;
mutex_lock(&pasid_mutex);
iommu = intel_svm_device_to_iommu(dev);
if (!iommu || !iommu->pasid_table)
goto out;
svm = idr_find(&iommu->pasid_idr, pasid);
if (!svm)
goto out;
list_for_each_entry(sdev, &svm->devs, list) {
if (dev == sdev->dev) {
ret = 0;
sdev->users--;
if (!sdev->users) {
list_del_rcu(&sdev->list);
/* Flush the PASID cache and IOTLB for this device.
* Note that we do depend on the hardware *not* using
* the PASID any more. Just as we depend on other
* devices never using PASIDs that they have no right
* to use. We have a *shared* PASID table, because it's
* large and has to be physically contiguous. So it's
* hard to be as defensive as we might like. */
intel_flush_pasid_dev(svm, sdev);
intel_flush_svm_range_dev(svm, sdev, 0, -1, 0);
kfree_rcu(sdev, rcu);
if (list_empty(&svm->devs)) {
mmu_notifier_unregister(&svm->notifier, svm->mm);
idr_remove(&svm->iommu->pasid_idr, svm->pasid);
mmput(svm->mm);
/* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called.
* If that is not obeyed, subtle errors will happen.
* Let's make them less subtle... */
memset(svm, 0x6b, sizeof(*svm));
kfree(svm);
}
}
break;
}
}
out:
mutex_unlock(&pasid_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);

View file

@ -20,6 +20,13 @@
#define CONTEXT_TT_MULTI_LEVEL 0
#define CONTEXT_TT_DEV_IOTLB 1
#define CONTEXT_TT_PASS_THROUGH 2
/* Extended context entry types */
#define CONTEXT_TT_PT_PASID 4
#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5
#define CONTEXT_TT_MASK (7ULL << 2)
#define CONTEXT_PRS (1ULL << 9)
#define CONTEXT_PASIDE (1ULL << 11)
struct intel_iommu;
struct dmar_domain;

View file

@ -1,5 +1,9 @@
/*
* Copyright (c) 2006, Intel Corporation.
* Copyright © 2006-2015, Intel Corporation.
*
* Authors: Ashok Raj <ashok.raj@intel.com>
* Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
* David Woodhouse <David.Woodhouse@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
@ -13,10 +17,6 @@
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place - Suite 330, Boston, MA 02111-1307 USA.
*
* Copyright (C) 2006-2008 Intel Corporation
* Author: Ashok Raj <ashok.raj@intel.com>
* Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
*/
#ifndef _INTEL_IOMMU_H_
@ -25,7 +25,10 @@
#include <linux/types.h>
#include <linux/iova.h>
#include <linux/io.h>
#include <linux/idr.h>
#include <linux/dma_remapping.h>
#include <linux/mmu_notifier.h>
#include <linux/list.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
@ -251,6 +254,9 @@ enum {
#define QI_DIOTLB_TYPE 0x3
#define QI_IEC_TYPE 0x4
#define QI_IWD_TYPE 0x5
#define QI_EIOTLB_TYPE 0x6
#define QI_PC_TYPE 0x7
#define QI_DEIOTLB_TYPE 0x8
#define QI_IEC_SELECTIVE (((u64)1) << 4)
#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
@ -278,6 +284,34 @@ enum {
#define QI_DEV_IOTLB_SIZE 1
#define QI_DEV_IOTLB_MAX_INVS 32
#define QI_PC_PASID(pasid) (((u64)pasid) << 32)
#define QI_PC_DID(did) (((u64)did) << 16)
#define QI_PC_GRAN(gran) (((u64)gran) << 4)
#define QI_PC_ALL_PASIDS (QI_PC_TYPE | QI_PC_GRAN(0))
#define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1))
#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK)
#define QI_EIOTLB_GL(gl) (((u64)gl) << 7)
#define QI_EIOTLB_IH(ih) (((u64)ih) << 6)
#define QI_EIOTLB_AM(am) (((u64)am))
#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32)
#define QI_EIOTLB_DID(did) (((u64)did) << 16)
#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4)
#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK)
#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11)
#define QI_DEV_EIOTLB_GLOB(g) ((u64)g)
#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32)
#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32)
#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16)
#define QI_DEV_EIOTLB_MAX_INVS 32
#define QI_GRAN_ALL_ALL 0
#define QI_GRAN_NONG_ALL 1
#define QI_GRAN_NONG_PASID 2
#define QI_GRAN_PSI_PASID 3
struct qi_desc {
u64 low, high;
};
@ -359,6 +393,7 @@ struct intel_iommu {
* told to. But while it's all driver-arbitrated, we're fine. */
struct pasid_entry *pasid_table;
struct pasid_state_entry *pasid_state_table;
struct idr pasid_idr;
#endif
struct q_inval *qi; /* Queued invalidation info */
u32 *iommu_state; /* Store iommu states between suspend and resume.*/
@ -399,9 +434,32 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
extern int dmar_ir_support(void);
#ifdef CONFIG_INTEL_IOMMU_SVM
extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu);
extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu);
struct intel_svm_dev {
struct list_head list;
struct rcu_head rcu;
struct device *dev;
int users;
u16 did;
u16 dev_iotlb:1;
u16 sid, qdep;
};
struct intel_svm {
struct mmu_notifier notifier;
struct mm_struct *mm;
struct intel_iommu *iommu;
int pasid;
struct list_head devs;
};
extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev);
extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
#endif
extern const struct attribute_group *intel_iommu_groups[];
#endif

82
include/linux/intel-svm.h Normal file
View file

@ -0,0 +1,82 @@
/*
* Copyright © 2015 Intel Corporation.
*
* Authors: David Woodhouse <David.Woodhouse@intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#ifndef __INTEL_SVM_H__
#define __INTEL_SVM_H__
#ifdef CONFIG_INTEL_IOMMU_SVM
struct device;
/**
* intel_svm_bind_mm() - Bind the current process to a PASID
* @dev: Device to be granted acccess
* @pasid: Address for allocated PASID
*
* This function attempts to enable PASID support for the given device.
* If the @pasid argument is non-%NULL, a PASID is allocated for access
* to the MM of the current process.
*
* By using a %NULL value for the @pasid argument, this function can
* be used to simply validate that PASID support is available for the
* given device i.e. that it is behind an IOMMU which has the
* requisite support, and is enabled.
*
* Page faults are handled transparently by the IOMMU code, and there
* should be no need for the device driver to be involved. If a page
* fault cannot be handled (i.e. is an invalid address rather than
* just needs paging in), then the page request will be completed by
* the core IOMMU code with appropriate status, and the device itself
* can then report the resulting fault to its driver via whatever
* mechanism is appropriate.
*
* Multiple calls from the same process may result in the same PASID
* being re-used. A reference count is kept.
*/
extern int intel_svm_bind_mm(struct device *dev, int *pasid);
/**
* intel_svm_unbind_mm() - Unbind a specified PASID
* @dev: Device for which PASID was allocated
* @pasid: PASID value to be unbound
*
* This function allows a PASID to be retired when the device no
* longer requires access to the address space of a given process.
*
* If the use count for the PASID in question reaches zero, the
* PASID is revoked and may no longer be used by hardware.
*
* Device drivers are required to ensure that no access (including
* page requests) is currently outstanding for the PASID in question,
* before calling this function.
*/
extern int intel_svm_unbind_mm(struct device *dev, int pasid);
#else /* CONFIG_INTEL_IOMMU_SVM */
static inline int intel_svm_bind_mm(struct device *dev, int *pasid)
{
return -ENOSYS;
}
static inline int intel_svm_unbind_mm(struct device *dev, int pasid)
{
BUG();
}
#endif /* CONFIG_INTEL_IOMMU_SVM */
#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL))
#endif /* __INTEL_SVM_H__ */