drm/radeon: use an interval tree to manage the VMA v2

Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Committed by Alex Deucher on 2014-07-30 11:49:56 -04:00
commit 0aea5e4aa2, parent c265f24d5c
5 changed files with 52 additions and 61 deletions

@@ -114,6 +114,7 @@ config DRM_RADEON
 	select POWER_SUPPLY
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
+	select INTERVAL_TREE
 	help
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  are both PCI and AGP versions.  You don't need to choose this to

@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>

 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
 struct radeon_bo_va {
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
-	uint64_t			soffset;
-	uint64_t			eoffset;
 	uint32_t			flags;
 	uint64_t			addr;
 	unsigned			ref_count;

 	/* protected by vm mutex */
-	struct list_head		vm_list;
+	struct interval_tree_node	it;
 	struct list_head		vm_status;

 	/* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
 };

 struct radeon_vm {
-	struct list_head	va;
+	struct rb_root		va;
 	unsigned		id;

 	/* BOs moved, but not yet updated in the PT */

@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 	switch (args->operation) {
 	case RADEON_VA_MAP:
-		if (bo_va->soffset) {
+		if (bo_va->it.start) {
 			args->operation = RADEON_VA_RESULT_VA_EXIST;
-			args->offset = bo_va->soffset;
+			args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
 			goto out;
 		}
 		r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);

@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
 		     ),

 	    TP_fast_assign(
-			   __entry->soffset = bo_va->soffset;
-			   __entry->eoffset = bo_va->eoffset;
+			   __entry->soffset = bo_va->it.start;
+			   __entry->eoffset = bo_va->it.last + 1;
 			   __entry->flags = bo_va->flags;
 			   ),
 	    TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",

@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 	}
 	bo_va->vm = vm;
 	bo_va->bo = bo;
-	bo_va->soffset = 0;
-	bo_va->eoffset = 0;
+	bo_va->it.start = 0;
+	bo_va->it.last = 0;
 	bo_va->flags = 0;
 	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->vm_list);
 	INIT_LIST_HEAD(&bo_va->vm_status);

 	mutex_lock(&vm->mutex);
-	list_add(&bo_va->vm_list, &vm->va);
 	list_add_tail(&bo_va->bo_list, &bo->va);
 	mutex_unlock(&vm->mutex);
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			  uint32_t flags)
 {
 	uint64_t size = radeon_bo_size(bo_va->bo);
-	uint64_t eoffset, last_offset = 0;
 	struct radeon_vm *vm = bo_va->vm;
-	struct radeon_bo_va *tmp;
-	struct list_head *head;
 	unsigned last_pfn, pt_idx;
+	uint64_t eoffset;
 	int r;

 	if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	}

 	mutex_lock(&vm->mutex);
-	head = &vm->va;
-	last_offset = 0;
-	list_for_each_entry(tmp, &vm->va, vm_list) {
-		if (bo_va == tmp) {
-			/* skip over currently modified bo */
-			continue;
+	if (bo_va->it.start || bo_va->it.last) {
+		if (bo_va->addr) {
+			/* add a clone of the bo_va to clear the old address */
+			struct radeon_bo_va *tmp;
+			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+			tmp->it.start = bo_va->it.start;
+			tmp->it.last = bo_va->it.last;
+			tmp->vm = vm;
+			tmp->addr = bo_va->addr;
+			list_add(&tmp->vm_status, &vm->freed);
 		}

-		if (soffset >= last_offset && eoffset <= tmp->soffset) {
-			/* bo can be added before this one */
-			break;
-		}
-		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
+		interval_tree_remove(&bo_va->it, &vm->va);
+		bo_va->it.start = 0;
+		bo_va->it.last = 0;
+	}
+
+	soffset /= RADEON_GPU_PAGE_SIZE;
+	eoffset /= RADEON_GPU_PAGE_SIZE;
+	if (soffset || eoffset) {
+		struct interval_tree_node *it;
+		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+		if (it) {
+			struct radeon_bo_va *tmp;
+			tmp = container_of(it, struct radeon_bo_va, it);
 			/* bo and tmp overlap, invalid offset */
-			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
+			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+				soffset, tmp->bo, tmp->it.start, tmp->it.last);
 			mutex_unlock(&vm->mutex);
 			return -EINVAL;
 		}
-		last_offset = tmp->eoffset;
-		head = &tmp->vm_list;
+		bo_va->it.start = soffset;
+		bo_va->it.last = eoffset - 1;
+		interval_tree_insert(&bo_va->it, &vm->va);
 	}

-	if (bo_va->soffset) {
-		/* add a clone of the bo_va to clear the old address */
-		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-		if (!tmp) {
-			mutex_unlock(&vm->mutex);
-			return -ENOMEM;
-		}
-		tmp->soffset = bo_va->soffset;
-		tmp->eoffset = bo_va->eoffset;
-		tmp->vm = vm;
-		list_add(&tmp->vm_status, &vm->freed);
-	}
-
-	bo_va->soffset = soffset;
-	bo_va->eoffset = eoffset;
 	bo_va->flags = flags;
 	bo_va->addr = 0;
-	list_move(&bo_va->vm_list, head);

-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+	soffset >>= radeon_vm_block_size;
+	eoffset >>= radeon_vm_block_size;

 	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 	unsigned count = 0;
 	uint64_t addr;

-	start = start / RADEON_GPU_PAGE_SIZE;
-	end = end / RADEON_GPU_PAGE_SIZE;
-
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	uint64_t addr;
 	int r;

-	if (!bo_va->soffset) {
+	if (!bo_va->it.start) {
 		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
 			bo_va->bo, vm);
 		return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,

 	trace_radeon_vm_bo_update(bo_va);

-	nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+	nptes = bo_va->it.last - bo_va->it.start + 1;

 	/* padding, etc. */
 	ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 	ib.length_dw = 0;

-	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-			      addr, radeon_vm_page_flags(bo_va->flags));
+	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+			      bo_va->it.last + 1, addr,
+			      radeon_vm_page_flags(bo_va->flags));

 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
 	list_del(&bo_va->bo_list);

 	mutex_lock(&vm->mutex);
-	list_del(&bo_va->vm_list);
+	interval_tree_remove(&bo_va->it, &vm->va);
 	list_del(&bo_va->vm_status);

 	if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
-	INIT_LIST_HEAD(&vm->va);
+	vm->va = RB_ROOT;
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->freed);
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	struct radeon_bo_va *bo_va, *tmp;
 	int i, r;

-	if (!list_empty(&vm->va)) {
+	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
 	}
-	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-		list_del_init(&bo_va->vm_list);
+	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+		interval_tree_remove(&bo_va->it, &vm->va);
 		r = radeon_bo_reserve(bo_va->bo, false);
 		if (!r) {
 			list_del_init(&bo_va->bo_list);