From 0aea5e4aa299c465afafc77883ea2c19475036b1 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 30 Jul 2014 11:49:56 -0400 Subject: [PATCH] drm/radeon: use an intervall tree to manage the VMA v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scales much better than scanning the address range linearly. v2: store pfn instead of address Signed-off-by: Christian König Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 1 + drivers/gpu/drm/radeon/radeon.h | 7 +- drivers/gpu/drm/radeon/radeon_gem.c | 4 +- drivers/gpu/drm/radeon/radeon_trace.h | 4 +- drivers/gpu/drm/radeon/radeon_vm.c | 97 ++++++++++++--------------- 5 files changed, 52 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 31894c8c1773..b066bb3ca01a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -114,6 +114,7 @@ config DRM_RADEON select POWER_SUPPLY select HWMON select BACKLIGHT_CLASS_DEVICE + select INTERVAL_TREE help Choose this option if you have an ATI Radeon graphics card. There are both PCI and AGP versions. You don't need to choose this to diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 31dda41394d8..56fc7d2da149 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -64,6 +64,7 @@ #include #include #include +#include #include #include @@ -447,14 +448,12 @@ struct radeon_mman { struct radeon_bo_va { /* protected by bo being reserved */ struct list_head bo_list; - uint64_t soffset; - uint64_t eoffset; uint32_t flags; uint64_t addr; unsigned ref_count; /* protected by vm mutex */ - struct list_head vm_list; + struct interval_tree_node it; struct list_head vm_status; /* constant after initialization */ @@ -877,7 +876,7 @@ struct radeon_vm_pt { }; struct radeon_vm { - struct list_head va; + struct rb_root va; unsigned id; /* BOs moved, but not yet updated in the PT */ diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 99e4e0cd72a6..bfd7e1b0ff3f 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data, switch (args->operation) { case RADEON_VA_MAP: - if (bo_va->soffset) { + if (bo_va->it.start) { args->operation = RADEON_VA_RESULT_VA_EXIST; - args->offset = bo_va->soffset; + args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE; goto out; } r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags); diff --git a/drivers/gpu/drm/radeon/radeon_trace.h b/drivers/gpu/drm/radeon/radeon_trace.h index cd781f34bd8d..9db74a96ef61 100644 --- a/drivers/gpu/drm/radeon/radeon_trace.h +++ b/drivers/gpu/drm/radeon/radeon_trace.h @@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update, ), TP_fast_assign( - __entry->soffset = bo_va->soffset; - __entry->eoffset = bo_va->eoffset; + __entry->soffset = bo_va->it.start; + __entry->eoffset = bo_va->it.last + 1; __entry->flags = bo_va->flags; ), TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x", diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 906c8ae867ac..39bc5c2b02d1 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, } bo_va->vm = vm; bo_va->bo = bo; - bo_va->soffset = 0; - bo_va->eoffset = 0; + bo_va->it.start = 0; + bo_va->it.last = 0; bo_va->flags = 0; bo_va->addr = 0; bo_va->ref_count = 1; INIT_LIST_HEAD(&bo_va->bo_list); - INIT_LIST_HEAD(&bo_va->vm_list); INIT_LIST_HEAD(&bo_va->vm_status); mutex_lock(&vm->mutex); - list_add(&bo_va->vm_list, &vm->va); list_add_tail(&bo_va->bo_list, &bo->va); mutex_unlock(&vm->mutex); @@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, uint32_t flags) { uint64_t size = radeon_bo_size(bo_va->bo); - uint64_t eoffset, last_offset = 0; struct radeon_vm *vm = bo_va->vm; - struct radeon_bo_va *tmp; - struct list_head *head; unsigned last_pfn, pt_idx; + uint64_t eoffset; int r; if (soffset) { @@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } mutex_lock(&vm->mutex); - head = &vm->va; - last_offset = 0; - list_for_each_entry(tmp, &vm->va, vm_list) { - if (bo_va == tmp) { - /* skip over currently modified bo */ - continue; + if (bo_va->it.start || bo_va->it.last) { + if (bo_va->addr) { + /* add a clone of the bo_va to clear the old address */ + struct radeon_bo_va *tmp; + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + tmp->it.start = bo_va->it.start; + tmp->it.last = bo_va->it.last; + tmp->vm = vm; + tmp->addr = bo_va->addr; + list_add(&tmp->vm_status, &vm->freed); } - if (soffset >= last_offset && eoffset <= tmp->soffset) { - /* bo can be added before this one */ - break; - } - if (eoffset > tmp->soffset && soffset < tmp->eoffset) { + interval_tree_remove(&bo_va->it, &vm->va); + bo_va->it.start = 0; + bo_va->it.last = 0; + } + + soffset /= RADEON_GPU_PAGE_SIZE; + eoffset /= RADEON_GPU_PAGE_SIZE; + if (soffset || eoffset) { + struct interval_tree_node *it; + it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); + if (it) { + struct radeon_bo_va *tmp; + tmp = container_of(it, struct radeon_bo_va, it); /* bo and tmp overlap, invalid offset */ - dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n", - bo_va->bo, (unsigned)bo_va->soffset, tmp->bo, - (unsigned)tmp->soffset, (unsigned)tmp->eoffset); + dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " + "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, + soffset, tmp->bo, tmp->it.start, tmp->it.last); mutex_unlock(&vm->mutex); return -EINVAL; } - last_offset = tmp->eoffset; - head = &tmp->vm_list; + bo_va->it.start = soffset; + bo_va->it.last = eoffset - 1; + interval_tree_insert(&bo_va->it, &vm->va); } - if (bo_va->soffset) { - /* add a clone of the bo_va to clear the old address */ - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (!tmp) { - mutex_unlock(&vm->mutex); - return -ENOMEM; - } - tmp->soffset = bo_va->soffset; - tmp->eoffset = bo_va->eoffset; - tmp->vm = vm; - list_add(&tmp->vm_status, &vm->freed); - } - - bo_va->soffset = soffset; - bo_va->eoffset = eoffset; bo_va->flags = flags; bo_va->addr = 0; - list_move(&bo_va->vm_list, head); - soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; - eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size; + soffset >>= radeon_vm_block_size; + eoffset >>= radeon_vm_block_size; BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); @@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev, unsigned count = 0; uint64_t addr; - start = start / RADEON_GPU_PAGE_SIZE; - end = end / RADEON_GPU_PAGE_SIZE; - /* walk over the address space and update the page tables */ for (addr = start; addr < end; ) { uint64_t pt_idx = addr >> radeon_vm_block_size; @@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, uint64_t addr; int r; - if (!bo_va->soffset) { + if (!bo_va->it.start) { dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n", bo_va->bo, vm); return -EINVAL; @@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev, trace_radeon_vm_bo_update(bo_va); - nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE; + nptes = bo_va->it.last - bo_va->it.start + 1; /* padding, etc. */ ndw = 64; @@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev, return r; ib.length_dw = 0; - radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset, - addr, radeon_vm_page_flags(bo_va->flags)); + radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, + bo_va->it.last + 1, addr, + radeon_vm_page_flags(bo_va->flags)); radeon_semaphore_sync_to(ib.semaphore, vm->fence); r = radeon_ib_schedule(rdev, &ib, NULL); @@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev, list_del(&bo_va->bo_list); mutex_lock(&vm->mutex); - list_del(&bo_va->vm_list); + interval_tree_remove(&bo_va->it, &vm->va); list_del(&bo_va->vm_status); if (bo_va->addr) { @@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) vm->last_flush = NULL; vm->last_id_use = NULL; mutex_init(&vm->mutex); - INIT_LIST_HEAD(&vm->va); + vm->va = RB_ROOT; INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->freed); @@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) struct radeon_bo_va *bo_va, *tmp; int i, r; - if (!list_empty(&vm->va)) { + if (!RB_EMPTY_ROOT(&vm->va)) { dev_err(rdev->dev, "still active bo inside vm\n"); } - list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) { - list_del_init(&bo_va->vm_list); + rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { + interval_tree_remove(&bo_va->it, &vm->va); r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list);