drm/amdgpu: sync ce and me with SWITCH_BUFFER(2)
We used to use WAIT_REG_MEM to make the CE wait until the DE had
finished updating the page tables, but from Tonga onward the CE no
longer supports the WAIT_REG_MEM packet, so that logic no longer works.

Here is another approach to the same end: inserting two SWITCH_BUFFER
packets at both the front and the end of vm_flush guarantees that the
CE does not go on to process the const IB before the vm_flush is done.

The double SWITCH_BUFFER also works on CI, so remove the legacy method
of syncing CE and ME.

v2: insert the double SWITCH_BUFFER at the front of the vm flush as well.

Signed-off-by: monk.liu <monk.liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
parent 54ef0b5461
commit 5c3422b0b1
3 changed files with 23 additions and 92 deletions
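Before the diff itself, a minimal sketch in C of the new synchronization, mirroring what the hunks below open-code in the vm_flush handlers. The helper name emit_double_switch_buffer is hypothetical and only for illustration; amdgpu_ring_write(), PACKET3() and PACKET3_SWITCH_BUFFER are the existing ring helpers visible in the diff, and the mechanism described in the comment restates the commit's own rationale rather than a hardware-spec quote.

/*
 * Hypothetical helper mirroring the open-coded hunks below.  Each
 * SWITCH_BUFFER packet presumably flips the CE/DE double-buffered
 * command buffers, so emitting it twice forces the CE to drain both
 * buffers before it may fetch further const IB data.  This replaces
 * the removed WRITE_DATA/WAIT_REG_MEM handshake, which the CE on
 * Tonga+ can no longer execute.
 */
static void emit_double_switch_buffer(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);	/* SWITCH_BUFFER payload dword */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}

The vm_flush hunks emit this sequence twice, and only on the GFX ring (the usepfp check), where the constant engine is in play: once at the front so the CE stops before the page tables change, and once after the trailing PFP_SYNC_ME so the CE resumes only once the flush has landed.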
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1202,8 +1202,6 @@ struct amdgpu_gfx {
 	struct amdgpu_irq_src		priv_inst_irq;
 	/* gfx status */
 	uint32_t			gfx_current_status;
-	/* sync signal for const engine */
-	unsigned ce_sync_offs;
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 };
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3610,41 +3610,6 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v7_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-	/* instruct DE to set a magic number */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(5)));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-
-	/* let CE wait till condition satisfied */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |  /* wait */
-				 WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
-				 WAIT_REG_MEM_ENGINE(2)));   /* ce */
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, 4); /* poll interval */
-
-	/* instruct CE to reset wb of ce_sync to zero */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-				 WRITE_DATA_DST_SEL(5) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 0);
-}
-
 /*
  * vm
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3663,6 +3628,13 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+	if (usepfp) {
+		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3703,7 +3675,10 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		amdgpu_ring_write(ring, 0x0);
 
 		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		gfx_v7_0_ce_sync_me(ring);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
 	}
 }
 
@@ -4805,12 +4780,6 @@ static int gfx_v7_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-	if (r) {
-		DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-		return r;
-	}
-
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
@@ -4889,8 +4858,6 @@ static int gfx_v7_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
 	gfx_v7_0_cp_compute_fini(adev);
 	gfx_v7_0_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -940,12 +940,6 @@ static int gfx_v8_0_sw_init(void *handle)
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
-	if (r) {
-		DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
-		return r;
-	}
-
 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
@@ -1033,8 +1027,6 @@ static int gfx_v8_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
-
 	gfx_v8_0_mec_fini(adev);
 
 	return 0;
@@ -4006,41 +3998,6 @@ static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
 	return true;
 }
 
-static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
-
-	/* instruct DE to set a magic number */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
-				 WRITE_DATA_DST_SEL(5)));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-
-	/* let CE wait till condition satisfied */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
-	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) |  /* wait */
-				 WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
-				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
-				 WAIT_REG_MEM_ENGINE(2)));   /* ce */
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1);
-	amdgpu_ring_write(ring, 0xffffffff);
-	amdgpu_ring_write(ring, 4); /* poll interval */
-
-	/* instruct CE to reset wb of ce_sync to zero */
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
-				 WRITE_DATA_DST_SEL(5) |
-				 WR_CONFIRM));
-	amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 0);
-}
-
 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
@@ -4057,6 +4014,14 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0xffffffff);
 	amdgpu_ring_write(ring, 4); /* poll interval */
 
+	if (usepfp) {
+		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+	}
+
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
 				 WRITE_DATA_DST_SEL(0)) |
@@ -4096,9 +4061,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
 		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
 		amdgpu_ring_write(ring, 0x0);
-
-		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
-		gfx_v8_0_ce_sync_me(ring);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
+		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		amdgpu_ring_write(ring, 0);
 	}
 }