msm: kgsl: Mark the scratch buffer as privileged

Mark the scratch buffer as privileged so that it can only be accessed by
the GPU through the ringbuffer. To accomplish this, we need to:

1. Move the preemption data out of the scratch buffer.
2. Disable the shadow rptr feature.
3. Trigger RPTR update from GPU using a WHERE_AM_I packet.
4. Add support for the new ucode.

Change-Id: I9b388f55f53b69028b9bbb2306cb43fd1297c52f
Signed-off-by: Akhil P Oommen <akhilpo@codeaurora.org>
Author: Akhil P Oommen <akhilpo@codeaurora.org>
Date:   2020-07-16 00:49:09 +05:30
Parent: 346ce0bbcb
Commit: 21dc7da573

12 changed files with 108 additions and 47 deletions
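For orientation before the per-file hunks: steps 2 and 3 of the commit message work together. Once the RPTR shadow writeback is disabled (CP_RB_CNTL bit 27), the CP itself must publish its read pointer, which is what the new CP_WHERE_AM_I packet does. A minimal sketch of that emission, assuming the helpers used in the hunks below (cp_packet, cp_gpuaddr, SCRATCH_RPTR_GPU_ADDR); the wrapper function itself is illustrative, not part of the patch:

/*
 * Sketch: ask the CP to write its current read pointer into the
 * per-RB slot of the (now privileged) scratch page. The packet is
 * 3 dwords (1 header + 2 address dwords), matching the extra
 * "total_sizedwords += 3" reserved in adreno_ringbuffer_addcmds().
 */
static unsigned int *emit_where_am_i(struct adreno_device *adreno_dev,
		struct adreno_ringbuffer *rb, unsigned int *cmds)
{
	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);

	*cmds++ = cp_packet(adreno_dev, CP_WHERE_AM_I, 2);
	cmds += cp_gpuaddr(adreno_dev, cmds,
			SCRATCH_RPTR_GPU_ADDR(device, rb->id));

	return cmds;
}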


@@ -3986,6 +3986,19 @@ static bool adreno_is_hwcg_on(struct kgsl_device *device)
 	return test_bit(ADRENO_HWCG_CTRL, &adreno_dev->pwrctrl_flag);
 }
 
+u32 adreno_get_ucode_version(const u32 *data)
+{
+	u32 version;
+
+	version = data[1];
+
+	if ((version & 0xf) != 0xa)
+		return version;
+
+	version &= ~0xfff;
+
+	return version | ((data[3] & 0xfff000) >> 12);
+}
+
 static const struct kgsl_functable adreno_functable = {
 	/* Mandatory functions */
 	.regread = adreno_regread,
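To make the new decode concrete, a worked example with invented firmware words (data is fw->data viewed as u32s, so data[1] is the same word the old *(unsigned int *)&fw->data[4] read):

/*
 * Hypothetical values: data[1] = 0x016dd16a has low nibble 0xa, so the
 * 12-bit patch level comes from data[3]. With data[3] = 0x00234000:
 *
 *   0x016dd16a & ~0xfff           -> 0x016dd000
 *   (0x00234000 & 0xfff000) >> 12 -> 0x234
 *   0x016dd000 | 0x234            -> 0x016dd234
 *
 * Any other low nibble returns data[1] unchanged, i.e. the legacy
 * behavior of reading the version word directly.
 */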


@@ -265,8 +265,8 @@ enum adreno_preempt_states {
 /**
  * struct adreno_preemption
  * @state: The current state of preemption
- * @counters: Memory descriptor for the memory where the GPU writes the
- * preemption counters on switch
+ * @scratch: Memory descriptor for the memory where the GPU writes the
+ * current ctxt record address and preemption counters on switch
  * @timer: A timer to make sure preemption doesn't stall
  * @work: A work struct for the preemption worker (for 5XX)
  * preempt_level: The level of preemption (for 6XX)
@@ -276,7 +276,7 @@ enum adreno_preempt_states {
  */
 struct adreno_preemption {
 	atomic_t state;
-	struct kgsl_memdesc counters;
+	struct kgsl_memdesc scratch;
 	struct timer_list timer;
 	struct work_struct work;
 	unsigned int preempt_level;
@@ -882,6 +882,7 @@ struct adreno_gpudev {
 	struct adreno_irq *irq;
 	int num_prio_levels;
+	int cp_rb_cntl;
 	unsigned int vbif_xin_halt_ctrl0_mask;
 	unsigned int gbif_client_halt_mask;
 	unsigned int gbif_arb_halt_mask;
@@ -1110,6 +1111,7 @@ void adreno_rscc_regread(struct adreno_device *adreno_dev,
 		unsigned int offsetwords, unsigned int *value);
 void adreno_isense_regread(struct adreno_device *adreno_dev,
 		unsigned int offsetwords, unsigned int *value);
+u32 adreno_get_ucode_version(const u32 *data);
 
 #define ADRENO_TARGET(_name, _id) \


@@ -1724,12 +1724,15 @@ static int a5xx_post_start(struct adreno_device *adreno_dev)
 		*cmds++ = 0xF;
 	}
 
-	if (adreno_is_preemption_enabled(adreno_dev))
+	if (adreno_is_preemption_enabled(adreno_dev)) {
 		cmds += _preemption_init(adreno_dev, rb, cmds, NULL);
+		rb->_wptr = rb->_wptr - (42 - (cmds - start));
+		ret = adreno_ringbuffer_submit_spin_nosync(rb, NULL, 2000);
+	} else {
+		rb->_wptr = rb->_wptr - (42 - (cmds - start));
+		ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	}
 
-	rb->_wptr = rb->_wptr - (42 - (cmds - start));
-	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
 	if (ret)
 		adreno_spin_idle_debug(adreno_dev,
 			"hw initialization failed to idle\n");
@@ -2038,7 +2041,7 @@ static int _load_firmware(struct kgsl_device *device, const char *fwfile,
 	memcpy(firmware->memdesc.hostptr, &fw->data[4], fw->size - 4);
 	firmware->size = (fw->size - 4) / sizeof(uint32_t);
-	firmware->version = *(unsigned int *)&fw->data[4];
+	firmware->version = adreno_get_ucode_version((u32 *)fw->data);
 
 done:
 	release_firmware(fw);


@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2015-2017,2019 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2015-2017,2019-2020 The Linux Foundation. All rights reserved.
  */
 
 #ifndef _ADRENO_A5XX_H_
@@ -134,7 +134,7 @@ void a5xx_crashdump_init(struct adreno_device *adreno_dev);
 void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on);
 
-#define A5XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
+#define A5XX_CP_RB_CNTL_DEFAULT ((1 << 27) | ((ilog2(4) << 8) & 0x1F00) | \
 		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
 
 /* GPMU interrupt multiplexor */
 #define FW_INTR_INFO			(0)
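A note on the new bit: (1 << 27) in CP_RB_CNTL is what implements step 2 of the commit message, stopping the CP from writing its read pointer back to the shadow location on its own; the same value is OR-ed in conditionally for a6xx in a6xx_platform_setup() below. Named here only for illustration, since the patch uses the raw value:

/* Hypothetical name for the bit; the patch writes the literal 1 << 27 */
#define CP_RB_CNTL_NO_RPTR_SHADOW	BIT(27)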


@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2014-2017,2019 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014-2017,2019-2020 The Linux Foundation. All rights reserved.
  */
 
 #include "adreno.h"
@@ -570,7 +570,7 @@ static void _preemption_close(struct adreno_device *adreno_dev)
 	unsigned int i;
 
 	del_timer(&preempt->timer);
-	kgsl_free_global(device, &preempt->counters);
+	kgsl_free_global(device, &preempt->scratch);
 	a5xx_preemption_iommu_close(adreno_dev);
 
 	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
@@ -604,14 +604,14 @@ int a5xx_preemption_init(struct adreno_device *adreno_dev)
 	timer_setup(&preempt->timer, _a5xx_preemption_timer, 0);
 
 	/* Allocate mem for storing preemption counters */
-	ret = kgsl_allocate_global(device, &preempt->counters,
+	ret = kgsl_allocate_global(device, &preempt->scratch,
 		adreno_dev->num_ringbuffers *
 		A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0,
 		"preemption_counters");
 	if (ret)
 		goto err;
 
-	addr = preempt->counters.gpuaddr;
+	addr = preempt->scratch.gpuaddr;
 
 	/* Allocate mem for storing preemption switch record */
 	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {


@@ -830,7 +830,7 @@ static int a6xx_post_start(struct adreno_device *adreno_dev)
 	rb->_wptr = rb->_wptr - (42 - (cmds - start));
 
-	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	ret = adreno_ringbuffer_submit_spin_nosync(rb, NULL, 2000);
 	if (ret)
 		adreno_spin_idle_debug(adreno_dev,
 			"hw preemption initialization failed to idle\n");
@@ -858,6 +858,7 @@
  */
 static int a6xx_rb_start(struct adreno_device *adreno_dev)
 {
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
 	struct kgsl_device *device = &adreno_dev->dev;
 	struct adreno_firmware *fw = ADRENO_FW(adreno_dev, ADRENO_FW_SQE);
@@ -874,7 +875,7 @@ static int a6xx_rb_start(struct adreno_device *adreno_dev)
 	 * representation of the size in quadwords (sizedwords / 2).
 	 */
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
-		A6XX_CP_RB_CNTL_DEFAULT);
+		gpudev->cp_rb_cntl);
 
 	adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
 		ADRENO_REG_CP_RB_BASE_HI, rb->buffer_desc.gpuaddr);
@@ -993,7 +994,7 @@ static int _load_firmware(struct kgsl_device *device, const char *fwfile,
 	if (!ret) {
 		memcpy(firmware->memdesc.hostptr, &fw->data[4], fw->size - 4);
 		firmware->size = (fw->size - 4) / sizeof(uint32_t);
-		firmware->version = *(unsigned int *)&fw->data[4];
+		firmware->version = adreno_get_ucode_version((u32 *)fw->data);
 	}
 
 	release_firmware(fw);
@@ -2384,6 +2385,9 @@ static void a6xx_platform_setup(struct adreno_device *adreno_dev)
 	if (ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC))
 		set_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag);
 
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		gpudev->cp_rb_cntl |= (1 << 27);
+
 	/* Check efuse bits for various capabilities */
 	a6xx_check_features(adreno_dev);
 }
@@ -2677,6 +2681,7 @@ struct adreno_gpudev adreno_a6xx_gpudev = {
 	.irq = &a6xx_irq,
 	.irq_trace = trace_kgsl_a5xx_irq_status,
 	.num_prio_levels = KGSL_PRIORITY_MAX_RB_LEVELS,
+	.cp_rb_cntl = A6XX_CP_RB_CNTL_DEFAULT,
 	.platform_setup = a6xx_platform_setup,
 	.init = a6xx_init,
 	.rb_start = a6xx_rb_start,


@@ -316,8 +316,8 @@ void a6xx_preemption_trigger(struct adreno_device *adreno_dev)
 	kgsl_sharedmem_writel(device, &iommu->smmu_info,
 		PREEMPT_SMMU_RECORD(context_idr), contextidr);
 
-	kgsl_sharedmem_readq(&device->scratch, &gpuaddr,
-		SCRATCH_PREEMPTION_CTXT_RESTORE_ADDR_OFFSET(next->id));
+	kgsl_sharedmem_readq(&preempt->scratch, &gpuaddr,
+		next->id * sizeof(u64));
 
 	/*
 	 * Set a keepalive bit before the first preemption register write.
@@ -543,12 +543,10 @@ unsigned int a6xx_preemption_pre_ibsubmit(
 			rb->perfcounter_save_restore_desc.gpuaddr);
 
 	if (context) {
-		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 		struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
 		struct adreno_ringbuffer *rb = drawctxt->rb;
-		uint64_t dest =
-			SCRATCH_PREEMPTION_CTXT_RESTORE_GPU_ADDR(device,
-			rb->id);
+		uint64_t dest = adreno_dev->preempt.scratch.gpuaddr +
+			sizeof(u64) * rb->id;
 
 		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
 		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
@@ -566,9 +564,8 @@ unsigned int a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
 	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
 
 	if (rb) {
-		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
-		uint64_t dest = SCRATCH_PREEMPTION_CTXT_RESTORE_GPU_ADDR(device,
-			rb->id);
+		uint64_t dest = adreno_dev->preempt.scratch.gpuaddr +
+			sizeof(u64) * rb->id;
 
 		*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 2);
 		cmds += cp_gpuaddr(adreno_dev, cmds, dest);
@@ -633,6 +630,7 @@ void a6xx_preemption_start(struct adreno_device *adreno_dev)
 static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
 	struct adreno_ringbuffer *rb)
 {
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 	int ret;
 
@@ -675,7 +673,7 @@ static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
 	kgsl_sharedmem_writel(device, &rb->preemption_desc,
 		PREEMPT_RECORD(data), 0);
 	kgsl_sharedmem_writel(device, &rb->preemption_desc,
-		PREEMPT_RECORD(cntl), A6XX_CP_RB_CNTL_DEFAULT);
+		PREEMPT_RECORD(cntl), gpudev->cp_rb_cntl);
 	kgsl_sharedmem_writel(device, &rb->preemption_desc,
 		PREEMPT_RECORD(rptr), 0);
 	kgsl_sharedmem_writel(device, &rb->preemption_desc,
@@ -729,6 +727,7 @@ static void _preemption_close(struct adreno_device *adreno_dev)
 	unsigned int i;
 
 	del_timer(&preempt->timer);
+	kgsl_free_global(device, &preempt->scratch);
 	a6xx_preemption_iommu_close(adreno_dev);
 
 	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
@@ -764,6 +763,9 @@ int a6xx_preemption_init(struct adreno_device *adreno_dev)
 	timer_setup(&preempt->timer, _a6xx_preemption_timer, 0);
 
+	ret = kgsl_allocate_global(device, &preempt->scratch, PAGE_SIZE, 0, 0,
+		"preemption_scratch");
+
 	/* Allocate mem for storing preemption switch record */
 	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
 		ret = a6xx_preemption_ringbuffer_init(adreno_dev, rb);
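The addressing used throughout this file (preempt->scratch.gpuaddr + rb->id * sizeof(u64)) implies a simple layout for the new PAGE_SIZE scratch allocation, sketched here as inferred from the reads and writes above:

/*
 * preempt->scratch layout (inferred, not a struct in the driver):
 *
 *   offset 0x00: u64 context record GPU address for RB 0
 *   offset 0x08: u64 context record GPU address for RB 1
 *   offset 0x10: u64 context record GPU address for RB 2
 *   ...          one slot per ringbuffer, indexed by rb->id
 */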


@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/slab.h>
@@ -160,7 +160,7 @@ static long adreno_ioctl_preemption_counters_query(
 		levels_to_copy = gpudev->num_prio_levels;
 
 	if (copy_to_user((void __user *) (uintptr_t) read->counters,
-		adreno_dev->preempt.counters.hostptr,
+		adreno_dev->preempt.scratch.hostptr,
 		levels_to_copy * size_level))
 		return -EFAULT;


@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Copyright (c) 2002,2007-2018, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2002,2007-2018,2020 The Linux Foundation. All rights reserved.
  */
 
 #ifndef __ADRENO_PM4TYPES_H
 #define __ADRENO_PM4TYPES_H
@@ -95,6 +95,8 @@
 /* A5XX Enable yield in RB only */
 #define CP_YIELD_ENABLE 0x1C
 
+#define CP_WHERE_AM_I 0x62
+
 /* Enable/Disable/Defer A5x global preemption model */
 #define CP_PREEMPT_ENABLE_GLOBAL 0x69
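For reference, CP_WHERE_AM_I is emitted as a regular type-7 PM4 packet. The header layout below is the usual a5xx/a6xx type-7 encoding as built by the driver's packet helpers; treat it as a reader's aid rather than authoritative documentation:

/*
 *   [31:28] packet type (7)
 *   [23]    odd parity of the opcode bits
 *   [22:16] opcode        -> 0x62 for CP_WHERE_AM_I
 *   [15]    odd parity of the count bits
 *   [14:0]  payload count -> 2 (the 64-bit scratch destination)
 */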


@@ -195,7 +195,7 @@ void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
 		adreno_ringbuffer_wptr(adreno_dev, rb);
 }
 
-int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
+int adreno_ringbuffer_submit_spin_nosync(struct adreno_ringbuffer *rb,
 		struct adreno_submit_time *time, unsigned int timeout)
 {
 	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
@@ -204,6 +204,40 @@ int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
 	return adreno_spin_idle(adreno_dev, timeout);
 }
 
+/*
+ * adreno_ringbuffer_submit_spin() - Submit the cmds and wait until GPU is idle
+ * @rb: Pointer to ringbuffer
+ * @time: Pointer to adreno_submit_time
+ * @timeout: timeout value in ms
+ *
+ * Add commands to the ringbuffer and wait until the GPU goes idle. This
+ * routine inserts a WHERE_AM_I packet to trigger a shadow rptr update, so
+ * use adreno_ringbuffer_submit_spin_nosync() if the previous cmd in the RB
+ * is a CSY packet, because CSY followed by WHERE_AM_I is not legal.
+ */
+int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, unsigned int timeout)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds;
+
+	/* GPUs that support the APRIV feature don't require a WHERE_AM_I */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV) ||
+		adreno_is_a3xx(adreno_dev))
+		return adreno_ringbuffer_submit_spin_nosync(rb, time, timeout);
+
+	cmds = adreno_ringbuffer_allocspace(rb, 3);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+
+	*cmds++ = cp_packet(adreno_dev, CP_WHERE_AM_I, 2);
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+		SCRATCH_RPTR_GPU_ADDR(device, rb->id));
+
+	return adreno_ringbuffer_submit_spin_nosync(rb, time, timeout);
+}
+
 unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
 		unsigned int dwords)
 {
@@ -332,11 +366,8 @@ int adreno_ringbuffer_probe(struct adreno_device *adreno_dev)
 	int status = -ENOMEM;
 
 	if (!adreno_is_a3xx(adreno_dev)) {
-		unsigned int priv = KGSL_MEMDESC_RANDOM;
-
-		/* For targets that support it, make the scratch privileged */
-		if (ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
-			priv |= KGSL_MEMDESC_PRIVILEGED;
+		unsigned int priv =
+			KGSL_MEMDESC_RANDOM | KGSL_MEMDESC_PRIVILEGED;
 
 		status = kgsl_allocate_global(device, &device->scratch,
 			PAGE_SIZE, 0, priv, "scratch");
@@ -539,6 +570,9 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
 	if (gpudev->preemption_post_ibsubmit &&
 		adreno_is_preemption_enabled(adreno_dev))
 		total_sizedwords += 10;
+	else if (!adreno_is_a3xx(adreno_dev) &&
+		!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV))
+		total_sizedwords += 3;
 
 	/*
 	 * a5xx uses 64 bit memory address. pm4 commands that involve read/write
@@ -745,6 +779,12 @@ adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
 		adreno_is_preemption_enabled(adreno_dev))
 		ringcmds += gpudev->preemption_post_ibsubmit(adreno_dev,
 			ringcmds);
+	else if (!adreno_is_a3xx(adreno_dev) &&
+		!ADRENO_FEATURE(adreno_dev, ADRENO_APRIV)) {
+		*ringcmds++ = cp_packet(adreno_dev, CP_WHERE_AM_I, 2);
+		ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+			SCRATCH_RPTR_GPU_ADDR(device, rb->id));
+	}
 
 	/*
 	 * If we have more ringbuffer commands than space reserved


@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2002,2007-2019, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2002,2007-2020, The Linux Foundation. All rights reserved.
  */
 
 #ifndef __ADRENO_RINGBUFFER_H
 #define __ADRENO_RINGBUFFER_H
@@ -165,6 +165,9 @@ int adreno_ringbuffer_issue_internal_cmds(struct adreno_ringbuffer *rb,
 void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
 		struct adreno_submit_time *time);
 
+int adreno_ringbuffer_submit_spin_nosync(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, unsigned int timeout);
+
 int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
 		struct adreno_submit_time *time, unsigned int timeout);


@@ -57,13 +57,11 @@
 /*
  * SCRATCH MEMORY: The scratch memory is one page worth of data that
  * is mapped into the GPU. This allows for some 'shared' data between
- * the GPU and CPU. For example, it will be used by the GPU to write
- * each updated RPTR for each RB.
+ * the GPU and CPU.
  *
  * Used Data:
  * Offset: Length(bytes): What
  * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR
- * 0x10: 8 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 CTXT RESTORE ADDR
  */
 
 /* Shadow global helpers */
@@ -71,13 +69,6 @@
 #define SCRATCH_RPTR_GPU_ADDR(dev, id) \
 	((dev)->scratch.gpuaddr + SCRATCH_RPTR_OFFSET(id))
 
-#define SCRATCH_PREEMPTION_CTXT_RESTORE_ADDR_OFFSET(id) \
-	(SCRATCH_RPTR_OFFSET(KGSL_PRIORITY_MAX_RB_LEVELS) + \
-	((id) * sizeof(uint64_t)))
-#define SCRATCH_PREEMPTION_CTXT_RESTORE_GPU_ADDR(dev, id) \
-	((dev)->scratch.gpuaddr + \
-	SCRATCH_PREEMPTION_CTXT_RESTORE_ADDR_OFFSET(id))
-
 /* Timestamp window used to detect rollovers (half of integer range) */
 #define KGSL_TIMESTAMP_WINDOW 0x80000000
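One consequence worth noting: KGSL_MEMDESC_PRIVILEGED restricts GPU-side access, not the kernel's CPU mapping, so the driver can still read the shadow RPTR that the WHERE_AM_I packets keep current. A sketch under that assumption, using the macro kept above and kgsl_sharedmem_readl() as used elsewhere in the driver:

/* Illustrative helper, not part of the patch */
static unsigned int scratch_rptr(struct kgsl_device *device,
		struct adreno_ringbuffer *rb)
{
	unsigned int rptr;

	kgsl_sharedmem_readl(&device->scratch, &rptr,
			SCRATCH_RPTR_OFFSET(rb->id));

	return rptr;
}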