drm/radeon/kms: add blit support for cayman (v2)
Allows us to use the 3D engine for memory management and allows us to use vram beyond the BAR aperture. v2: fix copy paste typo Reported-by: Nils Wallménius <nils.wallmenius@gmail.com> Signed-off-by: Alex Deucher <alexdeucher@gmail.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
ac10f81d94
commit
cb92d452ba
5 changed files with 597 additions and 254 deletions
|
@ -39,17 +39,335 @@
|
|||
|
||||
const u32 cayman_default_state[] =
|
||||
{
|
||||
/* XXX fill in additional blit state */
|
||||
0xc0066900,
|
||||
0x00000000,
|
||||
0x00000060, /* DB_RENDER_CONTROL */
|
||||
0x00000000, /* DB_COUNT_CONTROL */
|
||||
0x00000000, /* DB_DEPTH_VIEW */
|
||||
0x0000002a, /* DB_RENDER_OVERRIDE */
|
||||
0x00000000, /* DB_RENDER_OVERRIDE2 */
|
||||
0x00000000, /* DB_HTILE_DATA_BASE */
|
||||
|
||||
0xc0026900,
|
||||
0x00000316,
|
||||
0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
|
||||
0x00000010, /* */
|
||||
0x0000000a,
|
||||
0x00000000, /* DB_STENCIL_CLEAR */
|
||||
0x00000000, /* DB_DEPTH_CLEAR */
|
||||
|
||||
0xc0036900,
|
||||
0x0000000f,
|
||||
0x00000000, /* DB_DEPTH_INFO */
|
||||
0x00000000, /* DB_Z_INFO */
|
||||
0x00000000, /* DB_STENCIL_INFO */
|
||||
|
||||
0xc0016900,
|
||||
0x00000080,
|
||||
0x00000000, /* PA_SC_WINDOW_OFFSET */
|
||||
|
||||
0xc00d6900,
|
||||
0x00000083,
|
||||
0x0000ffff, /* PA_SC_CLIPRECT_RULE */
|
||||
0x00000000, /* PA_SC_CLIPRECT_0_TL */
|
||||
0x20002000, /* PA_SC_CLIPRECT_0_BR */
|
||||
0x00000000,
|
||||
0x20002000,
|
||||
0x00000000,
|
||||
0x20002000,
|
||||
0x00000000,
|
||||
0x20002000,
|
||||
0xaaaaaaaa, /* PA_SC_EDGERULE */
|
||||
0x00000000, /* PA_SU_HARDWARE_SCREEN_OFFSET */
|
||||
0x0000000f, /* CB_TARGET_MASK */
|
||||
0x0000000f, /* CB_SHADER_MASK */
|
||||
|
||||
0xc0226900,
|
||||
0x00000094,
|
||||
0x80000000, /* PA_SC_VPORT_SCISSOR_0_TL */
|
||||
0x20002000, /* PA_SC_VPORT_SCISSOR_0_BR */
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x80000000,
|
||||
0x20002000,
|
||||
0x00000000, /* PA_SC_VPORT_ZMIN_0 */
|
||||
0x3f800000, /* PA_SC_VPORT_ZMAX_0 */
|
||||
|
||||
0xc0016900,
|
||||
0x000000d4,
|
||||
0x00000000, /* SX_MISC */
|
||||
|
||||
0xc0026900,
|
||||
0x000000d9,
|
||||
0x00000000, /* CP_RINGID */
|
||||
0x00000000, /* CP_VMID */
|
||||
|
||||
0xc0096900,
|
||||
0x00000100,
|
||||
0x00ffffff, /* VGT_MAX_VTX_INDX */
|
||||
0x00000000, /* VGT_MIN_VTX_INDX */
|
||||
0x00000000, /* VGT_INDX_OFFSET */
|
||||
0x00000000, /* VGT_MULTI_PRIM_IB_RESET_INDX */
|
||||
0x00000000, /* SX_ALPHA_TEST_CONTROL */
|
||||
0x00000000, /* CB_BLEND_RED */
|
||||
0x00000000, /* CB_BLEND_GREEN */
|
||||
0x00000000, /* CB_BLEND_BLUE */
|
||||
0x00000000, /* CB_BLEND_ALPHA */
|
||||
|
||||
0xc0016900,
|
||||
0x00000187,
|
||||
0x00000100, /* SPI_VS_OUT_ID_0 */
|
||||
|
||||
0xc0026900,
|
||||
0x00000191,
|
||||
0x00000100, /* SPI_PS_INPUT_CNTL_0 */
|
||||
0x00000101, /* SPI_PS_INPUT_CNTL_1 */
|
||||
|
||||
0xc0016900,
|
||||
0x000001b1,
|
||||
0x00000000, /* SPI_VS_OUT_CONFIG */
|
||||
|
||||
0xc0106900,
|
||||
0x000001b3,
|
||||
0x20000001, /* SPI_PS_IN_CONTROL_0 */
|
||||
0x00000000, /* SPI_PS_IN_CONTROL_1 */
|
||||
0x00000000, /* SPI_INTERP_CONTROL_0 */
|
||||
0x00000000, /* SPI_INPUT_Z */
|
||||
0x00000000, /* SPI_FOG_CNTL */
|
||||
0x00100000, /* SPI_BARYC_CNTL */
|
||||
0x00000000, /* SPI_PS_IN_CONTROL_2 */
|
||||
0x00000000, /* SPI_COMPUTE_INPUT_CNTL */
|
||||
0x00000000, /* SPI_COMPUTE_NUM_THREAD_X */
|
||||
0x00000000, /* SPI_COMPUTE_NUM_THREAD_Y */
|
||||
0x00000000, /* SPI_COMPUTE_NUM_THREAD_Z */
|
||||
0x00000000, /* SPI_GPR_MGMT */
|
||||
0x00000000, /* SPI_LDS_MGMT */
|
||||
0x00000000, /* SPI_STACK_MGMT */
|
||||
0x00000000, /* SPI_WAVE_MGMT_1 */
|
||||
0x00000000, /* SPI_WAVE_MGMT_2 */
|
||||
|
||||
0xc0016900,
|
||||
0x000001e0,
|
||||
0x00000000, /* CB_BLEND0_CONTROL */
|
||||
|
||||
0xc00e6900,
|
||||
0x00000200,
|
||||
0x00000000, /* DB_DEPTH_CONTROL */
|
||||
0x00000000, /* DB_EQAA */
|
||||
0x00cc0010, /* CB_COLOR_CONTROL */
|
||||
0x00000210, /* DB_SHADER_CONTROL */
|
||||
0x00010000, /* PA_CL_CLIP_CNTL */
|
||||
0x00000004, /* PA_SU_SC_MODE_CNTL */
|
||||
0x00000100, /* PA_CL_VTE_CNTL */
|
||||
0x00000000, /* PA_CL_VS_OUT_CNTL */
|
||||
0x00000000, /* PA_CL_NANINF_CNTL */
|
||||
0x00000000, /* PA_SU_LINE_STIPPLE_CNTL */
|
||||
0x00000000, /* PA_SU_LINE_STIPPLE_SCALE */
|
||||
0x00000000, /* PA_SU_PRIM_FILTER_CNTL */
|
||||
0x00000000, /* */
|
||||
0x00000000, /* */
|
||||
|
||||
0xc0026900,
|
||||
0x00000229,
|
||||
0x00000000, /* SQ_PGM_START_FS */
|
||||
0x00000000,
|
||||
|
||||
0xc0016900,
|
||||
0x0000023b,
|
||||
0x00000000, /* SQ_LDS_ALLOC_PS */
|
||||
|
||||
0xc0066900,
|
||||
0x00000240,
|
||||
0x00000000, /* SQ_ESGS_RING_ITEMSIZE */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
||||
0xc0046900,
|
||||
0x00000247,
|
||||
0x00000000, /* SQ_GS_VERT_ITEMSIZE */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
||||
0xc0116900,
|
||||
0x00000280,
|
||||
0x00000000, /* PA_SU_POINT_SIZE */
|
||||
0x00000000, /* PA_SU_POINT_MINMAX */
|
||||
0x00000008, /* PA_SU_LINE_CNTL */
|
||||
0x00000000, /* PA_SC_LINE_STIPPLE */
|
||||
0x00000000, /* VGT_OUTPUT_PATH_CNTL */
|
||||
0x00000000, /* VGT_HOS_CNTL */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000, /* VGT_GS_MODE */
|
||||
|
||||
0xc0026900,
|
||||
0x00000292,
|
||||
0x00000000, /* PA_SC_MODE_CNTL_0 */
|
||||
0x00000000, /* PA_SC_MODE_CNTL_1 */
|
||||
|
||||
0xc0016900,
|
||||
0x000002a1,
|
||||
0x00000000, /* VGT_PRIMITIVEID_EN */
|
||||
|
||||
0xc0016900,
|
||||
0x000002a5,
|
||||
0x00000000, /* VGT_MULTI_PRIM_IB_RESET_EN */
|
||||
|
||||
0xc0026900,
|
||||
0x000002a8,
|
||||
0x00000000, /* VGT_INSTANCE_STEP_RATE_0 */
|
||||
0x00000000,
|
||||
|
||||
0xc0026900,
|
||||
0x000002ad,
|
||||
0x00000000, /* VGT_REUSE_OFF */
|
||||
0x00000000,
|
||||
|
||||
0xc0016900,
|
||||
0x000002d5,
|
||||
0x00000000, /* VGT_SHADER_STAGES_EN */
|
||||
|
||||
0xc0016900,
|
||||
0x000002dc,
|
||||
0x0000aa00, /* DB_ALPHA_TO_MASK */
|
||||
|
||||
0xc0066900,
|
||||
0x000002de,
|
||||
0x00000000, /* PA_SU_POLY_OFFSET_DB_FMT_CNTL */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
|
||||
0xc0026900,
|
||||
0x000002e5,
|
||||
0x00000000, /* VGT_STRMOUT_CONFIG */
|
||||
0x00000000,
|
||||
|
||||
0xc01b6900,
|
||||
0x000002f5,
|
||||
0x76543210, /* PA_SC_CENTROID_PRIORITY_0 */
|
||||
0xfedcba98, /* PA_SC_CENTROID_PRIORITY_1 */
|
||||
0x00000000, /* PA_SC_LINE_CNTL */
|
||||
0x00000000, /* PA_SC_AA_CONFIG */
|
||||
0x00000005, /* PA_SU_VTX_CNTL */
|
||||
0x3f800000, /* PA_CL_GB_VERT_CLIP_ADJ */
|
||||
0x3f800000, /* PA_CL_GB_VERT_DISC_ADJ */
|
||||
0x3f800000, /* PA_CL_GB_HORZ_CLIP_ADJ */
|
||||
0x3f800000, /* PA_CL_GB_HORZ_DISC_ADJ */
|
||||
0x00000000, /* PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0x00000000,
|
||||
0xffffffff, /* PA_SC_AA_MASK_X0Y0_X1Y0 */
|
||||
0xffffffff,
|
||||
|
||||
0xc0026900,
|
||||
0x00000316,
|
||||
0x0000000e, /* VGT_VERTEX_REUSE_BLOCK_CNTL */
|
||||
0x00000010, /* */
|
||||
};
|
||||
|
||||
const u32 cayman_vs[] =
|
||||
{
|
||||
0x00000004,
|
||||
0x80400400,
|
||||
0x0000a03c,
|
||||
0x95000688,
|
||||
0x00004000,
|
||||
0x15000688,
|
||||
0x00000000,
|
||||
0x88000000,
|
||||
0x04000000,
|
||||
0x67961001,
|
||||
#ifdef __BIG_ENDIAN
|
||||
0x00020000,
|
||||
#else
|
||||
0x00000000,
|
||||
#endif
|
||||
0x00000000,
|
||||
0x04000000,
|
||||
0x67961000,
|
||||
#ifdef __BIG_ENDIAN
|
||||
0x00020008,
|
||||
#else
|
||||
0x00000008,
|
||||
#endif
|
||||
0x00000000,
|
||||
};
|
||||
|
||||
const u32 cayman_ps[] =
|
||||
{
|
||||
0x00000004,
|
||||
0xa00c0000,
|
||||
0x00000008,
|
||||
0x80400000,
|
||||
0x00000000,
|
||||
0x95000688,
|
||||
0x00000000,
|
||||
0x88000000,
|
||||
0x00380400,
|
||||
0x00146b10,
|
||||
0x00380000,
|
||||
0x20146b10,
|
||||
0x00380400,
|
||||
0x40146b00,
|
||||
0x80380000,
|
||||
0x60146b00,
|
||||
0x00000010,
|
||||
0x000d1000,
|
||||
0xb0800000,
|
||||
0x00000000,
|
||||
};
|
||||
|
||||
const u32 cayman_ps_size = ARRAY_SIZE(cayman_ps);
|
||||
const u32 cayman_vs_size = ARRAY_SIZE(cayman_vs);
|
||||
const u32 cayman_default_size = ARRAY_SIZE(cayman_default_state);
|
||||
|
|
|
@ -25,8 +25,11 @@
|
|||
#ifndef CAYMAN_BLIT_SHADERS_H
|
||||
#define CAYMAN_BLIT_SHADERS_H
|
||||
|
||||
extern const u32 cayman_ps[];
|
||||
extern const u32 cayman_vs[];
|
||||
extern const u32 cayman_default_state[];
|
||||
|
||||
extern const u32 cayman_ps_size, cayman_vs_size;
|
||||
extern const u32 cayman_default_size;
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
|
||||
#include "evergreend.h"
|
||||
#include "evergreen_blit_shaders.h"
|
||||
#include "cayman_blit_shaders.h"
|
||||
|
||||
#define DI_PT_RECTLIST 0x11
|
||||
#define DI_INDEX_SIZE_16_BIT 0x0
|
||||
|
@ -265,238 +266,240 @@ set_default_state(struct radeon_device *rdev)
|
|||
u64 gpu_addr;
|
||||
int dwords;
|
||||
|
||||
switch (rdev->family) {
|
||||
case CHIP_CEDAR:
|
||||
default:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 96;
|
||||
num_vs_threads = 16;
|
||||
num_gs_threads = 16;
|
||||
num_es_threads = 16;
|
||||
num_hs_threads = 16;
|
||||
num_ls_threads = 16;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_REDWOOD:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_JUNIPER:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_CYPRESS:
|
||||
case CHIP_HEMLOCK:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_PALM:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 96;
|
||||
num_vs_threads = 16;
|
||||
num_gs_threads = 16;
|
||||
num_es_threads = 16;
|
||||
num_hs_threads = 16;
|
||||
num_ls_threads = 16;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_BARTS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_TURKS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_CAICOS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 10;
|
||||
num_gs_threads = 10;
|
||||
num_es_threads = 10;
|
||||
num_hs_threads = 10;
|
||||
num_ls_threads = 10;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((rdev->family == CHIP_CEDAR) ||
|
||||
(rdev->family == CHIP_PALM) ||
|
||||
(rdev->family == CHIP_CAICOS))
|
||||
sq_config = 0;
|
||||
else
|
||||
sq_config = VC_ENABLE;
|
||||
|
||||
sq_config |= (EXPORT_SRC_C |
|
||||
CS_PRIO(0) |
|
||||
LS_PRIO(0) |
|
||||
HS_PRIO(0) |
|
||||
PS_PRIO(0) |
|
||||
VS_PRIO(1) |
|
||||
GS_PRIO(2) |
|
||||
ES_PRIO(3));
|
||||
|
||||
sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
|
||||
NUM_VS_GPRS(num_vs_gprs) |
|
||||
NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
|
||||
sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
|
||||
NUM_ES_GPRS(num_es_gprs));
|
||||
sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
|
||||
NUM_LS_GPRS(num_ls_gprs));
|
||||
sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
|
||||
NUM_VS_THREADS(num_vs_threads) |
|
||||
NUM_GS_THREADS(num_gs_threads) |
|
||||
NUM_ES_THREADS(num_es_threads));
|
||||
sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
|
||||
NUM_LS_THREADS(num_ls_threads));
|
||||
sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
|
||||
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
|
||||
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
|
||||
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
|
||||
sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
|
||||
NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
|
||||
|
||||
/* set clear context state */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
/* disable dyn gprs */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
if (rdev->family < CHIP_CAYMAN) {
|
||||
switch (rdev->family) {
|
||||
case CHIP_CEDAR:
|
||||
default:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 96;
|
||||
num_vs_threads = 16;
|
||||
num_gs_threads = 16;
|
||||
num_es_threads = 16;
|
||||
num_hs_threads = 16;
|
||||
num_ls_threads = 16;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_REDWOOD:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_JUNIPER:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_CYPRESS:
|
||||
case CHIP_HEMLOCK:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_PALM:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 96;
|
||||
num_vs_threads = 16;
|
||||
num_gs_threads = 16;
|
||||
num_es_threads = 16;
|
||||
num_hs_threads = 16;
|
||||
num_ls_threads = 16;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_BARTS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 85;
|
||||
num_vs_stack_entries = 85;
|
||||
num_gs_stack_entries = 85;
|
||||
num_es_stack_entries = 85;
|
||||
num_hs_stack_entries = 85;
|
||||
num_ls_stack_entries = 85;
|
||||
break;
|
||||
case CHIP_TURKS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 20;
|
||||
num_gs_threads = 20;
|
||||
num_es_threads = 20;
|
||||
num_hs_threads = 20;
|
||||
num_ls_threads = 20;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
case CHIP_CAICOS:
|
||||
num_ps_gprs = 93;
|
||||
num_vs_gprs = 46;
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 31;
|
||||
num_es_gprs = 31;
|
||||
num_hs_gprs = 23;
|
||||
num_ls_gprs = 23;
|
||||
num_ps_threads = 128;
|
||||
num_vs_threads = 10;
|
||||
num_gs_threads = 10;
|
||||
num_es_threads = 10;
|
||||
num_hs_threads = 10;
|
||||
num_ls_threads = 10;
|
||||
num_ps_stack_entries = 42;
|
||||
num_vs_stack_entries = 42;
|
||||
num_gs_stack_entries = 42;
|
||||
num_es_stack_entries = 42;
|
||||
num_hs_stack_entries = 42;
|
||||
num_ls_stack_entries = 42;
|
||||
break;
|
||||
}
|
||||
|
||||
/* SQ config */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
|
||||
radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
|
||||
radeon_ring_write(rdev, sq_config);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, sq_thread_resource_mgmt);
|
||||
radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
|
||||
if ((rdev->family == CHIP_CEDAR) ||
|
||||
(rdev->family == CHIP_PALM) ||
|
||||
(rdev->family == CHIP_CAICOS))
|
||||
sq_config = 0;
|
||||
else
|
||||
sq_config = VC_ENABLE;
|
||||
|
||||
sq_config |= (EXPORT_SRC_C |
|
||||
CS_PRIO(0) |
|
||||
LS_PRIO(0) |
|
||||
HS_PRIO(0) |
|
||||
PS_PRIO(0) |
|
||||
VS_PRIO(1) |
|
||||
GS_PRIO(2) |
|
||||
ES_PRIO(3));
|
||||
|
||||
sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
|
||||
NUM_VS_GPRS(num_vs_gprs) |
|
||||
NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
|
||||
sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
|
||||
NUM_ES_GPRS(num_es_gprs));
|
||||
sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
|
||||
NUM_LS_GPRS(num_ls_gprs));
|
||||
sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
|
||||
NUM_VS_THREADS(num_vs_threads) |
|
||||
NUM_GS_THREADS(num_gs_threads) |
|
||||
NUM_ES_THREADS(num_es_threads));
|
||||
sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
|
||||
NUM_LS_THREADS(num_ls_threads));
|
||||
sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
|
||||
NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
|
||||
sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
|
||||
NUM_ES_STACK_ENTRIES(num_es_stack_entries));
|
||||
sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
|
||||
NUM_LS_STACK_ENTRIES(num_ls_stack_entries));
|
||||
|
||||
/* disable dyn gprs */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
|
||||
radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
|
||||
radeon_ring_write(rdev, 0);
|
||||
|
||||
/* SQ config */
|
||||
radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
|
||||
radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
|
||||
radeon_ring_write(rdev, sq_config);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, 0);
|
||||
radeon_ring_write(rdev, sq_thread_resource_mgmt);
|
||||
radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
|
||||
radeon_ring_write(rdev, sq_stack_resource_mgmt_3);
|
||||
}
|
||||
|
||||
/* CONTEXT_CONTROL */
|
||||
radeon_ring_write(rdev, 0xc0012800);
|
||||
|
@ -570,7 +573,10 @@ int evergreen_blit_init(struct radeon_device *rdev)
|
|||
mutex_init(&rdev->r600_blit.mutex);
|
||||
rdev->r600_blit.state_offset = 0;
|
||||
|
||||
rdev->r600_blit.state_len = evergreen_default_size;
|
||||
if (rdev->family < CHIP_CAYMAN)
|
||||
rdev->r600_blit.state_len = evergreen_default_size;
|
||||
else
|
||||
rdev->r600_blit.state_len = cayman_default_size;
|
||||
|
||||
dwords = rdev->r600_blit.state_len;
|
||||
while (dwords & 0xf) {
|
||||
|
@ -582,11 +588,17 @@ int evergreen_blit_init(struct radeon_device *rdev)
|
|||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
rdev->r600_blit.vs_offset = obj_size;
|
||||
obj_size += evergreen_vs_size * 4;
|
||||
if (rdev->family < CHIP_CAYMAN)
|
||||
obj_size += evergreen_vs_size * 4;
|
||||
else
|
||||
obj_size += cayman_vs_size * 4;
|
||||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
rdev->r600_blit.ps_offset = obj_size;
|
||||
obj_size += evergreen_ps_size * 4;
|
||||
if (rdev->family < CHIP_CAYMAN)
|
||||
obj_size += evergreen_ps_size * 4;
|
||||
else
|
||||
obj_size += cayman_ps_size * 4;
|
||||
obj_size = ALIGN(obj_size, 256);
|
||||
|
||||
r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
|
||||
|
@ -609,16 +621,29 @@ int evergreen_blit_init(struct radeon_device *rdev)
|
|||
return r;
|
||||
}
|
||||
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset,
|
||||
evergreen_default_state, rdev->r600_blit.state_len * 4);
|
||||
if (rdev->family < CHIP_CAYMAN) {
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset,
|
||||
evergreen_default_state, rdev->r600_blit.state_len * 4);
|
||||
|
||||
if (num_packet2s)
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
|
||||
packet2s, num_packet2s * 4);
|
||||
for (i = 0; i < evergreen_vs_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
|
||||
for (i = 0; i < evergreen_ps_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
|
||||
if (num_packet2s)
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
|
||||
packet2s, num_packet2s * 4);
|
||||
for (i = 0; i < evergreen_vs_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
|
||||
for (i = 0; i < evergreen_ps_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
|
||||
} else {
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset,
|
||||
cayman_default_state, rdev->r600_blit.state_len * 4);
|
||||
|
||||
if (num_packet2s)
|
||||
memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
|
||||
packet2s, num_packet2s * 4);
|
||||
for (i = 0; i < cayman_vs_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(cayman_vs[i]);
|
||||
for (i = 0; i < cayman_ps_size; i++)
|
||||
*(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(cayman_ps[i]);
|
||||
}
|
||||
radeon_bo_kunmap(rdev->r600_blit.shader_obj);
|
||||
radeon_bo_unreserve(rdev->r600_blit.shader_obj);
|
||||
|
||||
|
|
|
@ -1387,14 +1387,12 @@ static int cayman_startup(struct radeon_device *rdev)
|
|||
return r;
|
||||
cayman_gpu_init(rdev);
|
||||
|
||||
#if 0
|
||||
r = cayman_blit_init(rdev);
|
||||
r = evergreen_blit_init(rdev);
|
||||
if (r) {
|
||||
cayman_blit_fini(rdev);
|
||||
evergreen_blit_fini(rdev);
|
||||
rdev->asic->copy = NULL;
|
||||
dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* allocate wb buffer */
|
||||
r = radeon_wb_init(rdev);
|
||||
|
@ -1452,7 +1450,7 @@ int cayman_resume(struct radeon_device *rdev)
|
|||
|
||||
int cayman_suspend(struct radeon_device *rdev)
|
||||
{
|
||||
/* int r; */
|
||||
int r;
|
||||
|
||||
/* FIXME: we should wait for ring to be empty */
|
||||
cayman_cp_enable(rdev, false);
|
||||
|
@ -1461,14 +1459,13 @@ int cayman_suspend(struct radeon_device *rdev)
|
|||
radeon_wb_disable(rdev);
|
||||
cayman_pcie_gart_disable(rdev);
|
||||
|
||||
#if 0
|
||||
/* unpin shaders bo */
|
||||
r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
|
||||
if (likely(r == 0)) {
|
||||
radeon_bo_unpin(rdev->r600_blit.shader_obj);
|
||||
radeon_bo_unreserve(rdev->r600_blit.shader_obj);
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1580,7 +1577,7 @@ int cayman_init(struct radeon_device *rdev)
|
|||
|
||||
void cayman_fini(struct radeon_device *rdev)
|
||||
{
|
||||
/* cayman_blit_fini(rdev); */
|
||||
evergreen_blit_fini(rdev);
|
||||
cayman_cp_fini(rdev);
|
||||
r600_irq_fini(rdev);
|
||||
radeon_wb_fini(rdev);
|
||||
|
|
|
@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
|
|||
.get_vblank_counter = &evergreen_get_vblank_counter,
|
||||
.fence_ring_emit = &r600_fence_ring_emit,
|
||||
.cs_parse = &evergreen_cs_parse,
|
||||
.copy_blit = NULL,
|
||||
.copy_dma = NULL,
|
||||
.copy = NULL,
|
||||
.copy_blit = &evergreen_copy_blit,
|
||||
.copy_dma = &evergreen_copy_blit,
|
||||
.copy = &evergreen_copy_blit,
|
||||
.get_engine_clock = &radeon_atom_get_engine_clock,
|
||||
.set_engine_clock = &radeon_atom_set_engine_clock,
|
||||
.get_memory_clock = &radeon_atom_get_memory_clock,
|
||||
|
|
Loading…
Reference in a new issue