414ed53799
This adds initial r300 3D support to the radeon DRM. From: Nicolai Haehnle, Vladimir Dergachev, and others. Signed-off-by: David Airlie <airlied@linux.ie>
801 lines
22 KiB
C
801 lines
22 KiB
C
/* r300_cmdbuf.c -- Command buffer emission for R300 -*- linux-c -*-
|
|
*
|
|
* Copyright (C) The Weather Channel, Inc. 2002.
|
|
* Copyright (C) 2004 Nicolai Haehnle.
|
|
* All Rights Reserved.
|
|
*
|
|
* The Weather Channel (TM) funded Tungsten Graphics to develop the
|
|
* initial release of the Radeon 8500 driver under the XFree86 license.
|
|
* This notice must be preserved.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
* DEALINGS IN THE SOFTWARE.
|
|
*
|
|
* Authors:
|
|
* Nicolai Haehnle <prefect_@gmx.net>
|
|
*/
|
|
|
|
#include "drmP.h"
|
|
#include "drm.h"
|
|
#include "radeon_drm.h"
|
|
#include "radeon_drv.h"
|
|
#include "r300_reg.h"
|
|
|
|
|
|
/* Maximum number of cliprects emitted to the hardware in one batch. */
#define R300_SIMULTANEOUS_CLIPRECTS		4

/*
 * Values for R300_RE_CLIPRECT_CNTL depending on the number of cliprects.
 * The table is indexed by (number of active cliprects - 1).
 */
static const int r300_cliprect_cntl[R300_SIMULTANEOUS_CLIPRECTS] = {
	[0] = 0xAAAA,	/* one cliprect */
	[1] = 0xEEEE,	/* two cliprects */
	[2] = 0xFEFE,	/* three cliprects */
	[3] = 0xFFFE,	/* four cliprects */
};
|
|
|
|
|
|
/**
|
|
* Emit up to R300_SIMULTANEOUS_CLIPRECTS cliprects from the given command
|
|
* buffer, starting with index n.
|
|
*/
|
|
static int r300_emit_cliprects(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
int n)
|
|
{
|
|
drm_clip_rect_t box;
|
|
int nr;
|
|
int i;
|
|
RING_LOCALS;
|
|
|
|
nr = cmdbuf->nbox - n;
|
|
if (nr > R300_SIMULTANEOUS_CLIPRECTS)
|
|
nr = R300_SIMULTANEOUS_CLIPRECTS;
|
|
|
|
DRM_DEBUG("%i cliprects\n", nr);
|
|
|
|
if (nr) {
|
|
BEGIN_RING(6 + nr*2);
|
|
OUT_RING( CP_PACKET0( R300_RE_CLIPRECT_TL_0, nr*2 - 1 ) );
|
|
|
|
for(i = 0; i < nr; ++i) {
|
|
if (DRM_COPY_FROM_USER_UNCHECKED(&box, &cmdbuf->boxes[n+i], sizeof(box))) {
|
|
DRM_ERROR("copy cliprect faulted\n");
|
|
return DRM_ERR(EFAULT);
|
|
}
|
|
|
|
box.x1 = (box.x1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
|
|
box.y1 = (box.y1 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
|
|
box.x2 = (box.x2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
|
|
box.y2 = (box.y2 + R300_CLIPRECT_OFFSET) & R300_CLIPRECT_MASK;
|
|
|
|
OUT_RING((box.x1 << R300_CLIPRECT_X_SHIFT) |
|
|
(box.y1 << R300_CLIPRECT_Y_SHIFT));
|
|
OUT_RING((box.x2 << R300_CLIPRECT_X_SHIFT) |
|
|
(box.y2 << R300_CLIPRECT_Y_SHIFT));
|
|
}
|
|
|
|
OUT_RING_REG( R300_RE_CLIPRECT_CNTL, r300_cliprect_cntl[nr-1] );
|
|
|
|
/* TODO/SECURITY: Force scissors to a safe value, otherwise the
|
|
* client might be able to trample over memory.
|
|
* The impact should be very limited, but I'd rather be safe than
|
|
* sorry.
|
|
*/
|
|
OUT_RING( CP_PACKET0( R300_RE_SCISSORS_TL, 1 ) );
|
|
OUT_RING( 0 );
|
|
OUT_RING( R300_SCISSORS_X_MASK | R300_SCISSORS_Y_MASK );
|
|
ADVANCE_RING();
|
|
} else {
|
|
/* Why we allow zero cliprect rendering:
|
|
* There are some commands in a command buffer that must be submitted
|
|
* even when there are no cliprects, e.g. DMA buffer discard
|
|
* or state setting (though state setting could be avoided by
|
|
* simulating a loss of context).
|
|
*
|
|
* Now since the cmdbuf interface is so chaotic right now (and is
|
|
* bound to remain that way for a bit until things settle down),
|
|
* it is basically impossible to filter out the commands that are
|
|
* necessary and those that aren't.
|
|
*
|
|
* So I choose the safe way and don't do any filtering at all;
|
|
* instead, I simply set up the engine so that all rendering
|
|
* can't produce any fragments.
|
|
*/
|
|
BEGIN_RING(2);
|
|
OUT_RING_REG( R300_RE_CLIPRECT_CNTL, 0 );
|
|
ADVANCE_RING();
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
u8 r300_reg_flags[0x10000>>2];
|
|
|
|
|
|
void r300_init_reg_flags(void)
|
|
{
|
|
int i;
|
|
memset(r300_reg_flags, 0, 0x10000>>2);
|
|
#define ADD_RANGE_MARK(reg, count,mark) \
|
|
for(i=((reg)>>2);i<((reg)>>2)+(count);i++)\
|
|
r300_reg_flags[i]|=(mark);
|
|
|
|
#define MARK_SAFE 1
|
|
#define MARK_CHECK_OFFSET 2
|
|
|
|
#define ADD_RANGE(reg, count) ADD_RANGE_MARK(reg, count, MARK_SAFE)
|
|
|
|
/* these match cmducs() command in r300_driver/r300/r300_cmdbuf.c */
|
|
ADD_RANGE(R300_SE_VPORT_XSCALE, 6);
|
|
ADD_RANGE(0x2080, 1);
|
|
ADD_RANGE(R300_SE_VTE_CNTL, 2);
|
|
ADD_RANGE(0x2134, 2);
|
|
ADD_RANGE(0x2140, 1);
|
|
ADD_RANGE(R300_VAP_INPUT_CNTL_0, 2);
|
|
ADD_RANGE(0x21DC, 1);
|
|
ADD_RANGE(0x221C, 1);
|
|
ADD_RANGE(0x2220, 4);
|
|
ADD_RANGE(0x2288, 1);
|
|
ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
|
|
ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
|
|
ADD_RANGE(R300_GB_ENABLE, 1);
|
|
ADD_RANGE(R300_GB_MSPOS0, 5);
|
|
ADD_RANGE(R300_TX_ENABLE, 1);
|
|
ADD_RANGE(0x4200, 4);
|
|
ADD_RANGE(0x4214, 1);
|
|
ADD_RANGE(R300_RE_POINTSIZE, 1);
|
|
ADD_RANGE(0x4230, 3);
|
|
ADD_RANGE(R300_RE_LINE_CNT, 1);
|
|
ADD_RANGE(0x4238, 1);
|
|
ADD_RANGE(0x4260, 3);
|
|
ADD_RANGE(0x4274, 4);
|
|
ADD_RANGE(0x4288, 5);
|
|
ADD_RANGE(0x42A0, 1);
|
|
ADD_RANGE(R300_RE_ZBIAS_T_FACTOR, 4);
|
|
ADD_RANGE(0x42B4, 1);
|
|
ADD_RANGE(R300_RE_CULL_CNTL, 1);
|
|
ADD_RANGE(0x42C0, 2);
|
|
ADD_RANGE(R300_RS_CNTL_0, 2);
|
|
ADD_RANGE(R300_RS_INTERP_0, 8);
|
|
ADD_RANGE(R300_RS_ROUTE_0, 8);
|
|
ADD_RANGE(0x43A4, 2);
|
|
ADD_RANGE(0x43E8, 1);
|
|
ADD_RANGE(R300_PFS_CNTL_0, 3);
|
|
ADD_RANGE(R300_PFS_NODE_0, 4);
|
|
ADD_RANGE(R300_PFS_TEXI_0, 64);
|
|
ADD_RANGE(0x46A4, 5);
|
|
ADD_RANGE(R300_PFS_INSTR0_0, 64);
|
|
ADD_RANGE(R300_PFS_INSTR1_0, 64);
|
|
ADD_RANGE(R300_PFS_INSTR2_0, 64);
|
|
ADD_RANGE(R300_PFS_INSTR3_0, 64);
|
|
ADD_RANGE(0x4BC0, 1);
|
|
ADD_RANGE(0x4BC8, 3);
|
|
ADD_RANGE(R300_PP_ALPHA_TEST, 2);
|
|
ADD_RANGE(0x4BD8, 1);
|
|
ADD_RANGE(R300_PFS_PARAM_0_X, 64);
|
|
ADD_RANGE(0x4E00, 1);
|
|
ADD_RANGE(R300_RB3D_CBLEND, 2);
|
|
ADD_RANGE(R300_RB3D_COLORMASK, 1);
|
|
ADD_RANGE(0x4E10, 3);
|
|
ADD_RANGE_MARK(R300_RB3D_COLOROFFSET0, 1, MARK_CHECK_OFFSET); /* check offset */
|
|
ADD_RANGE(R300_RB3D_COLORPITCH0, 1);
|
|
ADD_RANGE(0x4E50, 9);
|
|
ADD_RANGE(0x4E88, 1);
|
|
ADD_RANGE(0x4EA0, 2);
|
|
ADD_RANGE(R300_RB3D_ZSTENCIL_CNTL_0, 3);
|
|
ADD_RANGE(0x4F10, 4);
|
|
ADD_RANGE_MARK(R300_RB3D_DEPTHOFFSET, 1, MARK_CHECK_OFFSET); /* check offset */
|
|
ADD_RANGE(R300_RB3D_DEPTHPITCH, 1);
|
|
ADD_RANGE(0x4F28, 1);
|
|
ADD_RANGE(0x4F30, 2);
|
|
ADD_RANGE(0x4F44, 1);
|
|
ADD_RANGE(0x4F54, 1);
|
|
|
|
ADD_RANGE(R300_TX_FILTER_0, 16);
|
|
ADD_RANGE(R300_TX_UNK1_0, 16);
|
|
ADD_RANGE(R300_TX_SIZE_0, 16);
|
|
ADD_RANGE(R300_TX_FORMAT_0, 16);
|
|
/* Texture offset is dangerous and needs more checking */
|
|
ADD_RANGE_MARK(R300_TX_OFFSET_0, 16, MARK_CHECK_OFFSET);
|
|
ADD_RANGE(R300_TX_UNK4_0, 16);
|
|
ADD_RANGE(R300_TX_BORDER_COLOR_0, 16);
|
|
|
|
/* Sporadic registers used as primitives are emitted */
|
|
ADD_RANGE(0x4f18, 1);
|
|
ADD_RANGE(R300_RB3D_DSTCACHE_CTLSTAT, 1);
|
|
ADD_RANGE(R300_VAP_INPUT_ROUTE_0_0, 8);
|
|
ADD_RANGE(R300_VAP_INPUT_ROUTE_1_0, 8);
|
|
|
|
}
|
|
|
|
static __inline__ int r300_check_range(unsigned reg, int count)
|
|
{
|
|
int i;
|
|
if(reg & ~0xffff)return -1;
|
|
for(i=(reg>>2);i<(reg>>2)+count;i++)
|
|
if(r300_reg_flags[i]!=MARK_SAFE)return 1;
|
|
return 0;
|
|
}
|
|
|
|
/* we expect offsets passed to the framebuffer to be either within video memory or
|
|
within AGP space */
|
|
static __inline__ int r300_check_offset(drm_radeon_private_t* dev_priv, u32 offset)
|
|
{
|
|
/* we realy want to check against end of video aperture
|
|
but this value is not being kept.
|
|
This code is correct for now (does the same thing as the
|
|
code that sets MC_FB_LOCATION) in radeon_cp.c */
|
|
if((offset>=dev_priv->fb_location) &&
|
|
(offset<dev_priv->gart_vm_start))return 0;
|
|
if((offset>=dev_priv->gart_vm_start) &&
|
|
(offset<dev_priv->gart_vm_start+dev_priv->gart_size))return 0;
|
|
return 1;
|
|
}
|
|
|
|
static __inline__ int r300_emit_carefully_checked_packet0(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
drm_r300_cmd_header_t header)
|
|
{
|
|
int reg;
|
|
int sz;
|
|
int i;
|
|
int values[64];
|
|
RING_LOCALS;
|
|
|
|
sz = header.packet0.count;
|
|
reg = (header.packet0.reghi << 8) | header.packet0.reglo;
|
|
|
|
if((sz>64)||(sz<0)){
|
|
DRM_ERROR("Cannot emit more than 64 values at a time (reg=%04x sz=%d)\n", reg, sz);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
for(i=0;i<sz;i++){
|
|
values[i]=((int __user*)cmdbuf->buf)[i];
|
|
switch(r300_reg_flags[(reg>>2)+i]){
|
|
case MARK_SAFE:
|
|
break;
|
|
case MARK_CHECK_OFFSET:
|
|
if(r300_check_offset(dev_priv, (u32)values[i])){
|
|
DRM_ERROR("Offset failed range check (reg=%04x sz=%d)\n", reg, sz);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
break;
|
|
default:
|
|
DRM_ERROR("Register %04x failed check as flag=%02x\n", reg+i*4, r300_reg_flags[(reg>>2)+i]);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
}
|
|
|
|
BEGIN_RING(1+sz);
|
|
OUT_RING( CP_PACKET0( reg, sz-1 ) );
|
|
OUT_RING_TABLE( values, sz );
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += sz*4;
|
|
cmdbuf->bufsz -= sz*4;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* Emits a packet0 setting arbitrary registers.
|
|
* Called by r300_do_cp_cmdbuf.
|
|
*
|
|
* Note that checks are performed on contents and addresses of the registers
|
|
*/
|
|
static __inline__ int r300_emit_packet0(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
drm_r300_cmd_header_t header)
|
|
{
|
|
int reg;
|
|
int sz;
|
|
RING_LOCALS;
|
|
|
|
sz = header.packet0.count;
|
|
reg = (header.packet0.reghi << 8) | header.packet0.reglo;
|
|
|
|
if (!sz)
|
|
return 0;
|
|
|
|
if (sz*4 > cmdbuf->bufsz)
|
|
return DRM_ERR(EINVAL);
|
|
|
|
if (reg+sz*4 >= 0x10000){
|
|
DRM_ERROR("No such registers in hardware reg=%04x sz=%d\n", reg, sz);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
if(r300_check_range(reg, sz)){
|
|
/* go and check everything */
|
|
return r300_emit_carefully_checked_packet0(dev_priv, cmdbuf, header);
|
|
}
|
|
/* the rest of the data is safe to emit, whatever the values the user passed */
|
|
|
|
BEGIN_RING(1+sz);
|
|
OUT_RING( CP_PACKET0( reg, sz-1 ) );
|
|
OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz );
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += sz*4;
|
|
cmdbuf->bufsz -= sz*4;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Uploads user-supplied vertex program instructions or parameters onto
|
|
* the graphics card.
|
|
* Called by r300_do_cp_cmdbuf.
|
|
*/
|
|
static __inline__ int r300_emit_vpu(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
drm_r300_cmd_header_t header)
|
|
{
|
|
int sz;
|
|
int addr;
|
|
RING_LOCALS;
|
|
|
|
sz = header.vpu.count;
|
|
addr = (header.vpu.adrhi << 8) | header.vpu.adrlo;
|
|
|
|
if (!sz)
|
|
return 0;
|
|
if (sz*16 > cmdbuf->bufsz)
|
|
return DRM_ERR(EINVAL);
|
|
|
|
BEGIN_RING(5+sz*4);
|
|
/* Wait for VAP to come to senses.. */
|
|
/* there is no need to emit it multiple times, (only once before VAP is programmed,
|
|
but this optimization is for later */
|
|
OUT_RING_REG( R300_VAP_PVS_WAITIDLE, 0 );
|
|
OUT_RING_REG( R300_VAP_PVS_UPLOAD_ADDRESS, addr );
|
|
OUT_RING( CP_PACKET0_TABLE( R300_VAP_PVS_UPLOAD_DATA, sz*4 - 1 ) );
|
|
OUT_RING_TABLE( (int __user*)cmdbuf->buf, sz*4 );
|
|
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += sz*16;
|
|
cmdbuf->bufsz -= sz*16;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Emit a clear packet from userspace.
|
|
* Called by r300_emit_packet3.
|
|
*/
|
|
static __inline__ int r300_emit_clear(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf)
|
|
{
|
|
RING_LOCALS;
|
|
|
|
if (8*4 > cmdbuf->bufsz)
|
|
return DRM_ERR(EINVAL);
|
|
|
|
BEGIN_RING(10);
|
|
OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 8 ) );
|
|
OUT_RING( R300_PRIM_TYPE_POINT|R300_PRIM_WALK_RING|
|
|
(1<<R300_PRIM_NUM_VERTICES_SHIFT) );
|
|
OUT_RING_TABLE( (int __user*)cmdbuf->buf, 8 );
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += 8*4;
|
|
cmdbuf->bufsz -= 8*4;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static __inline__ int r300_emit_3d_load_vbpntr(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
u32 header)
|
|
{
|
|
int count, i,k;
|
|
#define MAX_ARRAY_PACKET 64
|
|
u32 payload[MAX_ARRAY_PACKET];
|
|
u32 narrays;
|
|
RING_LOCALS;
|
|
|
|
count=(header>>16) & 0x3fff;
|
|
|
|
if((count+1)>MAX_ARRAY_PACKET){
|
|
DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n", count);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
memset(payload, 0, MAX_ARRAY_PACKET*4);
|
|
memcpy(payload, cmdbuf->buf+4, (count+1)*4);
|
|
|
|
/* carefully check packet contents */
|
|
|
|
narrays=payload[0];
|
|
k=0;
|
|
i=1;
|
|
while((k<narrays) && (i<(count+1))){
|
|
i++; /* skip attribute field */
|
|
if(r300_check_offset(dev_priv, payload[i])){
|
|
DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
k++;
|
|
i++;
|
|
if(k==narrays)break;
|
|
/* have one more to process, they come in pairs */
|
|
if(r300_check_offset(dev_priv, payload[i])){
|
|
DRM_ERROR("Offset failed range check (k=%d i=%d) while processing 3D_LOAD_VBPNTR packet.\n", k, i);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
k++;
|
|
i++;
|
|
}
|
|
/* do the counts match what we expect ? */
|
|
if((k!=narrays) || (i!=(count+1))){
|
|
DRM_ERROR("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n", k, i, narrays, count+1);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
/* all clear, output packet */
|
|
|
|
BEGIN_RING(count+2);
|
|
OUT_RING(header);
|
|
OUT_RING_TABLE(payload, count+1);
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += (count+2)*4;
|
|
cmdbuf->bufsz -= (count+2)*4;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf)
|
|
{
|
|
u32 header;
|
|
int count;
|
|
RING_LOCALS;
|
|
|
|
if (4 > cmdbuf->bufsz)
|
|
return DRM_ERR(EINVAL);
|
|
|
|
/* Fixme !! This simply emits a packet without much checking.
|
|
We need to be smarter. */
|
|
|
|
/* obtain first word - actual packet3 header */
|
|
header = *(u32 __user*)cmdbuf->buf;
|
|
|
|
/* Is it packet 3 ? */
|
|
if( (header>>30)!=0x3 ) {
|
|
DRM_ERROR("Not a packet3 header (0x%08x)\n", header);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
count=(header>>16) & 0x3fff;
|
|
|
|
/* Check again now that we know how much data to expect */
|
|
if ((count+2)*4 > cmdbuf->bufsz){
|
|
DRM_ERROR("Expected packet3 of length %d but have only %d bytes left\n",
|
|
(count+2)*4, cmdbuf->bufsz);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
/* Is it a packet type we know about ? */
|
|
switch(header & 0xff00){
|
|
case RADEON_3D_LOAD_VBPNTR: /* load vertex array pointers */
|
|
return r300_emit_3d_load_vbpntr(dev_priv, cmdbuf, header);
|
|
|
|
case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
|
|
case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
|
|
case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
|
|
case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
|
|
case RADEON_WAIT_FOR_IDLE:
|
|
case RADEON_CP_NOP:
|
|
/* these packets are safe */
|
|
break;
|
|
default:
|
|
DRM_ERROR("Unknown packet3 header (0x%08x)\n", header);
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
|
|
BEGIN_RING(count+2);
|
|
OUT_RING(header);
|
|
OUT_RING_TABLE( (int __user*)(cmdbuf->buf+4), count+1);
|
|
ADVANCE_RING();
|
|
|
|
cmdbuf->buf += (count+2)*4;
|
|
cmdbuf->bufsz -= (count+2)*4;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Emit a rendering packet3 from userspace.
|
|
* Called by r300_do_cp_cmdbuf.
|
|
*/
|
|
static __inline__ int r300_emit_packet3(drm_radeon_private_t* dev_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf,
|
|
drm_r300_cmd_header_t header)
|
|
{
|
|
int n;
|
|
int ret;
|
|
char __user* orig_buf = cmdbuf->buf;
|
|
int orig_bufsz = cmdbuf->bufsz;
|
|
|
|
/* This is a do-while-loop so that we run the interior at least once,
|
|
* even if cmdbuf->nbox is 0. Compare r300_emit_cliprects for rationale.
|
|
*/
|
|
n = 0;
|
|
do {
|
|
if (cmdbuf->nbox > R300_SIMULTANEOUS_CLIPRECTS) {
|
|
ret = r300_emit_cliprects(dev_priv, cmdbuf, n);
|
|
if (ret)
|
|
return ret;
|
|
|
|
cmdbuf->buf = orig_buf;
|
|
cmdbuf->bufsz = orig_bufsz;
|
|
}
|
|
|
|
switch(header.packet3.packet) {
|
|
case R300_CMD_PACKET3_CLEAR:
|
|
DRM_DEBUG("R300_CMD_PACKET3_CLEAR\n");
|
|
ret = r300_emit_clear(dev_priv, cmdbuf);
|
|
if (ret) {
|
|
DRM_ERROR("r300_emit_clear failed\n");
|
|
return ret;
|
|
}
|
|
break;
|
|
|
|
case R300_CMD_PACKET3_RAW:
|
|
DRM_DEBUG("R300_CMD_PACKET3_RAW\n");
|
|
ret = r300_emit_raw_packet3(dev_priv, cmdbuf);
|
|
if (ret) {
|
|
DRM_ERROR("r300_emit_raw_packet3 failed\n");
|
|
return ret;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
DRM_ERROR("bad packet3 type %i at %p\n",
|
|
header.packet3.packet,
|
|
cmdbuf->buf - sizeof(header));
|
|
return DRM_ERR(EINVAL);
|
|
}
|
|
|
|
n += R300_SIMULTANEOUS_CLIPRECTS;
|
|
} while(n < cmdbuf->nbox);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Some of the R300 chips seem to be extremely touchy about the two registers
|
|
* that are configured in r300_pacify.
|
|
* Among the worst offenders seems to be the R300 ND (0x4E44): When userspace
|
|
* sends a command buffer that contains only state setting commands and a
|
|
* vertex program/parameter upload sequence, this will eventually lead to a
|
|
* lockup, unless the sequence is bracketed by calls to r300_pacify.
|
|
* So we should take great care to *always* call r300_pacify before
|
|
* *anything* 3D related, and again afterwards. This is what the
|
|
* call bracket in r300_do_cp_cmdbuf is for.
|
|
*/
|
|
|
|
/**
|
|
* Emit the sequence to pacify R300.
|
|
*/
|
|
static __inline__ void r300_pacify(drm_radeon_private_t* dev_priv)
|
|
{
|
|
RING_LOCALS;
|
|
|
|
BEGIN_RING(6);
|
|
OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) );
|
|
OUT_RING( 0xa );
|
|
OUT_RING( CP_PACKET0( 0x4f18, 0 ) );
|
|
OUT_RING( 0x3 );
|
|
OUT_RING( CP_PACKET3( RADEON_CP_NOP, 0 ) );
|
|
OUT_RING( 0x0 );
|
|
ADVANCE_RING();
|
|
}
|
|
|
|
|
|
/**
|
|
* Called by r300_do_cp_cmdbuf to update the internal buffer age and state.
|
|
* The actual age emit is done by r300_do_cp_cmdbuf, which is why you must
|
|
* be careful about how this function is called.
|
|
*/
|
|
static void r300_discard_buffer(drm_device_t * dev, drm_buf_t * buf)
|
|
{
|
|
drm_radeon_private_t *dev_priv = dev->dev_private;
|
|
drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
|
|
|
|
buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
|
|
buf->pending = 1;
|
|
buf->used = 0;
|
|
}
|
|
|
|
|
|
/**
|
|
* Parses and validates a user-supplied command buffer and emits appropriate
|
|
* commands on the DMA ring buffer.
|
|
* Called by the ioctl handler function radeon_cp_cmdbuf.
|
|
*/
|
|
int r300_do_cp_cmdbuf(drm_device_t* dev,
|
|
DRMFILE filp,
|
|
drm_file_t* filp_priv,
|
|
drm_radeon_cmd_buffer_t* cmdbuf)
|
|
{
|
|
drm_radeon_private_t *dev_priv = dev->dev_private;
|
|
drm_device_dma_t *dma = dev->dma;
|
|
drm_buf_t *buf = NULL;
|
|
int emit_dispatch_age = 0;
|
|
int ret = 0;
|
|
|
|
DRM_DEBUG("\n");
|
|
|
|
/* See the comment above r300_emit_begin3d for why this call must be here,
|
|
* and what the cleanup gotos are for. */
|
|
r300_pacify(dev_priv);
|
|
|
|
if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
|
|
ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
|
|
if (ret)
|
|
goto cleanup;
|
|
}
|
|
|
|
while(cmdbuf->bufsz >= sizeof(drm_r300_cmd_header_t)) {
|
|
int idx;
|
|
drm_r300_cmd_header_t header;
|
|
|
|
header.u = *(unsigned int *)cmdbuf->buf;
|
|
|
|
cmdbuf->buf += sizeof(header);
|
|
cmdbuf->bufsz -= sizeof(header);
|
|
|
|
switch(header.header.cmd_type) {
|
|
case R300_CMD_PACKET0:
|
|
DRM_DEBUG("R300_CMD_PACKET0\n");
|
|
ret = r300_emit_packet0(dev_priv, cmdbuf, header);
|
|
if (ret) {
|
|
DRM_ERROR("r300_emit_packet0 failed\n");
|
|
goto cleanup;
|
|
}
|
|
break;
|
|
|
|
case R300_CMD_VPU:
|
|
DRM_DEBUG("R300_CMD_VPU\n");
|
|
ret = r300_emit_vpu(dev_priv, cmdbuf, header);
|
|
if (ret) {
|
|
DRM_ERROR("r300_emit_vpu failed\n");
|
|
goto cleanup;
|
|
}
|
|
break;
|
|
|
|
case R300_CMD_PACKET3:
|
|
DRM_DEBUG("R300_CMD_PACKET3\n");
|
|
ret = r300_emit_packet3(dev_priv, cmdbuf, header);
|
|
if (ret) {
|
|
DRM_ERROR("r300_emit_packet3 failed\n");
|
|
goto cleanup;
|
|
}
|
|
break;
|
|
|
|
case R300_CMD_END3D:
|
|
DRM_DEBUG("R300_CMD_END3D\n");
|
|
/* TODO:
|
|
Ideally userspace driver should not need to issue this call,
|
|
i.e. the drm driver should issue it automatically and prevent
|
|
lockups.
|
|
|
|
In practice, we do not understand why this call is needed and what
|
|
it does (except for some vague guesses that it has to do with cache
|
|
coherence) and so the user space driver does it.
|
|
|
|
Once we are sure which uses prevent lockups the code could be moved
|
|
into the kernel and the userspace driver will not
|
|
need to use this command.
|
|
|
|
Note that issuing this command does not hurt anything
|
|
except, possibly, performance */
|
|
r300_pacify(dev_priv);
|
|
break;
|
|
|
|
case R300_CMD_CP_DELAY:
|
|
/* simple enough, we can do it here */
|
|
DRM_DEBUG("R300_CMD_CP_DELAY\n");
|
|
{
|
|
int i;
|
|
RING_LOCALS;
|
|
|
|
BEGIN_RING(header.delay.count);
|
|
for(i=0;i<header.delay.count;i++)
|
|
OUT_RING(RADEON_CP_PACKET2);
|
|
ADVANCE_RING();
|
|
}
|
|
break;
|
|
|
|
case R300_CMD_DMA_DISCARD:
|
|
DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
|
|
idx = header.dma.buf_idx;
|
|
if (idx < 0 || idx >= dma->buf_count) {
|
|
DRM_ERROR("buffer index %d (of %d max)\n",
|
|
idx, dma->buf_count - 1);
|
|
ret = DRM_ERR(EINVAL);
|
|
goto cleanup;
|
|
}
|
|
|
|
buf = dma->buflist[idx];
|
|
if (buf->filp != filp || buf->pending) {
|
|
DRM_ERROR("bad buffer %p %p %d\n",
|
|
buf->filp, filp, buf->pending);
|
|
ret = DRM_ERR(EINVAL);
|
|
goto cleanup;
|
|
}
|
|
|
|
emit_dispatch_age = 1;
|
|
r300_discard_buffer(dev, buf);
|
|
break;
|
|
|
|
case R300_CMD_WAIT:
|
|
/* simple enough, we can do it here */
|
|
DRM_DEBUG("R300_CMD_WAIT\n");
|
|
if(header.wait.flags==0)break; /* nothing to do */
|
|
|
|
{
|
|
RING_LOCALS;
|
|
|
|
BEGIN_RING(2);
|
|
OUT_RING( CP_PACKET0( RADEON_WAIT_UNTIL, 0 ) );
|
|
OUT_RING( (header.wait.flags & 0xf)<<14 );
|
|
ADVANCE_RING();
|
|
}
|
|
break;
|
|
|
|
default:
|
|
DRM_ERROR("bad cmd_type %i at %p\n",
|
|
header.header.cmd_type,
|
|
cmdbuf->buf - sizeof(header));
|
|
ret = DRM_ERR(EINVAL);
|
|
goto cleanup;
|
|
}
|
|
}
|
|
|
|
DRM_DEBUG("END\n");
|
|
|
|
cleanup:
|
|
r300_pacify(dev_priv);
|
|
|
|
/* We emit the vertex buffer age here, outside the pacifier "brackets"
|
|
* for two reasons:
|
|
* (1) This may coalesce multiple age emissions into a single one and
|
|
* (2) more importantly, some chips lock up hard when scratch registers
|
|
* are written inside the pacifier bracket.
|
|
*/
|
|
if (emit_dispatch_age) {
|
|
RING_LOCALS;
|
|
|
|
/* Emit the vertex buffer age */
|
|
BEGIN_RING(2);
|
|
RADEON_DISPATCH_AGE(dev_priv->sarea_priv->last_dispatch);
|
|
ADVANCE_RING();
|
|
}
|
|
|
|
COMMIT_RING();
|
|
|
|
return ret;
|
|
}
|
|
|