PageRenderTime 120ms CodeModel.GetById 16ms app.highlight 91ms RepoModel.GetById 1ms app.codeStats 1ms

/drivers/char/drm/radeon_state.c

https://bitbucket.org/evzijst/gittest
C | 3102 lines | 2319 code | 481 blank | 302 comment | 325 complexity | ffee6bcbcc422e06c42ca5a737156a5c MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1/* radeon_state.c -- State support for Radeon -*- linux-c -*-
   2 *
   3 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
   4 * All Rights Reserved.
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a
   7 * copy of this software and associated documentation files (the "Software"),
   8 * to deal in the Software without restriction, including without limitation
   9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10 * and/or sell copies of the Software, and to permit persons to whom the
  11 * Software is furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice (including the next
  14 * paragraph) shall be included in all copies or substantial portions of the
  15 * Software.
  16 *
  17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23 * DEALINGS IN THE SOFTWARE.
  24 *
  25 * Authors:
  26 *    Gareth Hughes <gareth@valinux.com>
  27 *    Kevin E. Martin <martin@valinux.com>
  28 */
  29
  30#include "drmP.h"
  31#include "drm.h"
  32#include "drm_sarea.h"
  33#include "radeon_drm.h"
  34#include "radeon_drv.h"
  35
  36/* ================================================================
  37 * Helper functions for client state checking and fixup
  38 */
  39
  40static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
  41						     drm_file_t *filp_priv,
  42						     u32 *offset ) {
  43	u32 off = *offset;
  44	struct drm_radeon_driver_file_fields *radeon_priv;
  45
  46	if ( off >= dev_priv->fb_location &&
  47	     off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
  48		return 0;
  49
  50	radeon_priv = filp_priv->driver_priv;
  51	off += radeon_priv->radeon_fb_delta;
  52
  53	DRM_DEBUG( "offset fixed up to 0x%x\n", off );
  54
  55	if ( off < dev_priv->fb_location ||
  56	     off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
  57		return DRM_ERR( EINVAL );
  58
  59	*offset = off;
  60
  61	return 0;
  62}
  63
  64static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
  65						      drm_file_t *filp_priv,
  66						      int id,
  67						      u32 __user *data ) {
  68	switch ( id ) {
  69
  70	case RADEON_EMIT_PP_MISC:
  71		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
  72						    &data[( RADEON_RB3D_DEPTHOFFSET
  73							    - RADEON_PP_MISC ) / 4] ) ) {
  74			DRM_ERROR( "Invalid depth buffer offset\n" );
  75			return DRM_ERR( EINVAL );
  76		}
  77		break;
  78
  79	case RADEON_EMIT_PP_CNTL:
  80		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
  81						    &data[( RADEON_RB3D_COLOROFFSET
  82							    - RADEON_PP_CNTL ) / 4] ) ) {
  83			DRM_ERROR( "Invalid colour buffer offset\n" );
  84			return DRM_ERR( EINVAL );
  85		}
  86		break;
  87
  88	case R200_EMIT_PP_TXOFFSET_0:
  89	case R200_EMIT_PP_TXOFFSET_1:
  90	case R200_EMIT_PP_TXOFFSET_2:
  91	case R200_EMIT_PP_TXOFFSET_3:
  92	case R200_EMIT_PP_TXOFFSET_4:
  93	case R200_EMIT_PP_TXOFFSET_5:
  94		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
  95						    &data[0] ) ) {
  96			DRM_ERROR( "Invalid R200 texture offset\n" );
  97			return DRM_ERR( EINVAL );
  98		}
  99		break;
 100
 101	case RADEON_EMIT_PP_TXFILTER_0:
 102	case RADEON_EMIT_PP_TXFILTER_1:
 103	case RADEON_EMIT_PP_TXFILTER_2:
 104		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 105						    &data[( RADEON_PP_TXOFFSET_0
 106							    - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
 107			DRM_ERROR( "Invalid R100 texture offset\n" );
 108			return DRM_ERR( EINVAL );
 109		}
 110		break;
 111
 112	case R200_EMIT_PP_CUBIC_OFFSETS_0:
 113	case R200_EMIT_PP_CUBIC_OFFSETS_1:
 114	case R200_EMIT_PP_CUBIC_OFFSETS_2:
 115	case R200_EMIT_PP_CUBIC_OFFSETS_3:
 116	case R200_EMIT_PP_CUBIC_OFFSETS_4:
 117	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
 118		int i;
 119		for ( i = 0; i < 5; i++ ) {
 120			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 121							    &data[i] ) ) {
 122				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
 123				return DRM_ERR( EINVAL );
 124			}
 125		}
 126		break;
 127	}
 128
 129	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
 130	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
 131	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
 132			int i;
 133			for (i = 0; i < 5; i++) {
 134				if (radeon_check_and_fixup_offset(dev_priv,
 135								  filp_priv,
 136								  &data[i])) {
 137					DRM_ERROR
 138					    ("Invalid R100 cubic texture offset\n");
 139					return DRM_ERR(EINVAL);
 140				}
 141			}
 142		}
 143		break;
 144
 145	case RADEON_EMIT_RB3D_COLORPITCH:
 146	case RADEON_EMIT_RE_LINE_PATTERN:
 147	case RADEON_EMIT_SE_LINE_WIDTH:
 148	case RADEON_EMIT_PP_LUM_MATRIX:
 149	case RADEON_EMIT_PP_ROT_MATRIX_0:
 150	case RADEON_EMIT_RB3D_STENCILREFMASK:
 151	case RADEON_EMIT_SE_VPORT_XSCALE:
 152	case RADEON_EMIT_SE_CNTL:
 153	case RADEON_EMIT_SE_CNTL_STATUS:
 154	case RADEON_EMIT_RE_MISC:
 155	case RADEON_EMIT_PP_BORDER_COLOR_0:
 156	case RADEON_EMIT_PP_BORDER_COLOR_1:
 157	case RADEON_EMIT_PP_BORDER_COLOR_2:
 158	case RADEON_EMIT_SE_ZBIAS_FACTOR:
 159	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
 160	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
 161	case R200_EMIT_PP_TXCBLEND_0:
 162	case R200_EMIT_PP_TXCBLEND_1:
 163	case R200_EMIT_PP_TXCBLEND_2:
 164	case R200_EMIT_PP_TXCBLEND_3:
 165	case R200_EMIT_PP_TXCBLEND_4:
 166	case R200_EMIT_PP_TXCBLEND_5:
 167	case R200_EMIT_PP_TXCBLEND_6:
 168	case R200_EMIT_PP_TXCBLEND_7:
 169	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
 170	case R200_EMIT_TFACTOR_0:
 171	case R200_EMIT_VTX_FMT_0:
 172	case R200_EMIT_VAP_CTL:
 173	case R200_EMIT_MATRIX_SELECT_0:
 174	case R200_EMIT_TEX_PROC_CTL_2:
 175	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
 176	case R200_EMIT_PP_TXFILTER_0:
 177	case R200_EMIT_PP_TXFILTER_1:
 178	case R200_EMIT_PP_TXFILTER_2:
 179	case R200_EMIT_PP_TXFILTER_3:
 180	case R200_EMIT_PP_TXFILTER_4:
 181	case R200_EMIT_PP_TXFILTER_5:
 182	case R200_EMIT_VTE_CNTL:
 183	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
 184	case R200_EMIT_PP_TAM_DEBUG3:
 185	case R200_EMIT_PP_CNTL_X:
 186	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
 187	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
 188	case R200_EMIT_RE_SCISSOR_TL_0:
 189	case R200_EMIT_RE_SCISSOR_TL_1:
 190	case R200_EMIT_RE_SCISSOR_TL_2:
 191	case R200_EMIT_SE_VAP_CNTL_STATUS:
 192	case R200_EMIT_SE_VTX_STATE_CNTL:
 193	case R200_EMIT_RE_POINTSIZE:
 194	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
 195	case R200_EMIT_PP_CUBIC_FACES_0:
 196	case R200_EMIT_PP_CUBIC_FACES_1:
 197	case R200_EMIT_PP_CUBIC_FACES_2:
 198	case R200_EMIT_PP_CUBIC_FACES_3:
 199	case R200_EMIT_PP_CUBIC_FACES_4:
 200	case R200_EMIT_PP_CUBIC_FACES_5:
 201	case RADEON_EMIT_PP_TEX_SIZE_0:
 202	case RADEON_EMIT_PP_TEX_SIZE_1:
 203	case RADEON_EMIT_PP_TEX_SIZE_2:
 204	case R200_EMIT_RB3D_BLENDCOLOR:
 205	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
 206	case RADEON_EMIT_PP_CUBIC_FACES_0:
 207	case RADEON_EMIT_PP_CUBIC_FACES_1:
 208	case RADEON_EMIT_PP_CUBIC_FACES_2:
 209	case R200_EMIT_PP_TRI_PERF_CNTL:
 210		/* These packets don't contain memory offsets */
 211		break;
 212
 213	default:
 214		DRM_ERROR( "Unknown state packet ID %d\n", id );
 215		return DRM_ERR( EINVAL );
 216	}
 217
 218	return 0;
 219}
 220
 221static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
 222						      drm_file_t *filp_priv,
 223						      drm_radeon_cmd_buffer_t *cmdbuf,
 224						      unsigned int *cmdsz ) {
 225	u32 *cmd = (u32 *) cmdbuf->buf;
 226
 227	*cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );
 228
 229	if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
 230		DRM_ERROR( "Not a type 3 packet\n" );
 231		return DRM_ERR( EINVAL );
 232	}
 233
 234	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
 235		DRM_ERROR( "Packet size larger than size of data provided\n" );
 236		return DRM_ERR( EINVAL );
 237	}
 238
 239	/* Check client state and fix it up if necessary */
 240	if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
 241		u32 offset;
 242
 243		if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
 244			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
 245			offset = cmd[2] << 10;
 246			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
 247				DRM_ERROR( "Invalid first packet offset\n" );
 248				return DRM_ERR( EINVAL );
 249			}
 250			cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
 251		}
 252
 253		if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
 254		     ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
 255			offset = cmd[3] << 10;
 256			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
 257				DRM_ERROR( "Invalid second packet offset\n" );
 258				return DRM_ERR( EINVAL );
 259			}
 260			cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
 261		}
 262	}
 263
 264	return 0;
 265}
 266
 267
 268/* ================================================================
 269 * CP hardware state programming functions
 270 */
 271
 272static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
 273					  drm_clip_rect_t *box )
 274{
 275	RING_LOCALS;
 276
 277	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
 278		   box->x1, box->y1, box->x2, box->y2 );
 279
 280	BEGIN_RING( 4 );
 281	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
 282	OUT_RING( (box->y1 << 16) | box->x1 );
 283	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
 284	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
 285	ADVANCE_RING();
 286}
 287
 288/* Emit 1.1 state
 289 */
 290static int radeon_emit_state( drm_radeon_private_t *dev_priv,
 291			      drm_file_t *filp_priv,
 292			      drm_radeon_context_regs_t *ctx,
 293			      drm_radeon_texture_regs_t *tex,
 294			      unsigned int dirty )
 295{
 296	RING_LOCALS;
 297	DRM_DEBUG( "dirty=0x%08x\n", dirty );
 298
 299	if ( dirty & RADEON_UPLOAD_CONTEXT ) {
 300		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 301						    &ctx->rb3d_depthoffset ) ) {
 302			DRM_ERROR( "Invalid depth buffer offset\n" );
 303			return DRM_ERR( EINVAL );
 304		}
 305
 306		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 307						    &ctx->rb3d_coloroffset ) ) {
 308			DRM_ERROR( "Invalid depth buffer offset\n" );
 309			return DRM_ERR( EINVAL );
 310		}
 311
 312		BEGIN_RING( 14 );
 313		OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
 314		OUT_RING( ctx->pp_misc );
 315		OUT_RING( ctx->pp_fog_color );
 316		OUT_RING( ctx->re_solid_color );
 317		OUT_RING( ctx->rb3d_blendcntl );
 318		OUT_RING( ctx->rb3d_depthoffset );
 319		OUT_RING( ctx->rb3d_depthpitch );
 320		OUT_RING( ctx->rb3d_zstencilcntl );
 321		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
 322		OUT_RING( ctx->pp_cntl );
 323		OUT_RING( ctx->rb3d_cntl );
 324		OUT_RING( ctx->rb3d_coloroffset );
 325		OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
 326		OUT_RING( ctx->rb3d_colorpitch );
 327		ADVANCE_RING();
 328	}
 329
 330	if ( dirty & RADEON_UPLOAD_VERTFMT ) {
 331		BEGIN_RING( 2 );
 332		OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
 333		OUT_RING( ctx->se_coord_fmt );
 334		ADVANCE_RING();
 335	}
 336
 337	if ( dirty & RADEON_UPLOAD_LINE ) {
 338		BEGIN_RING( 5 );
 339		OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
 340		OUT_RING( ctx->re_line_pattern );
 341		OUT_RING( ctx->re_line_state );
 342		OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
 343		OUT_RING( ctx->se_line_width );
 344		ADVANCE_RING();
 345	}
 346
 347	if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
 348		BEGIN_RING( 5 );
 349		OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
 350		OUT_RING( ctx->pp_lum_matrix );
 351		OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
 352		OUT_RING( ctx->pp_rot_matrix_0 );
 353		OUT_RING( ctx->pp_rot_matrix_1 );
 354		ADVANCE_RING();
 355	}
 356
 357	if ( dirty & RADEON_UPLOAD_MASKS ) {
 358		BEGIN_RING( 4 );
 359		OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
 360		OUT_RING( ctx->rb3d_stencilrefmask );
 361		OUT_RING( ctx->rb3d_ropcntl );
 362		OUT_RING( ctx->rb3d_planemask );
 363		ADVANCE_RING();
 364	}
 365
 366	if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
 367		BEGIN_RING( 7 );
 368		OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
 369		OUT_RING( ctx->se_vport_xscale );
 370		OUT_RING( ctx->se_vport_xoffset );
 371		OUT_RING( ctx->se_vport_yscale );
 372		OUT_RING( ctx->se_vport_yoffset );
 373		OUT_RING( ctx->se_vport_zscale );
 374		OUT_RING( ctx->se_vport_zoffset );
 375		ADVANCE_RING();
 376	}
 377
 378	if ( dirty & RADEON_UPLOAD_SETUP ) {
 379		BEGIN_RING( 4 );
 380		OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
 381		OUT_RING( ctx->se_cntl );
 382		OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
 383		OUT_RING( ctx->se_cntl_status );
 384		ADVANCE_RING();
 385	}
 386
 387	if ( dirty & RADEON_UPLOAD_MISC ) {
 388		BEGIN_RING( 2 );
 389		OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
 390		OUT_RING( ctx->re_misc );
 391		ADVANCE_RING();
 392	}
 393
 394	if ( dirty & RADEON_UPLOAD_TEX0 ) {
 395		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 396						    &tex[0].pp_txoffset ) ) {
 397			DRM_ERROR( "Invalid texture offset for unit 0\n" );
 398			return DRM_ERR( EINVAL );
 399		}
 400
 401		BEGIN_RING( 9 );
 402		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
 403		OUT_RING( tex[0].pp_txfilter );
 404		OUT_RING( tex[0].pp_txformat );
 405		OUT_RING( tex[0].pp_txoffset );
 406		OUT_RING( tex[0].pp_txcblend );
 407		OUT_RING( tex[0].pp_txablend );
 408		OUT_RING( tex[0].pp_tfactor );
 409		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
 410		OUT_RING( tex[0].pp_border_color );
 411		ADVANCE_RING();
 412	}
 413
 414	if ( dirty & RADEON_UPLOAD_TEX1 ) {
 415		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 416						    &tex[1].pp_txoffset ) ) {
 417			DRM_ERROR( "Invalid texture offset for unit 1\n" );
 418			return DRM_ERR( EINVAL );
 419		}
 420
 421		BEGIN_RING( 9 );
 422		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
 423		OUT_RING( tex[1].pp_txfilter );
 424		OUT_RING( tex[1].pp_txformat );
 425		OUT_RING( tex[1].pp_txoffset );
 426		OUT_RING( tex[1].pp_txcblend );
 427		OUT_RING( tex[1].pp_txablend );
 428		OUT_RING( tex[1].pp_tfactor );
 429		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
 430		OUT_RING( tex[1].pp_border_color );
 431		ADVANCE_RING();
 432	}
 433
 434	if ( dirty & RADEON_UPLOAD_TEX2 ) {
 435		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
 436						    &tex[2].pp_txoffset ) ) {
 437			DRM_ERROR( "Invalid texture offset for unit 2\n" );
 438			return DRM_ERR( EINVAL );
 439		}
 440
 441		BEGIN_RING( 9 );
 442		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
 443		OUT_RING( tex[2].pp_txfilter );
 444		OUT_RING( tex[2].pp_txformat );
 445		OUT_RING( tex[2].pp_txoffset );
 446		OUT_RING( tex[2].pp_txcblend );
 447		OUT_RING( tex[2].pp_txablend );
 448		OUT_RING( tex[2].pp_tfactor );
 449		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
 450		OUT_RING( tex[2].pp_border_color );
 451		ADVANCE_RING();
 452	}
 453
 454	return 0;
 455}
 456
 457/* Emit 1.2 state
 458 */
 459static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
 460			       drm_file_t *filp_priv,
 461			       drm_radeon_state_t *state )
 462{
 463	RING_LOCALS;
 464
 465	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
 466		BEGIN_RING( 3 );
 467		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
 468		OUT_RING( state->context2.se_zbias_factor ); 
 469		OUT_RING( state->context2.se_zbias_constant ); 
 470		ADVANCE_RING();
 471	}
 472
 473	return radeon_emit_state( dev_priv, filp_priv, &state->context,
 474			   state->tex, state->dirty );
 475}
 476
 477/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 478 * 1.3 cmdbuffers allow all previous state to be updated as well as
 479 * the tcl scalar and vector areas.  
 480 */
 481static struct { 
 482	int start; 
 483	int len; 
 484	const char *name;
 485} packet[RADEON_MAX_STATE_PACKETS] = {
 486	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
 487	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
 488	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
 489	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
 490	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
 491	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
 492	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
 493	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
 494	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
 495	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
 496	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
 497	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
 498	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
 499	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
 500	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
 501	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
 502	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
 503	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
 504	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
 505	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
 506	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
 507	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
 508	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
 509	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
 510	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
 511	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
 512	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
 513	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
 514	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
 515	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
 516	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
 517	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
 518	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
 519	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
 520	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
 521	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
 522	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
 523	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
 524	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
 525	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
 526	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
 527	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
 528	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
 529	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
 530	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
 531	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
 532	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
 533	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
 534	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
 535	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
 536	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
 537	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, 
 538	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, 
 539	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, 
 540	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, 
 541	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, 
 542	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, 
 543	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, 
 544	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, 
 545	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, 
 546	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
 547	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
 548	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
 549	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
 550	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
 551	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
 552	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
 553	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
 554	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
 555	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
 556	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
 557	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
 558	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
 559	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
 560	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
 561	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
 562	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
 563	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
 564	{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
 565	{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
 566	{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
 567	{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
 568	{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
 569	{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
 570	{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
 571};
 572
 573
 574
 575/* ================================================================
 576 * Performance monitoring functions
 577 */
 578
 579static void radeon_clear_box( drm_radeon_private_t *dev_priv,
 580			      int x, int y, int w, int h,
 581			      int r, int g, int b )
 582{
 583	u32 color;
 584	RING_LOCALS;
 585
 586	x += dev_priv->sarea_priv->boxes[0].x1;
 587	y += dev_priv->sarea_priv->boxes[0].y1;
 588
 589	switch ( dev_priv->color_fmt ) {
 590	case RADEON_COLOR_FORMAT_RGB565:
 591		color = (((r & 0xf8) << 8) |
 592			 ((g & 0xfc) << 3) |
 593			 ((b & 0xf8) >> 3));
 594		break;
 595	case RADEON_COLOR_FORMAT_ARGB8888:
 596	default:
 597		color = (((0xff) << 24) | (r << 16) | (g <<  8) | b);
 598		break;
 599	}
 600
 601	BEGIN_RING( 4 );
 602	RADEON_WAIT_UNTIL_3D_IDLE();		
 603	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
 604	OUT_RING( 0xffffffff );
 605	ADVANCE_RING();
 606
 607	BEGIN_RING( 6 );
 608
 609	OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
 610	OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 611		  RADEON_GMC_BRUSH_SOLID_COLOR |
 612		  (dev_priv->color_fmt << 8) |
 613		  RADEON_GMC_SRC_DATATYPE_COLOR |
 614		  RADEON_ROP3_P |
 615		  RADEON_GMC_CLR_CMP_CNTL_DIS );
 616
 617 	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { 
 618		OUT_RING( dev_priv->front_pitch_offset );
 619 	} else {	 
 620		OUT_RING( dev_priv->back_pitch_offset );
 621 	} 
 622
 623	OUT_RING( color );
 624
 625	OUT_RING( (x << 16) | y );
 626	OUT_RING( (w << 16) | h );
 627
 628	ADVANCE_RING();
 629}
 630
 631static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
 632{
 633	/* Collapse various things into a wait flag -- trying to
 634	 * guess if userspase slept -- better just to have them tell us.
 635	 */
 636	if (dev_priv->stats.last_frame_reads > 1 ||
 637	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
 638		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 639	}
 640
 641	if (dev_priv->stats.freelist_loops) {
 642		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
 643	}
 644
 645	/* Purple box for page flipping
 646	 */
 647	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) 
 648		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
 649
 650	/* Red box if we have to wait for idle at any point
 651	 */
 652	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) 
 653		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
 654
 655	/* Blue box: lost context?
 656	 */
 657
 658	/* Yellow box for texture swaps
 659	 */
 660	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) 
 661		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
 662
 663	/* Green box if hardware never idles (as far as we can tell)
 664	 */
 665	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) 
 666		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
 667
 668
 669	/* Draw bars indicating number of buffers allocated 
 670	 * (not a great measure, easily confused)
 671	 */
 672	if (dev_priv->stats.requested_bufs) {
 673		if (dev_priv->stats.requested_bufs > 100)
 674			dev_priv->stats.requested_bufs = 100;
 675
 676		radeon_clear_box( dev_priv, 4, 16,  
 677				  dev_priv->stats.requested_bufs, 4,
 678				  196, 128, 128 );
 679	}
 680
 681	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
 682
 683}
 684/* ================================================================
 685 * CP command dispatch functions
 686 */
 687
 688static void radeon_cp_dispatch_clear( drm_device_t *dev,
 689				      drm_radeon_clear_t *clear,
 690				      drm_radeon_clear_rect_t *depth_boxes )
 691{
 692	drm_radeon_private_t *dev_priv = dev->dev_private;
 693	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
 694	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
 695	int nbox = sarea_priv->nbox;
 696	drm_clip_rect_t *pbox = sarea_priv->boxes;
 697	unsigned int flags = clear->flags;
 698	u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
 699	int i;
 700	RING_LOCALS;
 701	DRM_DEBUG( "flags = 0x%x\n", flags );
 702
 703	dev_priv->stats.clears++;
 704
 705	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
 706		unsigned int tmp = flags;
 707
 708		flags &= ~(RADEON_FRONT | RADEON_BACK);
 709		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
 710		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
 711	}
 712
 713	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {
 714
 715		BEGIN_RING( 4 );
 716
 717		/* Ensure the 3D stream is idle before doing a
 718		 * 2D fill to clear the front or back buffer.
 719		 */
 720		RADEON_WAIT_UNTIL_3D_IDLE();
 721		
 722		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
 723		OUT_RING( clear->color_mask );
 724
 725		ADVANCE_RING();
 726
 727		/* Make sure we restore the 3D state next time.
 728		 */
 729		dev_priv->sarea_priv->ctx_owner = 0;
 730
 731		for ( i = 0 ; i < nbox ; i++ ) {
 732			int x = pbox[i].x1;
 733			int y = pbox[i].y1;
 734			int w = pbox[i].x2 - x;
 735			int h = pbox[i].y2 - y;
 736
 737			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
 738				   x, y, w, h, flags );
 739
 740			if ( flags & RADEON_FRONT ) {
 741				BEGIN_RING( 6 );
 742				
 743				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
 744				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 745					  RADEON_GMC_BRUSH_SOLID_COLOR |
 746					  (dev_priv->color_fmt << 8) |
 747					  RADEON_GMC_SRC_DATATYPE_COLOR |
 748					  RADEON_ROP3_P |
 749					  RADEON_GMC_CLR_CMP_CNTL_DIS );
 750
 751				OUT_RING( dev_priv->front_pitch_offset );
 752				OUT_RING( clear->clear_color );
 753				
 754				OUT_RING( (x << 16) | y );
 755				OUT_RING( (w << 16) | h );
 756				
 757				ADVANCE_RING();
 758			}
 759			
 760			if ( flags & RADEON_BACK ) {
 761				BEGIN_RING( 6 );
 762				
 763				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
 764				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 765					  RADEON_GMC_BRUSH_SOLID_COLOR |
 766					  (dev_priv->color_fmt << 8) |
 767					  RADEON_GMC_SRC_DATATYPE_COLOR |
 768					  RADEON_ROP3_P |
 769					  RADEON_GMC_CLR_CMP_CNTL_DIS );
 770				
 771				OUT_RING( dev_priv->back_pitch_offset );
 772				OUT_RING( clear->clear_color );
 773
 774				OUT_RING( (x << 16) | y );
 775				OUT_RING( (w << 16) | h );
 776
 777				ADVANCE_RING();
 778			}
 779		}
 780	}
 781	
 782	/* hyper z clear */
 783	/* no docs available, based on reverse engeneering by Stephane Marchesin */
 784	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
 785
 786		int i;
 787		int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z? 
 788			(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
 789		
 790		u32 clearmask;
 791
 792		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
 793			((clear->depth_mask & 0xff) << 24);
 794	
 795		
 796		/* Make sure we restore the 3D state next time.
 797		 * we haven't touched any "normal" state - still need this?
 798		 */
 799		dev_priv->sarea_priv->ctx_owner = 0;
 800
 801		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
 802		/* FIXME : reverse engineer that for Rx00 cards */
 803		/* FIXME : the mask supposedly contains low-res z values. So can't set
 804		   just to the max (0xff? or actually 0x3fff?), need to take z clear
 805		   value into account? */
 806		/* pattern seems to work for r100, though get slight
 807		   rendering errors with glxgears. If hierz is not enabled for r100,
 808		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
 809		   other ones are ignored, and the same clear mask can be used. That's
 810		   very different behaviour than R200 which needs different clear mask
 811		   and different number of tiles to clear if hierz is enabled or not !?!
 812		*/
 813			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
 814		}
 815		else {
 816		/* clear mask : chooses the clearing pattern.
 817		   rv250: could be used to clear only parts of macrotiles
 818		   (but that would get really complicated...)?
 819		   bit 0 and 1 (either or both of them ?!?!) are used to
 820		   not clear tile (or maybe one of the bits indicates if the tile is
 821		   compressed or not), bit 2 and 3 to not clear tile 1,...,.
 822		   Pattern is as follows:
 823		        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
 824		   bits -------------------------------------------------
 825		        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
 826		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
 827		   covers 256 pixels ?!?
 828		*/
 829			clearmask = 0x0;
 830		}
 831
 832		BEGIN_RING( 8 );
 833		RADEON_WAIT_UNTIL_2D_IDLE();
 834		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
 835			tempRB3D_DEPTHCLEARVALUE);
 836		/* what offset is this exactly ? */
 837		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
 838		/* need ctlstat, otherwise get some strange black flickering */
 839		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
 840		ADVANCE_RING();
 841
 842		for (i = 0; i < nbox; i++) {
 843			int tileoffset, nrtilesx, nrtilesy, j;
 844			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
 845			if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
 846				/* FIXME : figure this out for r200 (when hierz is enabled). Or
 847				   maybe r200 actually doesn't need to put the low-res z value into
 848				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
 849				   Works for R100, both with hierz and without.
 850				   R100 seems to operate on 2x1 8x8 tiles, but...
 851				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
 852				   problematic with resolutions which are not 64 pix aligned? */
 853				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
 854				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
 855				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
 856				for (j = 0; j <= nrtilesy; j++) {
 857					BEGIN_RING( 4 );
 858					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
 859					/* first tile */
 860					OUT_RING( tileoffset * 8 );
 861					/* the number of tiles to clear */
 862					OUT_RING( nrtilesx + 4 );
 863					/* clear mask : chooses the clearing pattern. */
 864					OUT_RING( clearmask );
 865					ADVANCE_RING();
 866					tileoffset += depthpixperline >> 6;
 867				}
 868			}
 869			else if (dev_priv->microcode_version==UCODE_R200) {
 870				/* works for rv250. */
 871				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
 872				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
 873				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
 874				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
 875				for (j = 0; j <= nrtilesy; j++) {
 876					BEGIN_RING( 4 );
 877					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
 878					/* first tile */
 879					/* judging by the first tile offset needed, could possibly
 880					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
 881					   macro tiles, though would still need clear mask for
 882					   right/bottom if truely 4x4 granularity is desired ? */
 883					OUT_RING( tileoffset * 16 );
 884					/* the number of tiles to clear */
 885					OUT_RING( nrtilesx + 1 );
 886					/* clear mask : chooses the clearing pattern. */
 887					OUT_RING( clearmask );
 888					ADVANCE_RING();
 889					tileoffset += depthpixperline >> 5;
 890				}
 891			}
 892			else { /* rv 100 */
 893				/* rv100 might not need 64 pix alignment, who knows */
 894				/* offsets are, hmm, weird */
 895				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
 896				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
 897				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
 898				for (j = 0; j <= nrtilesy; j++) {
 899					BEGIN_RING( 4 );
 900					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
 901					OUT_RING( tileoffset * 128 );
 902					/* the number of tiles to clear */
 903					OUT_RING( nrtilesx + 4 );
 904					/* clear mask : chooses the clearing pattern. */
 905					OUT_RING( clearmask );
 906					ADVANCE_RING();
 907					tileoffset += depthpixperline >> 6;
 908				}
 909			}
 910		}
 911
 912		/* TODO don't always clear all hi-level z tiles */
 913		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
 914			&& (flags & RADEON_USE_HIERZ))
 915		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
 916		/* FIXME : the mask supposedly contains low-res z values. So can't set
 917		   just to the max (0xff? or actually 0x3fff?), need to take z clear
 918		   value into account? */
 919		{
 920			BEGIN_RING( 4 );
 921			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
 922			OUT_RING( 0x0 ); /* First tile */
 923			OUT_RING( 0x3cc0 );
 924			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
 925			ADVANCE_RING();
 926		}
 927	}
 928
 929	/* We have to clear the depth and/or stencil buffers by
 930	 * rendering a quad into just those buffers.  Thus, we have to
 931	 * make sure the 3D engine is configured correctly.
 932	 */
 933	if ((dev_priv->microcode_version == UCODE_R200) &&
 934	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
 935
 936		int tempPP_CNTL;
 937		int tempRE_CNTL;
 938		int tempRB3D_CNTL;
 939		int tempRB3D_ZSTENCILCNTL;
 940		int tempRB3D_STENCILREFMASK;
 941		int tempRB3D_PLANEMASK;
 942		int tempSE_CNTL;
 943		int tempSE_VTE_CNTL;
 944		int tempSE_VTX_FMT_0;
 945		int tempSE_VTX_FMT_1;
 946		int tempSE_VAP_CNTL;
 947		int tempRE_AUX_SCISSOR_CNTL;
 948
 949		tempPP_CNTL = 0;
 950		tempRE_CNTL = 0;
 951
 952		tempRB3D_CNTL = depth_clear->rb3d_cntl;
 953
 954		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
 955		tempRB3D_STENCILREFMASK = 0x0;
 956
 957		tempSE_CNTL = depth_clear->se_cntl;
 958
 959
 960
 961		/* Disable TCL */
 962
 963		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */
 964				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
 965
 966		tempRB3D_PLANEMASK = 0x0;
 967
 968		tempRE_AUX_SCISSOR_CNTL = 0x0;
 969
 970		tempSE_VTE_CNTL =
 971			SE_VTE_CNTL__VTX_XY_FMT_MASK |
 972			SE_VTE_CNTL__VTX_Z_FMT_MASK;
 973
 974		/* Vertex format (X, Y, Z, W)*/
 975		tempSE_VTX_FMT_0 =
 976			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
 977			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
 978		tempSE_VTX_FMT_1 = 0x0;
 979
 980
 981		/* 
 982		 * Depth buffer specific enables 
 983		 */
 984		if (flags & RADEON_DEPTH) {
 985			/* Enable depth buffer */
 986			tempRB3D_CNTL |= RADEON_Z_ENABLE;
 987		} else {
 988			/* Disable depth buffer */
 989			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
 990		}
 991
 992		/* 
 993		 * Stencil buffer specific enables
 994		 */
 995		if ( flags & RADEON_STENCIL ) {
 996			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE;
 997			tempRB3D_STENCILREFMASK = clear->depth_mask; 
 998		} else {
 999			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1000			tempRB3D_STENCILREFMASK = 0x00000000;
1001		}
1002
1003		if (flags & RADEON_USE_COMP_ZBUF) {
1004			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1005				RADEON_Z_DECOMPRESSION_ENABLE;
1006		}
1007		if (flags & RADEON_USE_HIERZ) {
1008			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1009		}
1010
1011		BEGIN_RING( 26 );
1012		RADEON_WAIT_UNTIL_2D_IDLE();
1013
1014		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
1015		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
1016		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
1017		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
1018			      tempRB3D_ZSTENCILCNTL );
1019		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, 
1020			      tempRB3D_STENCILREFMASK );
1021		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
1022		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
1023		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
1024		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
1025		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
1026		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
1027		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, 
1028			      tempRE_AUX_SCISSOR_CNTL );
1029		ADVANCE_RING();
1030
1031		/* Make sure we restore the 3D state next time.
1032		 */
1033		dev_priv->sarea_priv->ctx_owner = 0;
1034
1035		for ( i = 0 ; i < nbox ; i++ ) {
1036			
1037			/* Funny that this should be required -- 
1038			 *  sets top-left?
1039			 */
1040			radeon_emit_clip_rect( dev_priv,
1041					       &sarea_priv->boxes[i] );
1042
1043			BEGIN_RING( 14 );
1044			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
1045			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1046				   RADEON_PRIM_WALK_RING |
1047				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
1048			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1049			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1050			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1051			OUT_RING( 0x3f800000 );
1052			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1053			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1054			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1055			OUT_RING( 0x3f800000 );
1056			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1057			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1058			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1059			OUT_RING( 0x3f800000 );
1060			ADVANCE_RING();
1061		}
1062	} 
1063	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
1064
1065		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1066
1067		rb3d_cntl = depth_clear->rb3d_cntl;
1068
1069		if ( flags & RADEON_DEPTH ) {
1070			rb3d_cntl |=  RADEON_Z_ENABLE;
1071		} else {
1072			rb3d_cntl &= ~RADEON_Z_ENABLE;
1073		}
1074
1075		if ( flags & RADEON_STENCIL ) {
1076			rb3d_cntl |=  RADEON_STENCIL_ENABLE;
1077			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
1078		} else {
1079			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1080			rb3d_stencilrefmask = 0x00000000;
1081		}
1082
1083		if (flags & RADEON_USE_COMP_ZBUF) {
1084			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1085				RADEON_Z_DECOMPRESSION_ENABLE;
1086		}
1087		if (flags & RADEON_USE_HIERZ) {
1088			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1089		}
1090
1091		BEGIN_RING( 13 );
1092		RADEON_WAIT_UNTIL_2D_IDLE();
1093
1094		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
1095		OUT_RING( 0x00000000 );
1096		OUT_RING( rb3d_cntl );
1097		
1098		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
1099		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
1100			      rb3d_stencilrefmask );
1101		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
1102			      0x00000000 );
1103		OUT_RING_REG( RADEON_SE_CNTL,
1104			      depth_clear->se_cntl );
1105		ADVANCE_RING();
1106
1107		/* Make sure we restore the 3D state next time.
1108		 */
1109		dev_priv->sarea_priv->ctx_owner = 0;
1110
1111		for ( i = 0 ; i < nbox ; i++ ) {
1112			
1113			/* Funny that this should be required -- 
1114			 *  sets top-left?
1115			 */
1116			radeon_emit_clip_rect( dev_priv,
1117					       &sarea_priv->boxes[i] );
1118
1119			BEGIN_RING( 15 );
1120
1121			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
1122			OUT_RING( RADEON_VTX_Z_PRESENT |
1123				  RADEON_VTX_PKCOLOR_PRESENT);
1124			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1125				   RADEON_PRIM_WALK_RING |
1126				   RADEON_MAOS_ENABLE |
1127				   RADEON_VTX_FMT_RADEON_MODE |
1128				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
1129
1130
1131			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1132			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1133			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1134			OUT_RING( 0x0 );
1135
1136			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1137			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1138			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1139			OUT_RING( 0x0 );
1140
1141			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1142			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1143			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1144			OUT_RING( 0x0 );
1145
1146			ADVANCE_RING();
1147		}
1148	}
1149
1150	/* Increment the clear counter.  The client-side 3D driver must
1151	 * wait on this value before performing the clear ioctl.  We
1152	 * need this because the card's so damned fast...
1153	 */
1154	dev_priv->sarea_priv->last_clear++;
1155
1156	BEGIN_RING( 4 );
1157
1158	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
1159	RADEON_WAIT_UNTIL_IDLE();
1160
1161	ADVANCE_RING();
1162}
1163
1164static void radeon_cp_dispatch_swap( drm_device_t *dev )
1165{
1166	drm_radeon_private_t *dev_priv = dev->dev_private;
1167	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1168	int nbox = sarea_priv->nbox;
1169	drm_clip_rect_t *pbox = sarea_priv->boxes;
1170	int i;
1171	RING_LOCALS;
1172	DRM_DEBUG( "\n" );
1173
1174	/* Do some trivial performance monitoring...
1175	 */
1176	if (dev_priv->do_boxes)
1177		radeon_cp_performance_boxes( dev_priv );
1178
1179
1180	/* Wait for the 3D stream to idle before dispatching the bitblt.
1181	 * This will prevent data corruption between the two streams.
1182	 */
1183	BEGIN_RING( 2 );
1184
1185	RADEON_WAIT_UNTIL_3D_IDLE();
1186
1187	ADVANCE_RING();
1188
1189	for ( i = 0 ; i < nbox ; i++ ) {
1190		int x = pbox[i].x1;
1191		int y = pbox[i].y1;
1192		int w = pbox[i].x2 - x;
1193		int h = pbox[i].y2 - y;
1194
1195		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
1196			   x, y, w, h );
1197
1198		BEGIN_RING( 7 );
1199
1200		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
1201		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1202			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1203			  RADEON_GMC_BRUSH_NONE |
1204			  (dev_priv->color_fmt << 8) |
1205			  RADEON_GMC_SRC_DATATYPE_COLOR |
1206			  RADEON_ROP3_S |
1207			  RADEON_DP_SRC_SOURCE_MEMORY |
1208			  RADEON_GMC_CLR_CMP_CNTL_DIS |
1209			  RADEON_GMC_WR_MSK_DIS );
1210		
1211		/* Make this work even if front & back are flipped:
1212		 */
1213		if (dev_priv->current_page == 0) {
1214			OUT_RING( dev_priv->back_pitch_offset );
1215			OUT_RING( dev_priv->front_pitch_offset );
1216		} 
1217		else {
1218			OUT_RING( dev_priv->front_pitch_offset );
1219			OUT_RING( dev_priv->back_pitch_offset );
1220		}
1221
1222		OUT_RING( (x << 16) | y );
1223		OUT_RING( (x << 16) | y );
1224		OUT_RING( (w << 16) | h );
1225
1226		ADVANCE_RING();
1227	}
1228
1229	/* Increment the frame counter.  The client-side 3D driver must
1230	 * throttle the framerate by waiting for this value before
1231	 * performing the swapbuffer ioctl.
1232	 */
1233	dev_priv->sarea_priv->last_frame++;
1234
1235	BEGIN_RING( 4 );
1236
1237	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1238	RADEON_WAIT_UNTIL_2D_IDLE();
1239
1240	ADVANCE_RING();
1241}
1242
1243static void radeon_cp_dispatch_flip( drm_device_t *dev )
1244{
1245	drm_radeon_private_t *dev_priv = dev->dev_private;
1246	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
1247	int offset = (dev_priv->current_page == 1)
1248		   ? dev_priv->front_offset : dev_priv->back_offset;
1249	RING_LOCALS;
1250	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", 
1251		__FUNCTION__, 
1252		dev_priv->current_page,
1253		dev_priv->sarea_priv->pfCurrentPage);
1254
1255	/* Do some trivial performance monitoring...
1256	 */
1257	if (dev_priv->do_boxes) {
1258		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1259		radeon_cp_performance_boxes( dev_priv );
1260	}
1261
1262	/* Update the frame offsets for both CRTCs
1263	 */
1264	BEGIN_RING( 6 );
1265
1266	RADEON_WAIT_UNTIL_3D_IDLE();
1267	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
1268					      + sarea->frame.x 
1269					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
1270					  + offset );
1271	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1272					   + offset );
1273
1274	ADVANCE_RING();
1275
1276	/* Increment the frame counter.  The client-side 3D driver must
1277	 * throttle the framerate by waiting for this value before
1278	 * performing the swapbuffer ioctl.
1279	 */
1280	dev_priv->sarea_priv->last_frame++;
1281	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1282					      1 - dev_priv->current_page;
1283
1284	BEGIN_RING( 2 );
1285
1286	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1287
1288	ADVANCE_RING();
1289}
1290
1291static int bad_prim_vertex_nr( int primitive, int nr )
1292{
1293	switch (primitive & RADEON_PRIM_TYPE_MASK) {
1294	case RADEON_PRIM_TYPE_NONE:
1295	case RADEON_PRIM_TYPE_POINT:
1296		return nr < 1;
1297	case RADEON_PRIM_TYPE_LINE:
1298		return (nr & 1) || nr == 0;
1299	case RADEON_PRIM_TYPE_LINE_STRIP:
1300		return nr < 2;
1301	case RADEON_PRIM_TYPE_TRI_LIST:
1302	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1303	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1304	case RADEON_PRIM_TYPE_RECT_LIST:
1305		return nr % 3 || nr == 0;
1306	case RADEON_PRIM_TYPE_TRI_FAN:
1307	case RADEON_PRIM_TYPE_TRI_STRIP:
1308		return nr < 3;
1309	default:
1310		return 1;
1311	}	
1312}
1313
1314
1315
/*
 * Description of one primitive to be dispatched from a DMA buffer.
 * Consumed by radeon_cp_dispatch_vertex() and
 * radeon_cp_dispatch_indices().
 */
typedef struct {
	unsigned int start;	/* byte offset of the primitive's data in the buffer */
	unsigned int finish;	/* byte offset just past the primitive's data */
	unsigned int prim;	/* hardware primitive type word */
	unsigned int numverts;	/* number of vertices */
	unsigned int offset;	/* added to gart_buffers_offset by the indexed
				 * path -- presumably locates the vertex data */
	unsigned int vc_format;	/* vertex format word emitted with the primitive */
} drm_radeon_tcl_prim_t;
1324
1325static void radeon_cp_dispatch_vertex( drm_device_t *dev,
1326				       drm_buf_t *buf,
1327				       drm_radeon_tcl_prim_t *prim )
1328
1329{
1330	drm_radeon_private_t *dev_priv = dev->dev_private;
1331	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1332	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1333	int numverts = (int)prim->numverts;
1334	int nbox = sarea_priv->nbox;
1335	int i = 0;
1336	RING_LOCALS;
1337
1338	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1339		  prim->prim,
1340		  prim->vc_format,
1341		  prim->start,
1342		  prim->finish,
1343		  prim->numverts);
1344
1345	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
1346		DRM_ERROR( "bad prim %x numverts %d\n", 
1347			   prim->prim, prim->numverts );
1348		return;
1349	}
1350
1351	do {
1352		/* Emit the next cliprect */
1353		if ( i < nbox ) {
1354			radeon_emit_clip_rect( dev_priv, 
1355					       &sarea_priv->boxes[i] );
1356		}
1357
1358		/* Emit the vertex buffer rendering commands */
1359		BEGIN_RING( 5 );
1360
1361		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
1362		OUT_RING( offset );
1363		OUT_RING( numverts );
1364		OUT_RING( prim->vc_format );
1365		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
1366			  RADEON_COLOR_ORDER_RGBA |
1367			  RADEON_VTX_FMT_RADEON_MODE |
1368			  (numverts << RADEON_NUM_VERTICES_SHIFT) );
1369
1370		ADVANCE_RING();
1371
1372		i++;
1373	} while ( i < nbox );
1374}
1375
1376
1377
1378static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
1379{
1380	drm_radeon_private_t *dev_priv = dev->dev_private;
1381	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1382	RING_LOCALS;
1383
1384	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1385
1386	/* Emit the vertex buffer age */
1387	BEGIN_RING( 2 );
1388	RADEON_DISPATCH_AGE( buf_priv->age );
1389	ADVANCE_RING();
1390
1391	buf->pending = 1;
1392	buf->used = 0;
1393}
1394
1395static void radeon_cp_dispatch_indirect( drm_device_t *dev,
1396					 drm_buf_t *buf,
1397					 int start, int end )
1398{
1399	drm_radeon_private_t *dev_priv = dev->dev_private;
1400	RING_LOCALS;
1401	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
1402		   buf->idx, start, end );
1403
1404	if ( start != end ) {
1405		int offset = (dev_priv->gart_buffers_offset
1406			      + buf->offset + start);
1407		int dwords = (end - start + 3) / sizeof(u32);
1408
1409		/* Indirect buffer data must be an even number of
1410		 * dwords, so if we've been given an odd number we must
1411		 * pad the data with a Type-2 CP packet.
1412		 */
1413		if ( dwords & 1 ) {
1414			u32 *data = (u32 *)
1415				((char *)dev->agp_buffer_map->handle
1416				 + buf->offset + start);
1417			data[dwords++] = RADEON_CP_PACKET2;
1418		}
1419
1420		/* Fire off the indirect buffer */
1421		BEGIN_RING( 3 );
1422
1423		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
1424		OUT_RING( offset );
1425		OUT_RING( dwords );
1426
1427		ADVANCE_RING();
1428	}
1429}
1430
1431
1432static void radeon_cp_dispatch_indices( drm_device_t *dev,
1433					drm_buf_t *elt_buf,
1434					drm_radeon_tcl_prim_t *prim )
1435{
1436	drm_radeon_private_t *dev_priv = dev->dev_private;
1437	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1438	int offset = dev_priv->gart_buffers_offset + prim->offset;
1439	u32 *data;
1440	int dwords;
1441	int i = 0;
1442	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1443	int count = (prim->finish - start) / sizeof(u16);
1444	int nbox = sarea_priv->nbox;
1445
1446	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1447		  prim->prim,
1448		  prim->vc_format,
1449		  prim->start,
1450		  prim->finish,
1451		  prim->offset,
1452		  prim->numverts);
1453
1454	if (bad_prim_vertex_nr( prim->prim, count )) {
1455		DRM_ERROR( "bad prim %x count %d\n", 
1456			   prim->prim, count );
1457		return;
1458	}
1459
1460
1461	if ( start >= prim->finish ||
1462	     (prim->start & 0x7) ) {
1463		DRM_ERROR( "buffer prim %d\n", prim->prim );
1464		return;
1465	}
1466
1467	dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1468
1469	data = (u32 *)((char *)dev->agp_buffer_map->handle +
1470		       elt_buf->offset + prim->start);
1471
1472	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
1473	data[1] = offset;
1474	data[2] = prim->numverts;
1475	data[3] = prim->vc_format;
1476	data[4] = (prim->prim |
1477		   RADEON_PRIM_WALK_IND |
1478		   RADEON_COLOR_ORDER_RGBA |
1479		   RADEON_VTX_FMT_RADEON_MODE |
1480		   (count << RADEON_NUM_VERTICES_SHIFT) );
1481
1482	do {
1483		if ( i < nbox ) 
1484			radeon_emit_clip_rect( dev_priv, 
1485					       &sarea_priv->boxes[i] );
1486
1487		radeon_cp_dispatch_indirect( dev, elt_buf,
1488					     prim->start,
1489					     prim->finish );
1490
1491		i++;
1492	} while ( i < nbox );
1493
1494}
1495
/* Largest texture payload, in bytes, that radeon_cp_dispatch_texture()
 * sends in a single pass: one DMA buffer minus the eight dwords that
 * function writes as the blit packet header in front of the data.
 */
#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1497
1498static int radeon_cp_dispatch_texture( DRMFILE filp,
1499				       drm_device_t *dev,
1500				       drm_radeon_texture_t *tex,
1501				       drm_radeon_tex_image_t *image )
1502{
1503	drm_radeon_private_t *dev_priv = dev->dev_private;
1504	drm_file_t *filp_priv;
1505	drm_buf_t *buf;
1506	u32 format;
1507	u32 *buffer;
1508	const u8 __user *data;
1509	int size, dwords, tex_width, blit_width;
1510	u32 height;
1511	int i;
1512	u32 texpitch, microtile;
1513	RING_LOCALS;
1514
1515	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1516
1517	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
1518		DRM_ERROR( "Invalid destination offset\n" );
1519		return DRM_ERR( EINVAL );
1520	}
1521
1522	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1523
1524	/* Flush the pixel cache.  This ensures no pixel data gets mixed
1525	 * up with the texture data from the host data blit, otherwise
1526	 * part of the texture image may be corrupted.
1527	 */
1528	BEGIN_RING( 4 );
1529	RADEON_FLUSH_CACHE();
1530	RADEON_WAIT_UNTIL_IDLE();
1531	ADVANCE_RING();
1532
1533#ifdef __BIG_ENDIAN
1534	/* The Mesa texture functions provide the data in little endian as the
1535	 * chip wants it, but we need to compensate for the fact that the CP
1536	 * ring gets byte-swapped
1537	 */
1538	BEGIN_RING( 2 );
1539	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
1540	ADVANCE_RING();
1541#endif
1542
1543
1544	/* The compiler won't optimize away a division by a variable,
1545	 * even if the only legal values are powers of two.  Thus, we'll
1546	 * use a shift instead.
1547	 */
1548	switch ( tex->format ) {
1549	case RADEON_TXFORMAT_ARGB8888:
1550	case RADEON_TXFORMAT_RGBA8888:
1551		format = RADEON_COLOR_FORMAT_ARGB8888;
1552		tex_width = tex->width * 4;
1553		blit_width = image->width * 4;
1554		break;
1555	case RADEON_TXFORMAT_AI88:
1556	case RADEON_TXFORMAT_ARGB1555:
1557	case RADEON_TXFORMAT_RGB565:
1558	case RADEON_TXFORMAT_ARGB4444:
1559	case RADEON_TXFORMAT_VYUY422:
1560	case RADEON_TXFORMAT_YVYU422:
1561		format = RADEON_COLOR_FORMAT_RGB565;
1562		tex_width = tex->width * 2;
1563		blit_width = image->width * 2;
1564		break;
1565	case RADEON_TXFORMAT_I8:
1566	case RADEON_TXFORMAT_RGB332:
1567		format = RADEON_COLOR_FORMAT_CI8;
1568		tex_width = tex->width * 1;
1569		blit_width = image->width * 1;
1570		break;
1571	default:
1572		DRM_ERROR( "invalid texture format %d\n", tex->format );
1573		return DRM_ERR(EINVAL);
1574	}
1575	texpitch = tex->pitch;
1576	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1577		microtile = 1;
1578		if (tex_width < 64) {
1579			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1580			/* we got tiled coordinates, untile them */
1581			image->x *= 2;
1582		}
1583	}
1584	else microtile = 0;
1585
1586	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
1587
1588	do {
1589		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1590			   tex->offset >> 10, tex->pitch, tex->format,
1591			   image->x, image->y, image->width, image->height );
1592
1593		/* Make a copy of some parameters in case we have to
1594		 * update them for a multi-pass texture blit.
1595		 */
1596		height = image->height;
1597		data = (const u8 __user *)image->data;
1598		
1599		size = height * blit_width;
1600
1601		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
1602			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1603			size = height * blit_width;
1604		} else if ( size < 4 && size > 0 ) {
1605			size = 4;
1606		} else if ( size == 0 ) {
1607			return 0;
1608		}
1609
1610		buf = radeon_freelist_get( dev );
1611		if ( 0 && !buf ) {
1612			radeon_do_cp_idle( dev_priv );
1613			buf = radeon_freelist_get( dev );
1614		}
1615		if ( !buf ) {
1616			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1617			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
1618				return DRM_ERR(EFAULT);
1619			return DRM_ERR(EAGAIN);
1620		}
1621
1622
1623		/* Dispatch the indirect buffer.
1624		 */
1625		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
1626		dwords = size / 4;
1627		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
1628		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1629			     RADEON_GMC_BRUSH_NONE |
1630			     (format << 8) |
1631			     RADEON_GMC_SRC_DATATYPE_COLOR |
1632			     RADEON_ROP3_S |
1633			     RADEON_DP_SRC_SOURCE_HOST_DATA |
1634			     RADEON_GMC_CLR_CMP_CNTL_DIS |
1635			     RADEON_GMC_WR_MSK_DIS);
1636		
1637		buffer[2] = (texpitch << 22) | (tex->offset >> 10);
1638		buffer[3] = 0xffffffff;
1639		buffer[4] = 0xffffffff;
1640		buffer[5] = (image->y << 16) | image->x;
1641		buffer[6] = (height << 16) | image->width;
1642		buffer[7] = dwords;
1643		buffer += 8;
1644
1645		
1646
1647		if (microtile) {
1648			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
1649			   however, we cannot use blitter directly for texture width < 64 bytes,
1650			   since minimum tex pitch is 64 bytes and we need this to match
1651			   the texture width, otherwise the blitter will tile it wrong.
1652			   Thus, tiling manually in this case. Additionally, need to special
1653			   case tex height = 1, since our actual image will have height 2
1654			   and we need to ensure we don't read beyond the texture siz…

Large files files are truncated, but you can click here to view the full file