/* radeon_state.c -- State support for Radeon -*- linux-c -*-
2 *
3 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 *
25 * Authors:
26 * Gareth Hughes <gareth@valinux.com>
27 * Kevin E. Martin <martin@valinux.com>
28 */
29
30#include "drmP.h"
31#include "drm.h"
32#include "drm_sarea.h"
33#include "radeon_drm.h"
34#include "radeon_drv.h"
35
36/* ================================================================
37 * Helper functions for client state checking and fixup
38 */
39
40static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
41 drm_file_t *filp_priv,
42 u32 *offset ) {
43 u32 off = *offset;
44 struct drm_radeon_driver_file_fields *radeon_priv;
45
46 if ( off >= dev_priv->fb_location &&
47 off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
48 return 0;
49
50 radeon_priv = filp_priv->driver_priv;
51 off += radeon_priv->radeon_fb_delta;
52
53 DRM_DEBUG( "offset fixed up to 0x%x\n", off );
54
55 if ( off < dev_priv->fb_location ||
56 off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
57 return DRM_ERR( EINVAL );
58
59 *offset = off;
60
61 return 0;
62}
63
/* Validate one client state packet and fix up any buffer offsets it
 * carries so they reference legal memory.
 *
 * id    - RADEON_EMIT_* / R200_EMIT_* state packet identifier.
 * data  - user-space payload of the packet; offsets are rewritten
 *         in place via radeon_check_and_fixup_offset().
 *
 * Returns 0 on success, DRM_ERR(EINVAL) for an unknown packet ID or an
 * offset that cannot be fixed up.
 */
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      int id,
						      u32 __user *data ) {
	switch ( id ) {

	case RADEON_EMIT_PP_MISC:
		/* The depth buffer offset sits at a fixed dword position
		 * within the PP_MISC register run.
		 */
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_DEPTHOFFSET
							    - RADEON_PP_MISC ) / 4] ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		/* Colour buffer offset within the PP_CNTL register run. */
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_COLOROFFSET
							    - RADEON_PP_CNTL ) / 4] ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R200 texture offsets: the offset is the packet's first dword. */
	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[0] ) ) {
			DRM_ERROR( "Invalid R200 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R100 texture offsets live inside the TXFILTER register run. */
	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_PP_TXOFFSET_0
							    - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
			DRM_ERROR( "Invalid R100 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	/* R200 cubic maps carry five face offsets per packet. */
	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	/* R100 cubic maps: likewise five face offsets per packet. */
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  filp_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return DRM_ERR(EINVAL);
				}
			}
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR( "Unknown state packet ID %d\n", id );
		return DRM_ERR( EINVAL );
	}

	return 0;
}
220
/* Validate a type-3 (CP_PACKET3) command at the head of a client
 * command buffer.
 *
 * On success, *cmdsz holds the total packet size in dwords and any
 * pitch/offset dwords in the packet have been fixed up to legal
 * offsets.  Returns 0 on success, DRM_ERR(EINVAL) for a malformed
 * packet or an offset that fails the fixup check.
 */
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      drm_radeon_cmd_buffer_t *cmdbuf,
						      unsigned int *cmdsz ) {
	u32 *cmd = (u32 *) cmdbuf->buf;

	/* Total dwords = 2 + COUNT field (header bits 29:16). */
	*cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );

	/* Header type field (bits 31:30) must mark a type-3 packet. */
	if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
		DRM_ERROR( "Not a type 3 packet\n" );
		return DRM_ERR( EINVAL );
	}

	/* The whole packet must fit inside the client's buffer. */
	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
		DRM_ERROR( "Packet size larger than size of data provided\n" );
		return DRM_ERR( EINVAL );
	}

	/* Check client state and fix it up if necessary */
	if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		/* First pitch/offset dword: present when either the src
		 * or dst pitch/offset control bit is set.  Offset occupies
		 * the low 22 bits, shifted left 10 to form an address.
		 */
		if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[2] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid first packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
		}

		/* Second pitch/offset dword: only present when both src
		 * and dst carry one.
		 */
		if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
		     ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[3] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid second packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
		}
	}

	return 0;
}
266
267
268/* ================================================================
269 * CP hardware state programming functions
270 */
271
/* Emit a clip rectangle to the ring.  The hardware takes an inclusive
 * bottom-right corner, hence the -1 adjustment on x2/y2.
 */
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
					      drm_clip_rect_t *box )
{
	RING_LOCALS;

	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		   box->x1, box->y1, box->x2, box->y2 );

	BEGIN_RING( 4 );
	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
	OUT_RING( (box->y1 << 16) | box->x1 );
	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
	ADVANCE_RING();
}
287
288/* Emit 1.1 state
289 */
290static int radeon_emit_state( drm_radeon_private_t *dev_priv,
291 drm_file_t *filp_priv,
292 drm_radeon_context_regs_t *ctx,
293 drm_radeon_texture_regs_t *tex,
294 unsigned int dirty )
295{
296 RING_LOCALS;
297 DRM_DEBUG( "dirty=0x%08x\n", dirty );
298
299 if ( dirty & RADEON_UPLOAD_CONTEXT ) {
300 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
301 &ctx->rb3d_depthoffset ) ) {
302 DRM_ERROR( "Invalid depth buffer offset\n" );
303 return DRM_ERR( EINVAL );
304 }
305
306 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
307 &ctx->rb3d_coloroffset ) ) {
308 DRM_ERROR( "Invalid depth buffer offset\n" );
309 return DRM_ERR( EINVAL );
310 }
311
312 BEGIN_RING( 14 );
313 OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
314 OUT_RING( ctx->pp_misc );
315 OUT_RING( ctx->pp_fog_color );
316 OUT_RING( ctx->re_solid_color );
317 OUT_RING( ctx->rb3d_blendcntl );
318 OUT_RING( ctx->rb3d_depthoffset );
319 OUT_RING( ctx->rb3d_depthpitch );
320 OUT_RING( ctx->rb3d_zstencilcntl );
321 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
322 OUT_RING( ctx->pp_cntl );
323 OUT_RING( ctx->rb3d_cntl );
324 OUT_RING( ctx->rb3d_coloroffset );
325 OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
326 OUT_RING( ctx->rb3d_colorpitch );
327 ADVANCE_RING();
328 }
329
330 if ( dirty & RADEON_UPLOAD_VERTFMT ) {
331 BEGIN_RING( 2 );
332 OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
333 OUT_RING( ctx->se_coord_fmt );
334 ADVANCE_RING();
335 }
336
337 if ( dirty & RADEON_UPLOAD_LINE ) {
338 BEGIN_RING( 5 );
339 OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
340 OUT_RING( ctx->re_line_pattern );
341 OUT_RING( ctx->re_line_state );
342 OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
343 OUT_RING( ctx->se_line_width );
344 ADVANCE_RING();
345 }
346
347 if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
348 BEGIN_RING( 5 );
349 OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
350 OUT_RING( ctx->pp_lum_matrix );
351 OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
352 OUT_RING( ctx->pp_rot_matrix_0 );
353 OUT_RING( ctx->pp_rot_matrix_1 );
354 ADVANCE_RING();
355 }
356
357 if ( dirty & RADEON_UPLOAD_MASKS ) {
358 BEGIN_RING( 4 );
359 OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
360 OUT_RING( ctx->rb3d_stencilrefmask );
361 OUT_RING( ctx->rb3d_ropcntl );
362 OUT_RING( ctx->rb3d_planemask );
363 ADVANCE_RING();
364 }
365
366 if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
367 BEGIN_RING( 7 );
368 OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
369 OUT_RING( ctx->se_vport_xscale );
370 OUT_RING( ctx->se_vport_xoffset );
371 OUT_RING( ctx->se_vport_yscale );
372 OUT_RING( ctx->se_vport_yoffset );
373 OUT_RING( ctx->se_vport_zscale );
374 OUT_RING( ctx->se_vport_zoffset );
375 ADVANCE_RING();
376 }
377
378 if ( dirty & RADEON_UPLOAD_SETUP ) {
379 BEGIN_RING( 4 );
380 OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
381 OUT_RING( ctx->se_cntl );
382 OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
383 OUT_RING( ctx->se_cntl_status );
384 ADVANCE_RING();
385 }
386
387 if ( dirty & RADEON_UPLOAD_MISC ) {
388 BEGIN_RING( 2 );
389 OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
390 OUT_RING( ctx->re_misc );
391 ADVANCE_RING();
392 }
393
394 if ( dirty & RADEON_UPLOAD_TEX0 ) {
395 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
396 &tex[0].pp_txoffset ) ) {
397 DRM_ERROR( "Invalid texture offset for unit 0\n" );
398 return DRM_ERR( EINVAL );
399 }
400
401 BEGIN_RING( 9 );
402 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
403 OUT_RING( tex[0].pp_txfilter );
404 OUT_RING( tex[0].pp_txformat );
405 OUT_RING( tex[0].pp_txoffset );
406 OUT_RING( tex[0].pp_txcblend );
407 OUT_RING( tex[0].pp_txablend );
408 OUT_RING( tex[0].pp_tfactor );
409 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
410 OUT_RING( tex[0].pp_border_color );
411 ADVANCE_RING();
412 }
413
414 if ( dirty & RADEON_UPLOAD_TEX1 ) {
415 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
416 &tex[1].pp_txoffset ) ) {
417 DRM_ERROR( "Invalid texture offset for unit 1\n" );
418 return DRM_ERR( EINVAL );
419 }
420
421 BEGIN_RING( 9 );
422 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
423 OUT_RING( tex[1].pp_txfilter );
424 OUT_RING( tex[1].pp_txformat );
425 OUT_RING( tex[1].pp_txoffset );
426 OUT_RING( tex[1].pp_txcblend );
427 OUT_RING( tex[1].pp_txablend );
428 OUT_RING( tex[1].pp_tfactor );
429 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
430 OUT_RING( tex[1].pp_border_color );
431 ADVANCE_RING();
432 }
433
434 if ( dirty & RADEON_UPLOAD_TEX2 ) {
435 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
436 &tex[2].pp_txoffset ) ) {
437 DRM_ERROR( "Invalid texture offset for unit 2\n" );
438 return DRM_ERR( EINVAL );
439 }
440
441 BEGIN_RING( 9 );
442 OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
443 OUT_RING( tex[2].pp_txfilter );
444 OUT_RING( tex[2].pp_txformat );
445 OUT_RING( tex[2].pp_txoffset );
446 OUT_RING( tex[2].pp_txcblend );
447 OUT_RING( tex[2].pp_txablend );
448 OUT_RING( tex[2].pp_tfactor );
449 OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
450 OUT_RING( tex[2].pp_border_color );
451 ADVANCE_RING();
452 }
453
454 return 0;
455}
456
/* Emit 1.2 state
 *
 * Emits the 1.2-only z-bias registers when dirty, then delegates the
 * rest of the state to the 1.1 emit path.
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_state_t *state )
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING( 3 );
		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
		OUT_RING( state->context2.se_zbias_factor );
		OUT_RING( state->context2.se_zbias_constant );
		ADVANCE_RING();
	}

	/* Everything else is shared with the 1.1 state path. */
	return radeon_emit_state( dev_priv, filp_priv, &state->context,
				  state->tex, state->dirty );
}
476
477/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in
478 * 1.3 cmdbuffers allow all previous state to be updated as well as
479 * the tcl scalar and vector areas.
480 */
/* Register-run table for 1.3 cmdbuffer state packets: starting register,
 * dword count and a name for debug output.  Entry order must match the
 * RADEON_EMIT_* / R200_EMIT_* packet ID numbering used by
 * radeon_check_and_fixup_packets().
 */
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{ RADEON_PP_MISC,7,"RADEON_PP_MISC" },
	{ RADEON_PP_CNTL,3,"RADEON_PP_CNTL" },
	{ RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" },
	{ RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" },
	{ RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" },
	{ RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" },
	{ RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" },
	{ RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" },
	{ RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" },
	{ RADEON_SE_CNTL,2,"RADEON_SE_CNTL" },
	{ RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" },
	{ RADEON_RE_MISC,1,"RADEON_RE_MISC" },
	{ RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" },
	{ RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" },
	{ RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" },
	{ RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" },
	{ RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" },
	{ RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" },
	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
	{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
};
572
573
574
575/* ================================================================
576 * Performance monitoring functions
577 */
578
579static void radeon_clear_box( drm_radeon_private_t *dev_priv,
580 int x, int y, int w, int h,
581 int r, int g, int b )
582{
583 u32 color;
584 RING_LOCALS;
585
586 x += dev_priv->sarea_priv->boxes[0].x1;
587 y += dev_priv->sarea_priv->boxes[0].y1;
588
589 switch ( dev_priv->color_fmt ) {
590 case RADEON_COLOR_FORMAT_RGB565:
591 color = (((r & 0xf8) << 8) |
592 ((g & 0xfc) << 3) |
593 ((b & 0xf8) >> 3));
594 break;
595 case RADEON_COLOR_FORMAT_ARGB8888:
596 default:
597 color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
598 break;
599 }
600
601 BEGIN_RING( 4 );
602 RADEON_WAIT_UNTIL_3D_IDLE();
603 OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
604 OUT_RING( 0xffffffff );
605 ADVANCE_RING();
606
607 BEGIN_RING( 6 );
608
609 OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
610 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
611 RADEON_GMC_BRUSH_SOLID_COLOR |
612 (dev_priv->color_fmt << 8) |
613 RADEON_GMC_SRC_DATATYPE_COLOR |
614 RADEON_ROP3_P |
615 RADEON_GMC_CLR_CMP_CNTL_DIS );
616
617 if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
618 OUT_RING( dev_priv->front_pitch_offset );
619 } else {
620 OUT_RING( dev_priv->back_pitch_offset );
621 }
622
623 OUT_RING( color );
624
625 OUT_RING( (x << 16) | y );
626 OUT_RING( (w << 16) | h );
627
628 ADVANCE_RING();
629}
630
631static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
632{
633 /* Collapse various things into a wait flag -- trying to
634 * guess if userspase slept -- better just to have them tell us.
635 */
636 if (dev_priv->stats.last_frame_reads > 1 ||
637 dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
638 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
639 }
640
641 if (dev_priv->stats.freelist_loops) {
642 dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
643 }
644
645 /* Purple box for page flipping
646 */
647 if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
648 radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
649
650 /* Red box if we have to wait for idle at any point
651 */
652 if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )
653 radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
654
655 /* Blue box: lost context?
656 */
657
658 /* Yellow box for texture swaps
659 */
660 if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )
661 radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
662
663 /* Green box if hardware never idles (as far as we can tell)
664 */
665 if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )
666 radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
667
668
669 /* Draw bars indicating number of buffers allocated
670 * (not a great measure, easily confused)
671 */
672 if (dev_priv->stats.requested_bufs) {
673 if (dev_priv->stats.requested_bufs > 100)
674 dev_priv->stats.requested_bufs = 100;
675
676 radeon_clear_box( dev_priv, 4, 16,
677 dev_priv->stats.requested_bufs, 4,
678 196, 128, 128 );
679 }
680
681 memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
682
683}
684/* ================================================================
685 * CP command dispatch functions
686 */
687
688static void radeon_cp_dispatch_clear( drm_device_t *dev,
689 drm_radeon_clear_t *clear,
690 drm_radeon_clear_rect_t *depth_boxes )
691{
692 drm_radeon_private_t *dev_priv = dev->dev_private;
693 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
694 drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
695 int nbox = sarea_priv->nbox;
696 drm_clip_rect_t *pbox = sarea_priv->boxes;
697 unsigned int flags = clear->flags;
698 u32 rb3d_cntl = 0, rb3d_stencilrefmask= 0;
699 int i;
700 RING_LOCALS;
701 DRM_DEBUG( "flags = 0x%x\n", flags );
702
703 dev_priv->stats.clears++;
704
705 if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
706 unsigned int tmp = flags;
707
708 flags &= ~(RADEON_FRONT | RADEON_BACK);
709 if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
710 if ( tmp & RADEON_BACK ) flags |= RADEON_FRONT;
711 }
712
713 if ( flags & (RADEON_FRONT | RADEON_BACK) ) {
714
715 BEGIN_RING( 4 );
716
717 /* Ensure the 3D stream is idle before doing a
718 * 2D fill to clear the front or back buffer.
719 */
720 RADEON_WAIT_UNTIL_3D_IDLE();
721
722 OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
723 OUT_RING( clear->color_mask );
724
725 ADVANCE_RING();
726
727 /* Make sure we restore the 3D state next time.
728 */
729 dev_priv->sarea_priv->ctx_owner = 0;
730
731 for ( i = 0 ; i < nbox ; i++ ) {
732 int x = pbox[i].x1;
733 int y = pbox[i].y1;
734 int w = pbox[i].x2 - x;
735 int h = pbox[i].y2 - y;
736
737 DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
738 x, y, w, h, flags );
739
740 if ( flags & RADEON_FRONT ) {
741 BEGIN_RING( 6 );
742
743 OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
744 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
745 RADEON_GMC_BRUSH_SOLID_COLOR |
746 (dev_priv->color_fmt << 8) |
747 RADEON_GMC_SRC_DATATYPE_COLOR |
748 RADEON_ROP3_P |
749 RADEON_GMC_CLR_CMP_CNTL_DIS );
750
751 OUT_RING( dev_priv->front_pitch_offset );
752 OUT_RING( clear->clear_color );
753
754 OUT_RING( (x << 16) | y );
755 OUT_RING( (w << 16) | h );
756
757 ADVANCE_RING();
758 }
759
760 if ( flags & RADEON_BACK ) {
761 BEGIN_RING( 6 );
762
763 OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
764 OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
765 RADEON_GMC_BRUSH_SOLID_COLOR |
766 (dev_priv->color_fmt << 8) |
767 RADEON_GMC_SRC_DATATYPE_COLOR |
768 RADEON_ROP3_P |
769 RADEON_GMC_CLR_CMP_CNTL_DIS );
770
771 OUT_RING( dev_priv->back_pitch_offset );
772 OUT_RING( clear->clear_color );
773
774 OUT_RING( (x << 16) | y );
775 OUT_RING( (w << 16) | h );
776
777 ADVANCE_RING();
778 }
779 }
780 }
781
782 /* hyper z clear */
783 /* no docs available, based on reverse engeneering by Stephane Marchesin */
784 if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
785
786 int i;
787 int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
788 (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
789
790 u32 clearmask;
791
792 u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
793 ((clear->depth_mask & 0xff) << 24);
794
795
796 /* Make sure we restore the 3D state next time.
797 * we haven't touched any "normal" state - still need this?
798 */
799 dev_priv->sarea_priv->ctx_owner = 0;
800
801 if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
802 /* FIXME : reverse engineer that for Rx00 cards */
803 /* FIXME : the mask supposedly contains low-res z values. So can't set
804 just to the max (0xff? or actually 0x3fff?), need to take z clear
805 value into account? */
806 /* pattern seems to work for r100, though get slight
807 rendering errors with glxgears. If hierz is not enabled for r100,
808 only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
809 other ones are ignored, and the same clear mask can be used. That's
810 very different behaviour than R200 which needs different clear mask
811 and different number of tiles to clear if hierz is enabled or not !?!
812 */
813 clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
814 }
815 else {
816 /* clear mask : chooses the clearing pattern.
817 rv250: could be used to clear only parts of macrotiles
818 (but that would get really complicated...)?
819 bit 0 and 1 (either or both of them ?!?!) are used to
820 not clear tile (or maybe one of the bits indicates if the tile is
821 compressed or not), bit 2 and 3 to not clear tile 1,...,.
822 Pattern is as follows:
823 | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
824 bits -------------------------------------------------
825 | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
826 rv100: clearmask covers 2x8 4x1 tiles, but one clear still
827 covers 256 pixels ?!?
828 */
829 clearmask = 0x0;
830 }
831
832 BEGIN_RING( 8 );
833 RADEON_WAIT_UNTIL_2D_IDLE();
834 OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
835 tempRB3D_DEPTHCLEARVALUE);
836 /* what offset is this exactly ? */
837 OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
838 /* need ctlstat, otherwise get some strange black flickering */
839 OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
840 ADVANCE_RING();
841
842 for (i = 0; i < nbox; i++) {
843 int tileoffset, nrtilesx, nrtilesy, j;
844 /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
845 if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
846 /* FIXME : figure this out for r200 (when hierz is enabled). Or
847 maybe r200 actually doesn't need to put the low-res z value into
848 the tile cache like r100, but just needs to clear the hi-level z-buffer?
849 Works for R100, both with hierz and without.
850 R100 seems to operate on 2x1 8x8 tiles, but...
851 odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
852 problematic with resolutions which are not 64 pix aligned? */
853 tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
854 nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
855 nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
856 for (j = 0; j <= nrtilesy; j++) {
857 BEGIN_RING( 4 );
858 OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
859 /* first tile */
860 OUT_RING( tileoffset * 8 );
861 /* the number of tiles to clear */
862 OUT_RING( nrtilesx + 4 );
863 /* clear mask : chooses the clearing pattern. */
864 OUT_RING( clearmask );
865 ADVANCE_RING();
866 tileoffset += depthpixperline >> 6;
867 }
868 }
869 else if (dev_priv->microcode_version==UCODE_R200) {
870 /* works for rv250. */
871 /* find first macro tile (8x2 4x4 z-pixels on rv250) */
872 tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
873 nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
874 nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
875 for (j = 0; j <= nrtilesy; j++) {
876 BEGIN_RING( 4 );
877 OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
878 /* first tile */
879 /* judging by the first tile offset needed, could possibly
880 directly address/clear 4x4 tiles instead of 8x2 * 4x4
881 macro tiles, though would still need clear mask for
882 right/bottom if truely 4x4 granularity is desired ? */
883 OUT_RING( tileoffset * 16 );
884 /* the number of tiles to clear */
885 OUT_RING( nrtilesx + 1 );
886 /* clear mask : chooses the clearing pattern. */
887 OUT_RING( clearmask );
888 ADVANCE_RING();
889 tileoffset += depthpixperline >> 5;
890 }
891 }
892 else { /* rv 100 */
893 /* rv100 might not need 64 pix alignment, who knows */
894 /* offsets are, hmm, weird */
895 tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
896 nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
897 nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
898 for (j = 0; j <= nrtilesy; j++) {
899 BEGIN_RING( 4 );
900 OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
901 OUT_RING( tileoffset * 128 );
902 /* the number of tiles to clear */
903 OUT_RING( nrtilesx + 4 );
904 /* clear mask : chooses the clearing pattern. */
905 OUT_RING( clearmask );
906 ADVANCE_RING();
907 tileoffset += depthpixperline >> 6;
908 }
909 }
910 }
911
912 /* TODO don't always clear all hi-level z tiles */
913 if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
914 && (flags & RADEON_USE_HIERZ))
915 /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
916 /* FIXME : the mask supposedly contains low-res z values. So can't set
917 just to the max (0xff? or actually 0x3fff?), need to take z clear
918 value into account? */
919 {
920 BEGIN_RING( 4 );
921 OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
922 OUT_RING( 0x0 ); /* First tile */
923 OUT_RING( 0x3cc0 );
924 OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
925 ADVANCE_RING();
926 }
927 }
928
929 /* We have to clear the depth and/or stencil buffers by
930 * rendering a quad into just those buffers. Thus, we have to
931 * make sure the 3D engine is configured correctly.
932 */
933 if ((dev_priv->microcode_version == UCODE_R200) &&
934 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
935
936 int tempPP_CNTL;
937 int tempRE_CNTL;
938 int tempRB3D_CNTL;
939 int tempRB3D_ZSTENCILCNTL;
940 int tempRB3D_STENCILREFMASK;
941 int tempRB3D_PLANEMASK;
942 int tempSE_CNTL;
943 int tempSE_VTE_CNTL;
944 int tempSE_VTX_FMT_0;
945 int tempSE_VTX_FMT_1;
946 int tempSE_VAP_CNTL;
947 int tempRE_AUX_SCISSOR_CNTL;
948
949 tempPP_CNTL = 0;
950 tempRE_CNTL = 0;
951
952 tempRB3D_CNTL = depth_clear->rb3d_cntl;
953
954 tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
955 tempRB3D_STENCILREFMASK = 0x0;
956
957 tempSE_CNTL = depth_clear->se_cntl;
958
959
960
961 /* Disable TCL */
962
963 tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
964 (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
965
966 tempRB3D_PLANEMASK = 0x0;
967
968 tempRE_AUX_SCISSOR_CNTL = 0x0;
969
970 tempSE_VTE_CNTL =
971 SE_VTE_CNTL__VTX_XY_FMT_MASK |
972 SE_VTE_CNTL__VTX_Z_FMT_MASK;
973
974 /* Vertex format (X, Y, Z, W)*/
975 tempSE_VTX_FMT_0 =
976 SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
977 SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
978 tempSE_VTX_FMT_1 = 0x0;
979
980
981 /*
982 * Depth buffer specific enables
983 */
984 if (flags & RADEON_DEPTH) {
985 /* Enable depth buffer */
986 tempRB3D_CNTL |= RADEON_Z_ENABLE;
987 } else {
988 /* Disable depth buffer */
989 tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
990 }
991
992 /*
993 * Stencil buffer specific enables
994 */
995 if ( flags & RADEON_STENCIL ) {
996 tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
997 tempRB3D_STENCILREFMASK = clear->depth_mask;
998 } else {
999 tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
1000 tempRB3D_STENCILREFMASK = 0x00000000;
1001 }
1002
1003 if (flags & RADEON_USE_COMP_ZBUF) {
1004 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1005 RADEON_Z_DECOMPRESSION_ENABLE;
1006 }
1007 if (flags & RADEON_USE_HIERZ) {
1008 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1009 }
1010
1011 BEGIN_RING( 26 );
1012 RADEON_WAIT_UNTIL_2D_IDLE();
1013
1014 OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
1015 OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
1016 OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
1017 OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
1018 tempRB3D_ZSTENCILCNTL );
1019 OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
1020 tempRB3D_STENCILREFMASK );
1021 OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
1022 OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
1023 OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
1024 OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
1025 OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
1026 OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
1027 OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,
1028 tempRE_AUX_SCISSOR_CNTL );
1029 ADVANCE_RING();
1030
1031 /* Make sure we restore the 3D state next time.
1032 */
1033 dev_priv->sarea_priv->ctx_owner = 0;
1034
1035 for ( i = 0 ; i < nbox ; i++ ) {
1036
1037 /* Funny that this should be required --
1038 * sets top-left?
1039 */
1040 radeon_emit_clip_rect( dev_priv,
1041 &sarea_priv->boxes[i] );
1042
1043 BEGIN_RING( 14 );
1044 OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
1045 OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1046 RADEON_PRIM_WALK_RING |
1047 (3 << RADEON_NUM_VERTICES_SHIFT)) );
1048 OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1049 OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1050 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1051 OUT_RING( 0x3f800000 );
1052 OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1053 OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1054 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1055 OUT_RING( 0x3f800000 );
1056 OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1057 OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1058 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1059 OUT_RING( 0x3f800000 );
1060 ADVANCE_RING();
1061 }
1062 }
1063 else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
1064
1065 int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
1066
1067 rb3d_cntl = depth_clear->rb3d_cntl;
1068
1069 if ( flags & RADEON_DEPTH ) {
1070 rb3d_cntl |= RADEON_Z_ENABLE;
1071 } else {
1072 rb3d_cntl &= ~RADEON_Z_ENABLE;
1073 }
1074
1075 if ( flags & RADEON_STENCIL ) {
1076 rb3d_cntl |= RADEON_STENCIL_ENABLE;
1077 rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
1078 } else {
1079 rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
1080 rb3d_stencilrefmask = 0x00000000;
1081 }
1082
1083 if (flags & RADEON_USE_COMP_ZBUF) {
1084 tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
1085 RADEON_Z_DECOMPRESSION_ENABLE;
1086 }
1087 if (flags & RADEON_USE_HIERZ) {
1088 tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
1089 }
1090
1091 BEGIN_RING( 13 );
1092 RADEON_WAIT_UNTIL_2D_IDLE();
1093
1094 OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
1095 OUT_RING( 0x00000000 );
1096 OUT_RING( rb3d_cntl );
1097
1098 OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
1099 OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
1100 rb3d_stencilrefmask );
1101 OUT_RING_REG( RADEON_RB3D_PLANEMASK,
1102 0x00000000 );
1103 OUT_RING_REG( RADEON_SE_CNTL,
1104 depth_clear->se_cntl );
1105 ADVANCE_RING();
1106
1107 /* Make sure we restore the 3D state next time.
1108 */
1109 dev_priv->sarea_priv->ctx_owner = 0;
1110
1111 for ( i = 0 ; i < nbox ; i++ ) {
1112
1113 /* Funny that this should be required --
1114 * sets top-left?
1115 */
1116 radeon_emit_clip_rect( dev_priv,
1117 &sarea_priv->boxes[i] );
1118
1119 BEGIN_RING( 15 );
1120
1121 OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
1122 OUT_RING( RADEON_VTX_Z_PRESENT |
1123 RADEON_VTX_PKCOLOR_PRESENT);
1124 OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
1125 RADEON_PRIM_WALK_RING |
1126 RADEON_MAOS_ENABLE |
1127 RADEON_VTX_FMT_RADEON_MODE |
1128 (3 << RADEON_NUM_VERTICES_SHIFT)) );
1129
1130
1131 OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1132 OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
1133 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1134 OUT_RING( 0x0 );
1135
1136 OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
1137 OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1138 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1139 OUT_RING( 0x0 );
1140
1141 OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
1142 OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
1143 OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
1144 OUT_RING( 0x0 );
1145
1146 ADVANCE_RING();
1147 }
1148 }
1149
1150 /* Increment the clear counter. The client-side 3D driver must
1151 * wait on this value before performing the clear ioctl. We
1152 * need this because the card's so damned fast...
1153 */
1154 dev_priv->sarea_priv->last_clear++;
1155
1156 BEGIN_RING( 4 );
1157
1158 RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
1159 RADEON_WAIT_UNTIL_IDLE();
1160
1161 ADVANCE_RING();
1162}
1163
1164static void radeon_cp_dispatch_swap( drm_device_t *dev )
1165{
1166 drm_radeon_private_t *dev_priv = dev->dev_private;
1167 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1168 int nbox = sarea_priv->nbox;
1169 drm_clip_rect_t *pbox = sarea_priv->boxes;
1170 int i;
1171 RING_LOCALS;
1172 DRM_DEBUG( "\n" );
1173
1174 /* Do some trivial performance monitoring...
1175 */
1176 if (dev_priv->do_boxes)
1177 radeon_cp_performance_boxes( dev_priv );
1178
1179
1180 /* Wait for the 3D stream to idle before dispatching the bitblt.
1181 * This will prevent data corruption between the two streams.
1182 */
1183 BEGIN_RING( 2 );
1184
1185 RADEON_WAIT_UNTIL_3D_IDLE();
1186
1187 ADVANCE_RING();
1188
1189 for ( i = 0 ; i < nbox ; i++ ) {
1190 int x = pbox[i].x1;
1191 int y = pbox[i].y1;
1192 int w = pbox[i].x2 - x;
1193 int h = pbox[i].y2 - y;
1194
1195 DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
1196 x, y, w, h );
1197
1198 BEGIN_RING( 7 );
1199
1200 OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
1201 OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1202 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1203 RADEON_GMC_BRUSH_NONE |
1204 (dev_priv->color_fmt << 8) |
1205 RADEON_GMC_SRC_DATATYPE_COLOR |
1206 RADEON_ROP3_S |
1207 RADEON_DP_SRC_SOURCE_MEMORY |
1208 RADEON_GMC_CLR_CMP_CNTL_DIS |
1209 RADEON_GMC_WR_MSK_DIS );
1210
1211 /* Make this work even if front & back are flipped:
1212 */
1213 if (dev_priv->current_page == 0) {
1214 OUT_RING( dev_priv->back_pitch_offset );
1215 OUT_RING( dev_priv->front_pitch_offset );
1216 }
1217 else {
1218 OUT_RING( dev_priv->front_pitch_offset );
1219 OUT_RING( dev_priv->back_pitch_offset );
1220 }
1221
1222 OUT_RING( (x << 16) | y );
1223 OUT_RING( (x << 16) | y );
1224 OUT_RING( (w << 16) | h );
1225
1226 ADVANCE_RING();
1227 }
1228
1229 /* Increment the frame counter. The client-side 3D driver must
1230 * throttle the framerate by waiting for this value before
1231 * performing the swapbuffer ioctl.
1232 */
1233 dev_priv->sarea_priv->last_frame++;
1234
1235 BEGIN_RING( 4 );
1236
1237 RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1238 RADEON_WAIT_UNTIL_2D_IDLE();
1239
1240 ADVANCE_RING();
1241}
1242
1243static void radeon_cp_dispatch_flip( drm_device_t *dev )
1244{
1245 drm_radeon_private_t *dev_priv = dev->dev_private;
1246 drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
1247 int offset = (dev_priv->current_page == 1)
1248 ? dev_priv->front_offset : dev_priv->back_offset;
1249 RING_LOCALS;
1250 DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",
1251 __FUNCTION__,
1252 dev_priv->current_page,
1253 dev_priv->sarea_priv->pfCurrentPage);
1254
1255 /* Do some trivial performance monitoring...
1256 */
1257 if (dev_priv->do_boxes) {
1258 dev_priv->stats.boxes |= RADEON_BOX_FLIP;
1259 radeon_cp_performance_boxes( dev_priv );
1260 }
1261
1262 /* Update the frame offsets for both CRTCs
1263 */
1264 BEGIN_RING( 6 );
1265
1266 RADEON_WAIT_UNTIL_3D_IDLE();
1267 OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
1268 + sarea->frame.x
1269 * ( dev_priv->color_fmt - 2 ) ) & ~7 )
1270 + offset );
1271 OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
1272 + offset );
1273
1274 ADVANCE_RING();
1275
1276 /* Increment the frame counter. The client-side 3D driver must
1277 * throttle the framerate by waiting for this value before
1278 * performing the swapbuffer ioctl.
1279 */
1280 dev_priv->sarea_priv->last_frame++;
1281 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
1282 1 - dev_priv->current_page;
1283
1284 BEGIN_RING( 2 );
1285
1286 RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
1287
1288 ADVANCE_RING();
1289}
1290
1291static int bad_prim_vertex_nr( int primitive, int nr )
1292{
1293 switch (primitive & RADEON_PRIM_TYPE_MASK) {
1294 case RADEON_PRIM_TYPE_NONE:
1295 case RADEON_PRIM_TYPE_POINT:
1296 return nr < 1;
1297 case RADEON_PRIM_TYPE_LINE:
1298 return (nr & 1) || nr == 0;
1299 case RADEON_PRIM_TYPE_LINE_STRIP:
1300 return nr < 2;
1301 case RADEON_PRIM_TYPE_TRI_LIST:
1302 case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
1303 case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
1304 case RADEON_PRIM_TYPE_RECT_LIST:
1305 return nr % 3 || nr == 0;
1306 case RADEON_PRIM_TYPE_TRI_FAN:
1307 case RADEON_PRIM_TYPE_TRI_STRIP:
1308 return nr < 3;
1309 default:
1310 return 1;
1311 }
1312}
1313
1314
1315
/* Description of one TCL primitive to dispatch from a DMA buffer.
 * start/finish are byte offsets into the buffer (start is added to the
 * GART buffer base when computing the hardware address).
 */
typedef struct {
	unsigned int start;	/* byte offset of the first datum in the buffer */
	unsigned int finish;	/* byte offset just past the last datum */
	unsigned int prim;	/* RADEON_PRIM_TYPE_* plus walk/control flags */
	unsigned int numverts;	/* number of vertices to render */
	unsigned int offset;	/* vertex-array offset, used by indexed prims */
	unsigned int vc_format;	/* hardware vertex format word */
} drm_radeon_tcl_prim_t;
1324
1325static void radeon_cp_dispatch_vertex( drm_device_t *dev,
1326 drm_buf_t *buf,
1327 drm_radeon_tcl_prim_t *prim )
1328
1329{
1330 drm_radeon_private_t *dev_priv = dev->dev_private;
1331 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1332 int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
1333 int numverts = (int)prim->numverts;
1334 int nbox = sarea_priv->nbox;
1335 int i = 0;
1336 RING_LOCALS;
1337
1338 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
1339 prim->prim,
1340 prim->vc_format,
1341 prim->start,
1342 prim->finish,
1343 prim->numverts);
1344
1345 if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
1346 DRM_ERROR( "bad prim %x numverts %d\n",
1347 prim->prim, prim->numverts );
1348 return;
1349 }
1350
1351 do {
1352 /* Emit the next cliprect */
1353 if ( i < nbox ) {
1354 radeon_emit_clip_rect( dev_priv,
1355 &sarea_priv->boxes[i] );
1356 }
1357
1358 /* Emit the vertex buffer rendering commands */
1359 BEGIN_RING( 5 );
1360
1361 OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
1362 OUT_RING( offset );
1363 OUT_RING( numverts );
1364 OUT_RING( prim->vc_format );
1365 OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
1366 RADEON_COLOR_ORDER_RGBA |
1367 RADEON_VTX_FMT_RADEON_MODE |
1368 (numverts << RADEON_NUM_VERTICES_SHIFT) );
1369
1370 ADVANCE_RING();
1371
1372 i++;
1373 } while ( i < nbox );
1374}
1375
1376
1377
1378static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
1379{
1380 drm_radeon_private_t *dev_priv = dev->dev_private;
1381 drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
1382 RING_LOCALS;
1383
1384 buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;
1385
1386 /* Emit the vertex buffer age */
1387 BEGIN_RING( 2 );
1388 RADEON_DISPATCH_AGE( buf_priv->age );
1389 ADVANCE_RING();
1390
1391 buf->pending = 1;
1392 buf->used = 0;
1393}
1394
1395static void radeon_cp_dispatch_indirect( drm_device_t *dev,
1396 drm_buf_t *buf,
1397 int start, int end )
1398{
1399 drm_radeon_private_t *dev_priv = dev->dev_private;
1400 RING_LOCALS;
1401 DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
1402 buf->idx, start, end );
1403
1404 if ( start != end ) {
1405 int offset = (dev_priv->gart_buffers_offset
1406 + buf->offset + start);
1407 int dwords = (end - start + 3) / sizeof(u32);
1408
1409 /* Indirect buffer data must be an even number of
1410 * dwords, so if we've been given an odd number we must
1411 * pad the data with a Type-2 CP packet.
1412 */
1413 if ( dwords & 1 ) {
1414 u32 *data = (u32 *)
1415 ((char *)dev->agp_buffer_map->handle
1416 + buf->offset + start);
1417 data[dwords++] = RADEON_CP_PACKET2;
1418 }
1419
1420 /* Fire off the indirect buffer */
1421 BEGIN_RING( 3 );
1422
1423 OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
1424 OUT_RING( offset );
1425 OUT_RING( dwords );
1426
1427 ADVANCE_RING();
1428 }
1429}
1430
1431
1432static void radeon_cp_dispatch_indices( drm_device_t *dev,
1433 drm_buf_t *elt_buf,
1434 drm_radeon_tcl_prim_t *prim )
1435{
1436 drm_radeon_private_t *dev_priv = dev->dev_private;
1437 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1438 int offset = dev_priv->gart_buffers_offset + prim->offset;
1439 u32 *data;
1440 int dwords;
1441 int i = 0;
1442 int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
1443 int count = (prim->finish - start) / sizeof(u16);
1444 int nbox = sarea_priv->nbox;
1445
1446 DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
1447 prim->prim,
1448 prim->vc_format,
1449 prim->start,
1450 prim->finish,
1451 prim->offset,
1452 prim->numverts);
1453
1454 if (bad_prim_vertex_nr( prim->prim, count )) {
1455 DRM_ERROR( "bad prim %x count %d\n",
1456 prim->prim, count );
1457 return;
1458 }
1459
1460
1461 if ( start >= prim->finish ||
1462 (prim->start & 0x7) ) {
1463 DRM_ERROR( "buffer prim %d\n", prim->prim );
1464 return;
1465 }
1466
1467 dwords = (prim->finish - prim->start + 3) / sizeof(u32);
1468
1469 data = (u32 *)((char *)dev->agp_buffer_map->handle +
1470 elt_buf->offset + prim->start);
1471
1472 data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
1473 data[1] = offset;
1474 data[2] = prim->numverts;
1475 data[3] = prim->vc_format;
1476 data[4] = (prim->prim |
1477 RADEON_PRIM_WALK_IND |
1478 RADEON_COLOR_ORDER_RGBA |
1479 RADEON_VTX_FMT_RADEON_MODE |
1480 (count << RADEON_NUM_VERTICES_SHIFT) );
1481
1482 do {
1483 if ( i < nbox )
1484 radeon_emit_clip_rect( dev_priv,
1485 &sarea_priv->boxes[i] );
1486
1487 radeon_cp_dispatch_indirect( dev, elt_buf,
1488 prim->start,
1489 prim->finish );
1490
1491 i++;
1492 } while ( i < nbox );
1493
1494}
1495
1496#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
1497
/* Upload a texture image into video memory with a host-data blit.
 *
 * Images larger than RADEON_MAX_TEXTURE_SIZE are uploaded in multiple
 * passes; 'image' is updated in place after each pass so that on
 * EAGAIN user space can resubmit the remainder.  Returns 0 on success
 * or a DRM error code (EINVAL, EFAULT, EAGAIN).
 */
static int radeon_cp_dispatch_texture( DRMFILE filp,
				       drm_device_t *dev,
				       drm_radeon_texture_t *tex,
				       drm_radeon_tex_image_t *image )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_buf_t *buf;
	u32 format;
	u32 *buffer;
	const u8 __user *data;
	int size, dwords, tex_width, blit_width;
	u32 height;
	int i;
	u32 texpitch, microtile;
	RING_LOCALS;

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	/* Validate/relocate the destination offset supplied by user space. */
	if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
		DRM_ERROR( "Invalid destination offset\n" );
		return DRM_ERR( EINVAL );
	}

	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;

	/* Flush the pixel cache. This ensures no pixel data gets mixed
	 * up with the texture data from the host data blit, otherwise
	 * part of the texture image may be corrupted.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_IDLE();
	ADVANCE_RING();

#ifdef __BIG_ENDIAN
	/* The Mesa texture functions provide the data in little endian as the
	 * chip wants it, but we need to compensate for the fact that the CP
	 * ring gets byte-swapped
	 */
	BEGIN_RING( 2 );
	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
	ADVANCE_RING();
#endif


	/* The compiler won't optimize away a division by a variable,
	 * even if the only legal values are powers of two.  Thus, we'll
	 * use a shift instead.
	 */
	switch ( tex->format ) {
	case RADEON_TXFORMAT_ARGB8888:
	case RADEON_TXFORMAT_RGBA8888:
		/* 4 bytes per texel */
		format = RADEON_COLOR_FORMAT_ARGB8888;
		tex_width = tex->width * 4;
		blit_width = image->width * 4;
		break;
	case RADEON_TXFORMAT_AI88:
	case RADEON_TXFORMAT_ARGB1555:
	case RADEON_TXFORMAT_RGB565:
	case RADEON_TXFORMAT_ARGB4444:
	case RADEON_TXFORMAT_VYUY422:
	case RADEON_TXFORMAT_YVYU422:
		/* 2 bytes per texel */
		format = RADEON_COLOR_FORMAT_RGB565;
		tex_width = tex->width * 2;
		blit_width = image->width * 2;
		break;
	case RADEON_TXFORMAT_I8:
	case RADEON_TXFORMAT_RGB332:
		/* 1 byte per texel */
		format = RADEON_COLOR_FORMAT_CI8;
		tex_width = tex->width * 1;
		blit_width = image->width * 1;
		break;
	default:
		DRM_ERROR( "invalid texture format %d\n", tex->format );
		return DRM_ERR(EINVAL);
	}
	texpitch = tex->pitch;
	/* Bit in the (pitch << 22) position selects micro tiling in the
	 * blit control word below.
	 */
	if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
		microtile = 1;
		if (tex_width < 64) {
			/* narrow textures are tiled manually below, so strip
			 * the hardware micro-tile bit from the pitch word */
			texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
			/* we got tiled coordinates, untile them */
			image->x *= 2;
		}
	}
	else microtile = 0;

	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );

	do {
		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
			   tex->offset >> 10, tex->pitch, tex->format,
			   image->x, image->y, image->width, image->height );

		/* Make a copy of some parameters in case we have to
		 * update them for a multi-pass texture blit.
		 */
		height = image->height;
		data = (const u8 __user *)image->data;

		size = height * blit_width;

		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
			/* too big for one pass: clamp to a whole number of
			 * scanlines that fits; the loop handles the rest */
			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
			size = height * blit_width;
		} else if ( size < 4 && size > 0 ) {
			size = 4;
		} else if ( size == 0 ) {
			return 0;
		}

		buf = radeon_freelist_get( dev );
		/* NOTE(review): the "0 &&" deliberately disables the
		 * idle-and-retry path; with no buffer available we fall
		 * straight through to the EAGAIN return below.
		 */
		if ( 0 && !buf ) {
			radeon_do_cp_idle( dev_priv );
			buf = radeon_freelist_get( dev );
		}
		if ( !buf ) {
			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
			/* hand back the (possibly updated) image descriptor so
			 * user space can retry with the remaining portion */
			if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
				return DRM_ERR(EFAULT);
			return DRM_ERR(EAGAIN);
		}


		/* Dispatch the indirect buffer.
		 */
		buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
		dwords = size / 4;
		/* HOSTDATA_BLT header: control word, pitch/offset, two mask
		 * words (all-ones = no clipping/masking), position, extents,
		 * payload dword count.
		 */
		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			     RADEON_GMC_BRUSH_NONE |
			     (format << 8) |
			     RADEON_GMC_SRC_DATATYPE_COLOR |
			     RADEON_ROP3_S |
			     RADEON_DP_SRC_SOURCE_HOST_DATA |
			     RADEON_GMC_CLR_CMP_CNTL_DIS |
			     RADEON_GMC_WR_MSK_DIS);

		buffer[2] = (texpitch << 22) | (tex->offset >> 10);
		buffer[3] = 0xffffffff;
		buffer[4] = 0xffffffff;
		buffer[5] = (image->y << 16) | image->x;
		buffer[6] = (height << 16) | image->width;
		buffer[7] = dwords;
		buffer += 8;



		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					if (DRM_COPY_FROM_USER(buffer, data,
							       tex_width * sizeof(u32))) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				} else if (tex_width == 32) {
					/* copy the two 16-byte halves into the tiled layout */
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				/* blitter-compatible widths: bulk copy */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else if (tex_width < 16) {
				/* one scanline per 16-byte (4-dword) slot */
				for (i = 0; i < tex->height; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				/* interleave two scanlines into the 8x2 micro-tile
				 * layout: halves land at +0/+8 and +4/+12 dwords */
				for (i = 0; i < tex->height; i += 2) {
					if (DRM_COPY_FROM_USER(buffer, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
						DRM_ERROR("EFAULT on pad, %d bytes\n",
							  tex_width);
						return DRM_ERR(EFAULT);
					}
					data += 16;
					buffer += 16;
				}
			}
		}
		else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				if (DRM_COPY_FROM_USER(buffer, data,
						       dwords * sizeof(u32))) {
					DRM_ERROR("EFAULT on data, %d dwords\n",
						  dwords);
					return DRM_ERR(EFAULT);
				}
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0 ; i < tex->height ; i++) {
					if (DRM_COPY_FROM_USER(buffer, data, tex_width )) {
						DRM_ERROR("EFAULT on pad, %d bytes\n", tex_width);
						return DRM_ERR(EFAULT);
					}
					buffer += 8;
					data += tex_width;
				}
			}
		}

		buf->filp = filp;
		/* 8 header dwords + payload */
		buf->used = (dwords + 8) * sizeof(u32);
		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
		radeon_cp_discard_buffer( dev, buf );

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING( 4 );
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	return 0;
}
1773
1774
1775static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1776{
1777 drm_radeon_private_t *dev_priv = dev->dev_private;
1778 int i;
1779 RING_LOCALS;
1780 DRM_DEBUG( "\n" );
1781
1782 BEGIN_RING( 35 );
1783
1784 OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1785 OUT_RING( 0x00000000 );
1786
1787 OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1788 for ( i = 0 ; i < 32 ; i++ ) {
1789 OUT_RING( stipple[i] );
1790 }
1791
1792 ADVANCE_RING();
1793}
1794
1795static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
1796{
1797 if (!dev_priv->mmio)
1798 return;
1799
1800 radeon_do_cp_idle(dev_priv);
1801
1802 RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
1803 dev_priv->surfaces[surf_index].flags);
1804 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
1805 dev_priv->surfaces[surf_index].lower);
1806 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
1807 dev_priv->surfaces[surf_index].upper);
1808}
1809
1810
1811/* Allocates a virtual surface
1812 * doesn't always allocate a real surface, will stretch an existing
1813 * surface when possible.
1814 *
1815 * Note that refcount can be at most 2, since during a free refcount=3
1816 * might mean we have to allocate a new surface which might not always
1817 * be available.
 * For example : we allocate three contiguous surfaces ABC. If B is
1819 * freed, we suddenly need two surfaces to store A and C, which might
1820 * not always be available.
1821 */
1822static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
1823{
1824 struct radeon_virt_surface *s;
1825 int i;
1826 int virt_surface_index;
1827 uint32_t new_upper, new_lower;
1828
1829 new_lower = new->address;
1830 new_upper = new_lower + new->size - 1;
1831
1832 /* sanity check */
1833 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1834 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
1835 ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1836 return -1;
1837
1838 /* make sure there is no overlap with existing surfaces */
1839 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1840 if ((dev_priv->surfaces[i].refcount != 0) &&
1841 (( (new_lower >= dev_priv->surfaces[i].lower) &&
1842 (new_lower < dev_priv->surfaces[i].upper) ) ||
1843 ( (new_lower < dev_priv->surfaces[i].lower) &&
1844 (new_upper > dev_priv->surfaces[i].lower) )) ){
1845 return -1;}
1846 }
1847
1848 /* find a virtual surface */
1849 for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1850 if (dev_priv->virt_surfaces[i].filp == 0)
1851 break;
1852 if (i == 2*RADEON_MAX_SURFACES) {
1853 return -1;}
1854 virt_surface_index = i;
1855
1856 /* try to reuse an existing surface */
1857 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1858 /* extend before */
1859 if ((dev_priv->surfaces[i].refcount == 1) &&
1860 (new->flags == dev_priv->surfaces[i].flags) &&
1861 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1862 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1863 s->surface_index = i;
1864 s->lower = new_lower;
1865 s->upper = new_upper;
1866 s->flags = new->flags;
1867 s->filp = filp;
1868 dev_priv->surfaces[i].refcount++;
1869 dev_priv->surfaces[i].lower = s->lower;
1870 radeon_apply_surface_regs(s->surface_index, dev_priv);
1871 return virt_surface_index;
1872 }
1873
1874 /* extend after */
1875 if ((dev_priv->surfaces[i].refcount == 1) &&
1876 (new->flags == dev_priv->surfaces[i].flags) &&
1877 (new_lower == dev_priv->surfaces[i].upper + 1)) {
1878 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1879 s->surface_index = i;
1880 s->lower = new_lower;
1881 s->upper = new_upper;
1882 s->flags = new->flags;
1883 s->filp = filp;
1884 dev_priv->surfaces[i].refcount++;
1885 dev_priv->surfaces[i].upper = s->upper;
1886 radeon_apply_surface_regs(s->surface_index, dev_priv);
1887 return virt_surface_index;
1888 }
1889 }
1890
1891 /* okay, we need a new one */
1892 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1893 if (dev_priv->surfaces[i].refcount == 0) {
1894 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1895 s->surface_index = i;
1896 s->lower = new_lower;
1897 s->upper = new_upper;
1898 s->flags = new->flags;
1899 s->filp = filp;
1900 dev_priv->surfaces[i].refcount = 1;
1901 dev_priv->surfaces[i].lower = s->lower;
1902 dev_priv->surfaces[i].upper = s->upper;
1903 dev_priv->surfaces[i].flags = s->flags;
1904 radeon_apply_surface_regs(s->surface_index, dev_priv);
1905 return virt_surface_index;
1906 }
1907 }
1908
1909 /* we didn't find anything */
1910 return -1;
1911}
1912
1913static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1914{
1915 struct radeon_virt_surface *s;
1916 int i;
1917 /* find the virtual surface */
1918 for(i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1919 s = &(dev_priv->virt_surfaces[i]);
1920 if (s->filp) {
1921 if ((lower == s->lower) && (filp == s->filp)) {
1922 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1923 dev_priv->surfaces[s->surface_index].lower = s->upper;
1924
1925 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1926 dev_priv->surfaces[s->surface_index].upper = s->lower;
1927
1928 dev_priv->surfaces[s->surface_index].refcount--;
1929 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1930 dev_priv->surfaces[s->surface_index].flags = 0;
1931 s->filp = NULL;
1932 radeon_apply_surface_regs(s->surface_index, dev_priv);
1933 return 0;
1934 }
1935 }
1936 }
1937 return 1;
1938}
1939
1940static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1941{
1942 int i;
1943 for( i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1944 {
1945 if (dev_priv->virt_surfaces[i].filp == filp)
1946 free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1947 }
1948}
1949
1950/* ================================================================
1951 * IOCTL functions
1952 */
1953static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1954{
1955 DRM_DEVICE;
1956 drm_radeon_private_t *dev_priv = dev->dev_private;
1957 drm_radeon_surface_alloc_t alloc;
1958
1959 if (!dev_priv) {
1960 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1961 return DRM_ERR(EINVAL);
1962 }
1963
1964 DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1965 sizeof(alloc));
1966
1967 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1968 return DRM_ERR(EINVAL);
1969 else
1970 return 0;
1971}
1972
1973static int radeon_surface_free(DRM_IOCTL_ARGS)
1974{
1975 DRM_DEVICE;
1976 drm_radeon_private_t *dev_priv = dev->dev_private;
1977 drm_radeon_surface_free_t memfree;
1978
1979 if (!dev_priv) {
1980 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1981 return DRM_ERR(EINVAL);
1982 }
1983
1984 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_mem_free_t __user *)data,
1985 sizeof(memfree) );
1986
1987 if (free_surface(filp, dev_priv, memfree.address))
1988 return DRM_ERR(EINVAL);
1989 else
1990 return 0;
1991}
1992
/* CLEAR ioctl: copy the clear parameters and the per-cliprect depth
 * clear rectangles from user space, then emit the clear through
 * radeon_cp_dispatch_clear().
 */
static int radeon_cp_clear( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_clear_t clear;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
			     sizeof(clear) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp nbox so the user copy below cannot overrun depth_boxes[]. */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
			     sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
		return DRM_ERR(EFAULT);

	radeon_cp_dispatch_clear( dev, &clear, depth_boxes );

	COMMIT_RING();
	return 0;
}
2021
2022
/* Enable hardware page flipping.  (It is unclear why this is done
 * lazily on first flip rather than at initialization time.)
 */
static int radeon_do_init_pageflip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG( "\n" );

	/* Set the FLIP_CNTL bit on both CRTC offset-control registers
	 * via the ring, then reset the page bookkeeping to page 0.
	 */
	BEGIN_RING( 6 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
	OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
	ADVANCE_RING();

	dev_priv->page_flipping = 1;
	dev_priv->current_page = 0;
	/* Mirror the current page into the SAREA for clients to see. */
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;

	return 0;
}
2046
2047/* Called whenever a client dies, from drm_release.
2048 * NOTE: Lock isn't necessarily held when this is called!
2049 */
2050static int radeon_do_cleanup_pageflip( drm_device_t *dev )
2051{
2052 drm_radeon_private_t *dev_priv = dev->dev_private;
2053 DRM_DEBUG( "\n" );
2054
2055 if (dev_priv->current_page != 0)
2056 radeon_cp_dispatch_flip( dev );
2057
2058 dev_priv->page_flipping = 0;
2059 return 0;
2060}
2061
2062/* Swapping and flipping are different operations, need different ioctls.
2063 * They can & should be intermixed to support multiple 3d windows.
2064 */
/* DRM_RADEON_FLIP ioctl: schedule a page flip on the ring, enabling
 * page flipping first if it is not already active.
 */
static int radeon_cp_flip( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Lazily enable page flipping on first use. */
	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip( dev );

	radeon_cp_dispatch_flip( dev );

	COMMIT_RING();
	return 0;
}
2083
/* DRM_RADEON_SWAP ioctl: copy the back buffer to the front buffer for
 * the cliprects currently in the SAREA.
 */
static int radeon_cp_swap( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	DRM_DEBUG( "\n" );

	LOCK_TEST_WITH_RETURN( dev, filp );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	/* Clamp the client-supplied box count to the SAREA limit. */
	if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	radeon_cp_dispatch_swap( dev );
	/* Clear the recorded 3D context owner in the SAREA. */
	dev_priv->sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}
2104
/* DRM_RADEON_VERTEX ioctl: emit a client DMA buffer as a single
 * vertex primitive, flushing any dirty state first.
 */
static int radeon_cp_vertex( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex_t vertex;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.count, vertex.discard );

	/* Validate the client-supplied buffer index and primitive type. */
	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}
	if ( vertex.prim < 0 ||
	     vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
		DRM_ERROR( "buffer prim %d\n", vertex.prim );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	/* Only the owning client may submit a buffer, and only once. */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	/* Build up a prim_t record:
	 */
	if (vertex.count) {
		buf->used = vertex.count;	/* not used? */

		/* Flush dirty state (everything except cliprects) to the
		 * ring before emitting the primitive.
		 */
		if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
			if ( radeon_emit_state( dev_priv, filp_priv,
						&sarea_priv->context_state,
						sarea_priv->tex_state,
						sarea_priv->dirty ) ) {
				DRM_ERROR( "radeon_emit_state failed\n" );
				return DRM_ERR( EINVAL );
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex.count; /* unused */
		prim.prim = vertex.prim;
		prim.numverts = vertex.count;
		prim.vc_format = dev_priv->sarea_priv->vc_format;

		radeon_cp_dispatch_vertex( dev, buf, &prim );
	}

	if (vertex.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}
2189
2190static int radeon_cp_indices( DRM_IOCTL_ARGS )
2191{
2192 DRM_DEVICE;
2193 drm_radeon_private_t *dev_priv = dev->dev_private;
2194 drm_file_t *filp_priv;
2195 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2196 drm_device_dma_t *dma = dev->dma;
2197 drm_buf_t *buf;
2198 drm_radeon_indices_t elts;
2199 drm_radeon_tcl_prim_t prim;
2200 int count;
2201
2202 LOCK_TEST_WITH_RETURN( dev, filp );
2203
2204 if ( !dev_priv ) {
2205 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2206 return DRM_ERR(EINVAL);
2207 }
2208
2209 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2210
2211 DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
2212 sizeof(elts) );
2213
2214 DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
2215 DRM_CURRENTPID,
2216 elts.idx, elts.start, elts.end, elts.discard );
2217
2218 if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
2219 DRM_ERROR( "buffer index %d (of %d max)\n",
2220 elts.idx, dma->buf_count - 1 );
2221 return DRM_ERR(EINVAL);
2222 }
2223 if ( elts.prim < 0 ||
2224 elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2225 DRM_ERROR( "buffer prim %d\n", elts.prim );
2226 return DRM_ERR(EINVAL);
2227 }
2228
2229 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2230 VB_AGE_TEST_WITH_RETURN( dev_priv );
2231
2232 buf = dma->buflist[elts.idx];
2233
2234 if ( buf->filp != filp ) {
2235 DRM_ERROR( "process %d using buffer owned by %p\n",
2236 DRM_CURRENTPID, buf->filp );
2237 return DRM_ERR(EINVAL);
2238 }
2239 if ( buf->pending ) {
2240 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
2241 return DRM_ERR(EINVAL);
2242 }
2243
2244 count = (elts.end - elts.start) / sizeof(u16);
2245 elts.start -= RADEON_INDEX_PRIM_OFFSET;
2246
2247 if ( elts.start & 0x7 ) {
2248 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
2249 return DRM_ERR(EINVAL);
2250 }
2251 if ( elts.start < buf->used ) {
2252 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
2253 return DRM_ERR(EINVAL);
2254 }
2255
2256 buf->used = elts.end;
2257
2258 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2259 if ( radeon_emit_state( dev_priv, filp_priv,
2260 &sarea_priv->context_state,
2261 sarea_priv->tex_state,
2262 sarea_priv->dirty ) ) {
2263 DRM_ERROR( "radeon_emit_state failed\n" );
2264 return DRM_ERR( EINVAL );
2265 }
2266
2267 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2268 RADEON_UPLOAD_TEX1IMAGES |
2269 RADEON_UPLOAD_TEX2IMAGES |
2270 RADEON_REQUIRE_QUIESCENCE);
2271 }
2272
2273
2274 /* Build up a prim_t record:
2275 */
2276 prim.start = elts.start;
2277 prim.finish = elts.end;
2278 prim.prim = elts.prim;
2279 prim.offset = 0; /* offset from start of dma buffers */
2280 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2281 prim.vc_format = dev_priv->sarea_priv->vc_format;
2282
2283 radeon_cp_dispatch_indices( dev, buf, &prim );
2284 if (elts.discard) {
2285 radeon_cp_discard_buffer( dev, buf );
2286 }
2287
2288 COMMIT_RING();
2289 return 0;
2290}
2291
/* DRM_RADEON_TEXTURE ioctl: upload a texture image supplied by the
 * client.
 */
static int radeon_cp_texture( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t tex;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );

	if ( tex.image == NULL ) {
		DRM_ERROR( "null texture image!\n" );
		return DRM_ERR(EINVAL);
	}

	/* Copy the image descriptor from user space. */
	if ( DRM_COPY_FROM_USER( &image,
				 (drm_radeon_tex_image_t __user *)tex.image,
				 sizeof(image) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );

	COMMIT_RING();
	return ret;
}
2322
/* DRM_RADEON_STIPPLE ioctl: upload a 32-dword polygon stipple mask. */
static int radeon_cp_stipple( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t stipple;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN( dev, filp );

	DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
			     sizeof(stipple) );

	/* Pull the whole mask into kernel memory before touching the ring. */
	if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
		return DRM_ERR(EFAULT);

	RING_SPACE_TEST_WITH_RETURN( dev_priv );

	radeon_cp_dispatch_stipple( dev, mask );

	COMMIT_RING();
	return 0;
}
2345
/* DRM_RADEON_INDIRECT ioctl: submit an unverified command buffer.
 * Restricted to privileged clients (see radeon_ioctls[] flags).
 */
static int radeon_cp_indirect( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_indirect_t indirect;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
			     sizeof(indirect) );

	DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
		   indirect.idx, indirect.start,
		   indirect.end, indirect.discard );

	if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   indirect.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	buf = dma->buflist[indirect.idx];

	/* Only the owning client may submit a buffer, and only once. */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}
	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
		return DRM_ERR(EINVAL);
	}

	if ( indirect.start < buf->used ) {
		DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
			   indirect.start, buf->used );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf->used = indirect.end;

	/* Wait for the 3D stream to idle before the indirect buffer
	 * containing 2D acceleration commands is processed.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
	if (indirect.discard) {
		radeon_cp_discard_buffer( dev, buf );
	}


	COMMIT_RING();
	return 0;
}
2420
/* DRM_RADEON_VERTEX2 ioctl: emit several primitives from one DMA
 * buffer, switching state between primitives as requested by each
 * per-primitive state index.
 */
static int radeon_cp_vertex2( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf;
	drm_radeon_vertex2_t vertex;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
			     sizeof(vertex) );

	DRM_DEBUG( "pid=%d index=%d discard=%d\n",
		   DRM_CURRENTPID,
		   vertex.idx, vertex.discard );

	if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
		DRM_ERROR( "buffer index %d (of %d max)\n",
			   vertex.idx, dma->buf_count - 1 );
		return DRM_ERR(EINVAL);
	}

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	buf = dma->buflist[vertex.idx];

	/* Only the owning client may submit a buffer, and only once. */
	if ( buf->filp != filp ) {
		DRM_ERROR( "process %d using buffer owned by %p\n",
			   DRM_CURRENTPID, buf->filp );
		return DRM_ERR(EINVAL);
	}

	if ( buf->pending ) {
		DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
		return DRM_ERR(EINVAL);
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return DRM_ERR(EINVAL);

	/* laststate starts at 0xff so the first prim always loads state. */
	for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
			return DRM_ERR(EFAULT);

		/* Emit new state only when the state index changes. */
		if ( prim.stateidx != laststate ) {
			drm_radeon_state_t state;

			if ( DRM_COPY_FROM_USER( &state,
						 &vertex.state[prim.stateidx],
						 sizeof(state) ) )
				return DRM_ERR(EFAULT);

			if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
				DRM_ERROR( "radeon_emit_state2 failed\n" );
				return DRM_ERR( EINVAL );
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		/* Indexed vs. sequential primitives take different paths. */
		if ( prim.prim & RADEON_PRIM_WALK_IND ) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */

			radeon_cp_dispatch_indices( dev, buf, &tclprim );
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0; /* not used */

			radeon_cp_dispatch_vertex( dev, buf, &tclprim );
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if ( vertex.discard ) {
		radeon_cp_discard_buffer( dev, buf );
	}

	COMMIT_RING();
	return 0;
}
2525
2526
/* Emit one register-write packet from a client command buffer.  The
 * packet id selects a (start register, length) pair from the global
 * 'packet' table; the payload dwords follow in cmdbuf->buf.
 */
static int radeon_emit_packets(
	drm_radeon_private_t *dev_priv,
	drm_file_t *filp_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return DRM_ERR(EINVAL);

	sz = packet[id].len;
	reg = packet[id].start;

	/* The table-declared payload must fit in what the client sent. */
	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR( "Packet size provided larger than data provided\n" );
		return DRM_ERR(EINVAL);
	}

	/* Verify the packet contents before letting them near the ring. */
	if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return DRM_ERR( EINVAL );
	}

	BEGIN_RING(sz+1);
	OUT_RING( CP_PACKET0( reg, (sz-1) ) );
	OUT_RING_TABLE( data, sz );
	ADVANCE_RING();

	/* Advance the command buffer past the consumed payload. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2563
/* Emit a run of TCL scalar register values from the command buffer:
 * program the scalar index register, then stream 'sz' data dwords.
 */
static __inline__ int radeon_emit_scalars(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();
	/* Consume the payload from the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2584
/* Variant of radeon_emit_scalars() with the scalar index offset by
 * 0x100; otherwise identical.
 */
static __inline__ int radeon_emit_scalars2(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.scalars.count;
	/* Only difference from radeon_emit_scalars(): +0x100 on the index. */
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();
	/* Consume the payload from the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2607
/* Emit a run of TCL vector register values from the command buffer:
 * program the vector index register, then stream 'sz' data dwords.
 */
static __inline__ int radeon_emit_vectors(
	drm_radeon_private_t *dev_priv,
	drm_radeon_cmd_header_t header,
	drm_radeon_cmd_buffer_t *cmdbuf )
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING( 3+sz );
	OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
	OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
	OUT_RING_TABLE( cmdbuf->buf, sz );
	ADVANCE_RING();

	/* Consume the payload from the command buffer. */
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}
2629
2630
/* Verify and emit a single type-3 packet from the command buffer. */
static int radeon_emit_packet3( drm_device_t *dev,
				drm_file_t *filp_priv,
				drm_radeon_cmd_buffer_t *cmdbuf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	/* The checker also reports the packet size in dwords (cmdsz). */
	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	BEGIN_RING( cmdsz );
	OUT_RING_TABLE( cmdbuf->buf, cmdsz );
	ADVANCE_RING();

	/* Consume the packet from the command buffer. */
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2656
2657
/* Verify a type-3 packet, then emit it once per cliprect, setting the
 * scissor to each box first.
 */
static int radeon_emit_packet3_cliprect( drm_device_t *dev,
					 drm_file_t *filp_priv,
					 drm_radeon_cmd_buffer_t *cmdbuf,
					 int orig_nbox )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_clip_rect_t box;
	unsigned int cmdsz;
	int ret;
	drm_clip_rect_t __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
						     cmdbuf, &cmdsz ) ) ) {
		DRM_ERROR( "Packet verification failed\n" );
		return ret;
	}

	/* With no boxes at all, skip emission but still consume the
	 * packet from the command buffer (see 'out' below).
	 */
	if (!orig_nbox)
		goto out;

	do {
		if ( i < cmdbuf->nbox ) {
			if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
				return DRM_ERR(EFAULT);
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if ( i ) {
				BEGIN_RING( 2 );
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect( dev_priv, &box );
		}

		BEGIN_RING( cmdsz );
		OUT_RING_TABLE( cmdbuf->buf, cmdsz );
		ADVANCE_RING();

	} while ( ++i < cmdbuf->nbox );
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

 out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}
2719
2720
2721static int radeon_emit_wait( drm_device_t *dev, int flags )
2722{
2723 drm_radeon_private_t *dev_priv = dev->dev_private;
2724 RING_LOCALS;
2725
2726 DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2727 switch (flags) {
2728 case RADEON_WAIT_2D:
2729 BEGIN_RING( 2 );
2730 RADEON_WAIT_UNTIL_2D_IDLE();
2731 ADVANCE_RING();
2732 break;
2733 case RADEON_WAIT_3D:
2734 BEGIN_RING( 2 );
2735 RADEON_WAIT_UNTIL_3D_IDLE();
2736 ADVANCE_RING();
2737 break;
2738 case RADEON_WAIT_2D|RADEON_WAIT_3D:
2739 BEGIN_RING( 2 );
2740 RADEON_WAIT_UNTIL_IDLE();
2741 ADVANCE_RING();
2742 break;
2743 default:
2744 return DRM_ERR(EINVAL);
2745 }
2746
2747 return 0;
2748}
2749
/* DRM_RADEON_CMDBUF ioctl: copy a client command buffer into kernel
 * memory, then walk it command-by-command, verifying and emitting each
 * one onto the ring.
 */
static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_device_dma_t *dma = dev->dma;
	drm_buf_t *buf = NULL;
	int idx;
	drm_radeon_cmd_buffer_t cmdbuf;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf=NULL;

	LOCK_TEST_WITH_RETURN( dev, filp );

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
			     sizeof(cmdbuf) );

	RING_SPACE_TEST_WITH_RETURN( dev_priv );
	VB_AGE_TEST_WITH_RETURN( dev_priv );

	/* Reject absurd or negative sizes before allocating. */
	if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz<0) {
		return DRM_ERR(EINVAL);
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf.bufsz;
	if (orig_bufsz != 0) {
		kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
		if (kbuf == NULL)
			return DRM_ERR(ENOMEM);
		if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
			drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
			return DRM_ERR(EFAULT);
		}
		cmdbuf.buf = kbuf;
	}

	orig_nbox = cmdbuf.nbox;

	/* Each iteration consumes one header plus whatever payload the
	 * per-command emitter advances cmdbuf.buf/bufsz by.
	 */
	while ( cmdbuf.bufsz >= sizeof(header) ) {

		header.i = *(int *)cmdbuf.buf;
		cmdbuf.buf += sizeof(header);
		cmdbuf.bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if ( idx < 0 || idx >= dma->buf_count ) {
				DRM_ERROR( "buffer index %d (of %d max)\n",
					   idx, dma->buf_count - 1 );
				goto err;
			}

			/* Only the owner may discard, and not twice. */
			buf = dma->buflist[idx];
			if ( buf->filp != filp || buf->pending ) {
				DRM_ERROR( "bad buffer %p %p %d\n",
					   buf->filp, filp, buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer( dev, buf );
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait( dev, header.wait.flags )) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf.buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

err:
	/* goto-cleanup: release the kernel copy on any failure path. */
	if (orig_bufsz != 0)
		drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
	return DRM_ERR(EINVAL);
}
2901
2902
2903
/* DRM_RADEON_GETPARAM ioctl: return one driver parameter (offsets,
 * scratch register values, irq number, ...) to the client as an int.
 */
static int radeon_cp_getparam( DRM_IOCTL_ARGS )
{
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t param;
	int value;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR(EINVAL);
	}

	DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
			     sizeof(param) );

	DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );

	switch( param.param ) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH( 0 );
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH( 1 );
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH( 2 );
		break;
	case RADEON_PARAM_IRQ_NR:
		value = dev->irq;
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio_offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
	/*
	 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
	 * pointer which can't fit into an int-sized variable.  According to
	 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
	 * not supporting it shouldn't be a problem.  If the same functionality
	 * is needed on 64-bit platforms, a new ioctl() would have to be added,
	 * so backwards-compatibility for the embedded platforms can be
	 * maintained.  --davidm 4-Feb-2004.
	 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		value = (long)dev->lock.hw_lock;
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	default:
		return DRM_ERR(EINVAL);
	}

	if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
		DRM_ERROR( "copy_to_user\n" );
		return DRM_ERR(EFAULT);
	}

	return 0;
}
2977
/* DRM_RADEON_SETPARAM ioctl: let a client set a driver parameter
 * (framebuffer location delta, or color tiling on/off).
 */
static int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
	DRM_DEVICE;
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_file_t *filp_priv;
	drm_radeon_setparam_t sp;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( !dev_priv ) {
		DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
		return DRM_ERR( EINVAL );
	}

	DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );

	DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
				  sizeof( sp ) );

	switch( sp.param ) {
	case RADEON_SETPARAM_FB_LOCATION:
		/* Record this client's view of the fb location so its
		 * offsets can be corrected by the delta later.
		 */
		radeon_priv = filp_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		/* Toggle macro tiling on the front/back pitch-offset
		 * registers and mirror the setting into the SAREA.
		 */
		if (sp.value == 0) {
			DRM_DEBUG( "color tiling disabled\n" );
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 0;
		}
		else if (sp.value == 1) {
			DRM_DEBUG( "color tiling enabled\n" );
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	default:
		DRM_DEBUG( "Invalid parameter %d\n", sp.param );
		return DRM_ERR( EINVAL );
	}

	return 0;
}
3021
3022/* When a client dies:
3023 * - Check for and clean up flipped page state
3024 * - Free any alloced GART memory.
3025 *
3026 * DRM infrastructure takes care of reclaiming dma buffers.
3027 */
3028void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
3029{
3030 if ( dev->dev_private ) {
3031 drm_radeon_private_t *dev_priv = dev->dev_private;
3032 if ( dev_priv->page_flipping ) {
3033 radeon_do_cleanup_pageflip( dev );
3034 }
3035 radeon_mem_release( filp, dev_priv->gart_heap );
3036 radeon_mem_release( filp, dev_priv->fb_heap );
3037 radeon_surfaces_release(filp, dev_priv);
3038 }
3039}
3040
/* Driver takedown hook: delegates all cleanup to radeon_do_release(). */
void radeon_driver_pretakedown(drm_device_t *dev)
{
	radeon_do_release(dev);
}
3045
3046int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
3047{
3048 drm_radeon_private_t *dev_priv = dev->dev_private;
3049 struct drm_radeon_driver_file_fields *radeon_priv;
3050
3051 radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3052
3053 if (!radeon_priv)
3054 return -ENOMEM;
3055
3056 filp_priv->driver_priv = radeon_priv;
3057 if ( dev_priv )
3058 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3059 else
3060 radeon_priv->radeon_fb_delta = 0;
3061 return 0;
3062}
3063
3064
3065void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
3066{
3067 struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;
3068
3069 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3070}
3071
/* Ioctl dispatch table.
 * NOTE(review): the two integer flags per entry appear to be
 * permission flags (e.g. auth-required / privileged-only) defined by
 * drm_ioctl_desc_t -- confirm against the DRM core headers.
 */
drm_ioctl_desc_t radeon_ioctls[] = {
	[DRM_IOCTL_NR(DRM_RADEON_CP_INIT)]    = { radeon_cp_init,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_START)]   = { radeon_cp_start,     1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_STOP)]    = { radeon_cp_stop,      1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESET)]   = { radeon_cp_reset,     1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)]    = { radeon_cp_idle,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)]  = { radeon_cp_resume,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_RESET)]      = { radeon_engine_reset, 1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SWAP)]       = { radeon_cp_swap,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CLEAR)]      = { radeon_cp_clear,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX)]     = { radeon_cp_vertex,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDICES)]    = { radeon_cp_indices,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_TEXTURE)]    = { radeon_cp_texture,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_STIPPLE)]    = { radeon_cp_stipple,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INDIRECT)]   = { radeon_cp_indirect,  1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_VERTEX2)]    = { radeon_cp_vertex2,   1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_CMDBUF)]     = { radeon_cp_cmdbuf,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_GETPARAM)]   = { radeon_cp_getparam,  1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FLIP)]       = { radeon_cp_flip,      1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_ALLOC)]      = { radeon_mem_alloc,    1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_FREE)]       = { radeon_mem_free,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)]  = { radeon_mem_init_heap,1, 1 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)]   = { radeon_irq_emit,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)]   = { radeon_irq_wait,     1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SETPARAM)]   = { radeon_cp_setparam,  1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
	[DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)]  = { radeon_surface_free, 1, 0 }
};
3101
/* Number of entries in radeon_ioctls[]. */
int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);