/* radeon_state.c -- State support for Radeon -*- linux-c -*-
 *
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */

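/* Check and fix up a client-supplied buffer offset.  Offsets that
 * already fall inside the framebuffer/GART aperture pass through
 * unchanged; anything else is rebased by the per-file framebuffer
 * delta and re-checked, so a bad offset can never reach the CP.
 */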
static __inline__ int radeon_check_and_fixup_offset( drm_radeon_private_t *dev_priv,
						     drm_file_t *filp_priv,
						     u32 *offset ) {
	u32 off = *offset;
	struct drm_radeon_driver_file_fields *radeon_priv;

	if ( off >= dev_priv->fb_location &&
	     off < ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return 0;

	radeon_priv = filp_priv->driver_priv;
	off += radeon_priv->radeon_fb_delta;

	DRM_DEBUG( "offset fixed up to 0x%x\n", off );

	if ( off < dev_priv->fb_location ||
	     off >= ( dev_priv->gart_vm_start + dev_priv->gart_size ) )
		return DRM_ERR( EINVAL );

	*offset = off;

	return 0;
}

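/* Check the payload of a state packet before it is emitted.  Packets
 * that carry memory offsets have those offsets validated and fixed up
 * in place; packets that carry no offsets are listed explicitly and
 * passed through, and unknown packet IDs are rejected.
 */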
static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      int id,
						      u32 __user *data ) {
	switch ( id ) {

	case RADEON_EMIT_PP_MISC:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_DEPTHOFFSET
							    - RADEON_PP_MISC ) / 4] ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_CNTL:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_RB3D_COLOROFFSET
							    - RADEON_PP_CNTL ) / 4] ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[0] ) ) {
			DRM_ERROR( "Invalid R200 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &data[( RADEON_PP_TXOFFSET_0
							    - RADEON_PP_TXFILTER_0 ) / 4] ) ) {
			DRM_ERROR( "Invalid R100 texture offset\n" );
			return DRM_ERR( EINVAL );
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R200 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2: {
		int i;
		for ( i = 0; i < 5; i++ ) {
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
							    &data[i] ) ) {
				DRM_ERROR( "Invalid R100 cubic texture offset\n" );
				return DRM_ERR( EINVAL );
			}
		}
		break;
	}

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_VAP_CTL:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR( "Unknown state packet ID %d\n", id );
		return DRM_ERR( EINVAL );
	}

	return 0;
}

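/* Validate a type-3 (variable length) packet from a client command
 * buffer.  The size encoded in the packet header must fit inside the
 * data the client actually supplied, and for GUI-control blits the
 * source and destination pitch/offset dwords are fixed up in place.
 */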
static __inline__ int radeon_check_and_fixup_packet3( drm_radeon_private_t *dev_priv,
						      drm_file_t *filp_priv,
						      drm_radeon_cmd_buffer_t *cmdbuf,
						      unsigned int *cmdsz ) {
	u32 *cmd = (u32 *) cmdbuf->buf;

	*cmdsz = 2 + ( ( cmd[0] & RADEON_CP_PACKET_COUNT_MASK ) >> 16 );

	if ( ( cmd[0] & 0xc0000000 ) != RADEON_CP_PACKET3 ) {
		DRM_ERROR( "Not a type 3 packet\n" );
		return DRM_ERR( EINVAL );
	}

	if ( 4 * *cmdsz > cmdbuf->bufsz ) {
		DRM_ERROR( "Packet size larger than size of data provided\n" );
		return DRM_ERR( EINVAL );
	}

	/* Check client state and fix it up if necessary */
	if ( cmd[0] & 0x8000 ) { /* MSB of opcode: next DWORD GUI_CNTL */
		u32 offset;

		if ( cmd[1] & ( RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[2] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid first packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[2] = ( cmd[2] & 0xffc00000 ) | offset >> 10;
		}

		if ( ( cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL ) &&
		     ( cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL ) ) {
			offset = cmd[3] << 10;
			if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &offset ) ) {
				DRM_ERROR( "Invalid second packet offset\n" );
				return DRM_ERR( EINVAL );
			}
			cmd[3] = ( cmd[3] & 0xffc00000 ) | offset >> 10;
		}
	}

	return 0;
}

/* ================================================================
 * CP hardware state programming functions
 */

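/* Emit a hardware clip rectangle.  Subsequent rendering is clipped to
 * this box; RE_WIDTH_HEIGHT appears to take an inclusive bottom-right
 * corner, hence the "- 1" on both coordinates.
 */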
static __inline__ void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv,
					      drm_clip_rect_t *box )
{
	RING_LOCALS;

	DRM_DEBUG( "   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		   box->x1, box->y1, box->x2, box->y2 );

	BEGIN_RING( 4 );
	OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) );
	OUT_RING( (box->y1 << 16) | box->x1 );
	OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) );
	OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
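/* State is uploaded piecewise, gated by the dirty bits the client
 * passed in; every buffer offset that reaches the ring goes through
 * radeon_check_and_fixup_offset() first.
 */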
static int radeon_emit_state( drm_radeon_private_t *dev_priv,
			      drm_file_t *filp_priv,
			      drm_radeon_context_regs_t *ctx,
			      drm_radeon_texture_regs_t *tex,
			      unsigned int dirty )
{
	RING_LOCALS;
	DRM_DEBUG( "dirty=0x%08x\n", dirty );

	if ( dirty & RADEON_UPLOAD_CONTEXT ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_depthoffset ) ) {
			DRM_ERROR( "Invalid depth buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &ctx->rb3d_coloroffset ) ) {
			DRM_ERROR( "Invalid colour buffer offset\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 14 );
		OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) );
		OUT_RING( ctx->pp_misc );
		OUT_RING( ctx->pp_fog_color );
		OUT_RING( ctx->re_solid_color );
		OUT_RING( ctx->rb3d_blendcntl );
		OUT_RING( ctx->rb3d_depthoffset );
		OUT_RING( ctx->rb3d_depthpitch );
		OUT_RING( ctx->rb3d_zstencilcntl );
		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) );
		OUT_RING( ctx->pp_cntl );
		OUT_RING( ctx->rb3d_cntl );
		OUT_RING( ctx->rb3d_coloroffset );
		OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) );
		OUT_RING( ctx->rb3d_colorpitch );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VERTFMT ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) );
		OUT_RING( ctx->se_coord_fmt );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_LINE ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) );
		OUT_RING( ctx->re_line_pattern );
		OUT_RING( ctx->re_line_state );
		OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) );
		OUT_RING( ctx->se_line_width );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_BUMPMAP ) {
		BEGIN_RING( 5 );
		OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) );
		OUT_RING( ctx->pp_lum_matrix );
		OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) );
		OUT_RING( ctx->pp_rot_matrix_0 );
		OUT_RING( ctx->pp_rot_matrix_1 );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MASKS ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) );
		OUT_RING( ctx->rb3d_stencilrefmask );
		OUT_RING( ctx->rb3d_ropcntl );
		OUT_RING( ctx->rb3d_planemask );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_VIEWPORT ) {
		BEGIN_RING( 7 );
		OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) );
		OUT_RING( ctx->se_vport_xscale );
		OUT_RING( ctx->se_vport_xoffset );
		OUT_RING( ctx->se_vport_yscale );
		OUT_RING( ctx->se_vport_yoffset );
		OUT_RING( ctx->se_vport_zscale );
		OUT_RING( ctx->se_vport_zoffset );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_SETUP ) {
		BEGIN_RING( 4 );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) );
		OUT_RING( ctx->se_cntl );
		OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) );
		OUT_RING( ctx->se_cntl_status );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_MISC ) {
		BEGIN_RING( 2 );
		OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) );
		OUT_RING( ctx->re_misc );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX0 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[0].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 0\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) );
		OUT_RING( tex[0].pp_txfilter );
		OUT_RING( tex[0].pp_txformat );
		OUT_RING( tex[0].pp_txoffset );
		OUT_RING( tex[0].pp_txcblend );
		OUT_RING( tex[0].pp_txablend );
		OUT_RING( tex[0].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) );
		OUT_RING( tex[0].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX1 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[1].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 1\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) );
		OUT_RING( tex[1].pp_txfilter );
		OUT_RING( tex[1].pp_txformat );
		OUT_RING( tex[1].pp_txoffset );
		OUT_RING( tex[1].pp_txcblend );
		OUT_RING( tex[1].pp_txablend );
		OUT_RING( tex[1].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) );
		OUT_RING( tex[1].pp_border_color );
		ADVANCE_RING();
	}

	if ( dirty & RADEON_UPLOAD_TEX2 ) {
		if ( radeon_check_and_fixup_offset( dev_priv, filp_priv,
						    &tex[2].pp_txoffset ) ) {
			DRM_ERROR( "Invalid texture offset for unit 2\n" );
			return DRM_ERR( EINVAL );
		}

		BEGIN_RING( 9 );
		OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) );
		OUT_RING( tex[2].pp_txfilter );
		OUT_RING( tex[2].pp_txformat );
		OUT_RING( tex[2].pp_txoffset );
		OUT_RING( tex[2].pp_txcblend );
		OUT_RING( tex[2].pp_txablend );
		OUT_RING( tex[2].pp_tfactor );
		OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) );
		OUT_RING( tex[2].pp_border_color );
		ADVANCE_RING();
	}

	return 0;
}

/* Emit 1.2 state
 */
static int radeon_emit_state2( drm_radeon_private_t *dev_priv,
			       drm_file_t *filp_priv,
			       drm_radeon_state_t *state )
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING( 3 );
		OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) );
		OUT_RING( state->context2.se_zbias_factor );
		OUT_RING( state->context2.se_zbias_constant );
		ADVANCE_RING();
	}

	return radeon_emit_state( dev_priv, filp_priv, &state->context,
				  state->tex, state->dirty );
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
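/* Table of state packets known to the 1.3 interface, indexed by packet
 * ID: the first register of the range, the number of data dwords, and
 * a name for error reporting.
 */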
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{ RADEON_PP_MISC, 7, "RADEON_PP_MISC" },
	{ RADEON_PP_CNTL, 3, "RADEON_PP_CNTL" },
	{ RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH" },
	{ RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN" },
	{ RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH" },
	{ RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX" },
	{ RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0" },
	{ RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK" },
	{ RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE" },
	{ RADEON_SE_CNTL, 2, "RADEON_SE_CNTL" },
	{ RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS" },
	{ RADEON_RE_MISC, 1, "RADEON_RE_MISC" },
	{ RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0" },
	{ RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0" },
	{ RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1" },
	{ RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1" },
	{ RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2" },
	{ RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2" },
	{ RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR" },
	{ RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT" },
	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17, "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
	{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
	{ R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0" }, /* 61 */
	{ R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0" }, /* 62 */
	{ R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1" },
	{ R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1" },
	{ R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2" },
	{ R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2" },
	{ R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3" },
	{ R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3" },
	{ R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4" },
	{ R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4" },
	{ R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5" },
	{ R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
	{ RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" },
	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL" },
	{ RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0" },
	{ RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0" },
	{ RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1" },
	{ RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0" },
	{ RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2" },
	{ RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0" },
	{ R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF" },
};

/* ================================================================
 * Performance monitoring functions
 */

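/* Fill a small screen-aligned box with a solid colour, relative to the
 * first cliprect.  Used by the performance boxes below; the fill goes
 * to whichever buffer is currently being rendered into.
 */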
static void radeon_clear_box( drm_radeon_private_t *dev_priv,
			      int x, int y, int w, int h,
			      int r, int g, int b )
{
	u32 color;
	RING_LOCALS;

	x += dev_priv->sarea_priv->boxes[0].x1;
	y += dev_priv->sarea_priv->boxes[0].y1;

	switch ( dev_priv->color_fmt ) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) |
			 ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING( 4 );
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
	OUT_RING( 0xffffffff );
	ADVANCE_RING();

	BEGIN_RING( 6 );

	OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
	OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		  RADEON_GMC_BRUSH_SOLID_COLOR |
		  (dev_priv->color_fmt << 8) |
		  RADEON_GMC_SRC_DATATYPE_COLOR |
		  RADEON_ROP3_P |
		  RADEON_GMC_CLR_CMP_CNTL_DIS );

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		OUT_RING( dev_priv->front_pitch_offset );
	} else {
		OUT_RING( dev_priv->back_pitch_offset );
	}

	OUT_RING( color );

	OUT_RING( (x << 16) | y );
	OUT_RING( (w << 16) | h );

	ADVANCE_RING();
}

static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );

	/* Red box if we have to wait for idle at any point
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )
		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )
		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )
		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );


	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box( dev_priv, 4, 16,
				  dev_priv->stats.requested_bufs, 4,
				  196, 128, 128 );
	}

	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
}

/* ================================================================
 * CP command dispatch functions
 */

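/* Clear the front/back colour buffers with 2D fills, and the depth and
 * stencil buffers either through the hyper-z fast path or by rendering
 * a quad with the appropriate write masks, one pass per cliprect.
 */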
static void radeon_cp_dispatch_clear( drm_device_t *dev,
				      drm_radeon_clear_t *clear,
				      drm_radeon_clear_rect_t *depth_boxes )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "flags = 0x%x\n", flags );

	dev_priv->stats.clears++;

	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if ( tmp & RADEON_FRONT ) flags |= RADEON_BACK;
		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;
	}

	if ( flags & (RADEON_FRONT | RADEON_BACK) ) {

		BEGIN_RING( 4 );

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
		OUT_RING( clear->color_mask );

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
				   x, y, w, h, flags );

			if ( flags & RADEON_FRONT ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->front_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}

			if ( flags & RADEON_BACK ) {
				BEGIN_RING( 6 );

				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					  RADEON_GMC_BRUSH_SOLID_COLOR |
					  (dev_priv->color_fmt << 8) |
					  RADEON_GMC_SRC_DATATYPE_COLOR |
					  RADEON_ROP3_P |
					  RADEON_GMC_CLR_CMP_CNTL_DIS );

				OUT_RING( dev_priv->back_pitch_offset );
				OUT_RING( clear->clear_color );

				OUT_RING( (x << 16) | y );
				OUT_RING( (w << 16) | h );

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline = dev_priv->depth_fmt == RADEON_DEPTH_FORMAT_16BIT_INT_Z ?
			(dev_priv->depth_pitch / 2) : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
			((clear->depth_mask & 0xff) << 24);


		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			*/
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		}
		else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			*/
			clearmask = 0x0;
		}

		BEGIN_RING( 8 );
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
			      tempRB3D_DEPTHCLEARVALUE );
		/* what offset is this exactly ? */
		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & CHIP_HAS_HIERZ) && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					OUT_RING( tileoffset * 8 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
			else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING( tileoffset * 16 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 1 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			}
			else { /* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING( 4 );
					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
					OUT_RING( tileoffset * 128 );
					/* the number of tiles to clear */
					OUT_RING( nrtilesx + 4 );
					/* clear mask : chooses the clearing pattern. */
					OUT_RING( clearmask );
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING( 4 );
			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
			OUT_RING( 0x0 ); /* First tile */
			OUT_RING( 0x3cc0 );
			OUT_RING( (0xff << 22) | (0xff << 6) | 0x003f003f );
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	if ((dev_priv->microcode_version == UCODE_R200) &&
	    (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;


		/* Disable TCL */

		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
			SE_VTE_CNTL__VTX_XY_FMT_MASK |
			SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;


		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if ( flags & RADEON_STENCIL ) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 26 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
		OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
			      tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      tempRB3D_STENCILREFMASK );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,
			      tempRE_AUX_SCISSOR_CNTL );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 14 );
			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x3f800000 );
			ADVANCE_RING();
		}
	}
	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if ( flags & RADEON_DEPTH ) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if ( flags & RADEON_STENCIL ) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
				RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING( 13 );
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
		OUT_RING( 0x00000000 );
		OUT_RING( rb3d_cntl );

		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL );
		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
			      rb3d_stencilrefmask );
		OUT_RING_REG( RADEON_RB3D_PLANEMASK,
			      0x00000000 );
		OUT_RING_REG( RADEON_SE_CNTL,
			      depth_clear->se_cntl );
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		dev_priv->sarea_priv->ctx_owner = 0;

		for ( i = 0 ; i < nbox ; i++ ) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

			BEGIN_RING( 15 );

			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
			OUT_RING( RADEON_VTX_Z_PRESENT |
				  RADEON_VTX_PKCOLOR_PRESENT );
			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
				   RADEON_PRIM_WALK_RING |
				   RADEON_MAOS_ENABLE |
				   RADEON_VTX_FMT_RADEON_MODE |
				   (3 << RADEON_NUM_VERTICES_SHIFT)) );


			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
			OUT_RING( 0x0 );

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	dev_priv->sarea_priv->last_clear++;

	BEGIN_RING( 4 );

	RADEON_CLEAR_AGE( dev_priv->sarea_priv->last_clear );
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	drm_clip_rect_t *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG( "\n" );

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes( dev_priv );


	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING( 2 );

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for ( i = 0 ; i < nbox ; i++ ) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG( "dispatch swap %d,%d-%d,%d\n",
			   x, y, w, h );

		BEGIN_RING( 7 );

		OUT_RING( CP_PACKET3( RADEON_CNTL_BITBLT_MULTI, 5 ) );
		OUT_RING( RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			  RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			  RADEON_GMC_BRUSH_NONE |
			  (dev_priv->color_fmt << 8) |
			  RADEON_GMC_SRC_DATATYPE_COLOR |
			  RADEON_ROP3_S |
			  RADEON_DP_SRC_SOURCE_MEMORY |
			  RADEON_GMC_CLR_CMP_CNTL_DIS |
			  RADEON_GMC_WR_MSK_DIS );

		/* Make this work even if front & back are flipped:
		 */
		if (dev_priv->current_page == 0) {
			OUT_RING( dev_priv->back_pitch_offset );
			OUT_RING( dev_priv->front_pitch_offset );
		}
		else {
			OUT_RING( dev_priv->front_pitch_offset );
			OUT_RING( dev_priv->back_pitch_offset );
		}

		OUT_RING( (x << 16) | y );
		OUT_RING( (x << 16) | y );
		OUT_RING( (w << 16) | h );

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;

	BEGIN_RING( 4 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_sarea_t *sarea = (drm_sarea_t *)dev_priv->sarea->handle;
	int offset = (dev_priv->current_page == 1)
		   ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",
		   __FUNCTION__,
		   dev_priv->current_page,
		   dev_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes( dev_priv );
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING( 6 );

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG( RADEON_CRTC_OFFSET, ( ( sarea->frame.y * dev_priv->front_pitch
					      + sarea->frame.x
					      * ( dev_priv->color_fmt - 2 ) ) & ~7 )
					  + offset );
	OUT_RING_REG( RADEON_CRTC2_OFFSET, dev_priv->sarea_priv->crtc2_base
					   + offset );

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	dev_priv->sarea_priv->last_frame++;
	dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page =
					      1 - dev_priv->current_page;

	BEGIN_RING( 2 );

	RADEON_FRAME_AGE( dev_priv->sarea_priv->last_frame );

	ADVANCE_RING();
}

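/* Reject primitives whose vertex count cannot be valid for the given
 * primitive type (e.g. an odd count for a line list, or a count not
 * divisible by three for a triangle list).  Returns nonzero if bad.
 */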
static int bad_prim_vertex_nr( int primitive, int nr )
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}

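/* Description of one TCL primitive within a vertex buffer: the byte
 * range it occupies, the primitive type, vertex count, buffer offset
 * and vertex format.
 */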
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex( drm_device_t *dev,
				       drm_buf_t *buf,
				       drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, prim->numverts )) {
		DRM_ERROR( "bad prim %x numverts %d\n",
			   prim->prim, prim->numverts );
		return;
	}

	do {
		/* Emit the next cliprect */
		if ( i < nbox ) {
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING( 5 );

		OUT_RING( CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, 3 ) );
		OUT_RING( offset );
		OUT_RING( numverts );
		OUT_RING( prim->vc_format );
		OUT_RING( prim->prim | RADEON_PRIM_WALK_LIST |
			  RADEON_COLOR_ORDER_RGBA |
			  RADEON_VTX_FMT_RADEON_MODE |
			  (numverts << RADEON_NUM_VERTICES_SHIFT) );

		ADVANCE_RING();

		i++;
	} while ( i < nbox );
}

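/* Stamp a buffer with the current dispatch age and mark it pending, so
 * the freelist can recycle it once the CP has aged past it.
 */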
static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++dev_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	BEGIN_RING( 2 );
	RADEON_DISPATCH_AGE( buf_priv->age );
	ADVANCE_RING();

	buf->pending = 1;
	buf->used = 0;
}

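/* Fire an indirect buffer: the CP fetches and executes the dwords in
 * [start, end) from the buffer.  The fetch length must be an even
 * number of dwords, so odd-sized data is padded with a type-2 packet.
 */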
static void radeon_cp_dispatch_indirect( drm_device_t *dev,
					 drm_buf_t *buf,
					 int start, int end )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n",
		   buf->idx, start, end );

	if ( start != end ) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if ( dwords & 1 ) {
			u32 *data = (u32 *)
				((char *)dev->agp_buffer_map->handle
				 + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING( 3 );

		OUT_RING( CP_PACKET0( RADEON_CP_IB_BASE, 1 ) );
		OUT_RING( offset );
		OUT_RING( dwords );

		ADVANCE_RING();
	}
}

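/* Dispatch an indexed primitive.  The RNDR_GEN_INDX_PRIM header is
 * written into the element buffer itself, which is then fired once per
 * cliprect through radeon_cp_dispatch_indirect().
 */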
static void radeon_cp_dispatch_indices( drm_device_t *dev,
					drm_buf_t *elt_buf,
					drm_radeon_tcl_prim_t *prim )
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start,
		  prim->finish,
		  prim->offset,
		  prim->numverts);

	if (bad_prim_vertex_nr( prim->prim, count )) {
		DRM_ERROR( "bad prim %x count %d\n",
			   prim->prim, count );
		return;
	}


	if ( start >= prim->finish ||
	     (prim->start & 0x7) ) {
		DRM_ERROR( "buffer prim %d\n", prim->prim );
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *)((char *)dev->agp_buffer_map->handle +
		       elt_buf->offset + prim->start);

	data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 );
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT) );

	do {
		if ( i < nbox )
			radeon_emit_clip_rect( dev_priv,
					       &sarea_priv->boxes[i] );

		radeon_cp_dispatch_indirect( dev, elt_buf,
					     prim->start,
					     prim->finish );

		i++;
	} while ( i < nbox );
}

#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE

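/* Upload texture data by blitting it from a GART buffer to the
 * destination in card memory, splitting large images into multiple
 * passes and laying out micro-tiled destinations by hand.
 */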
1498static int radeon_cp_dispatch_texture( DRMFILE filp,
1499 drm_device_t *dev,
1500 drm_radeon_texture_t *tex,
1501 drm_radeon_tex_image_t *image )
1502{
1503 drm_radeon_private_t *dev_priv = dev->dev_private;
1504 drm_file_t *filp_priv;
1505 drm_buf_t *buf;
1506 u32 format;
1507 u32 *buffer;
1508 const u8 __user *data;
Dave Airlieffbbf7a2005-08-20 17:40:04 +10001509 int size, dwords, tex_width, blit_width, spitch;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 u32 height;
1511 int i;
1512 u32 texpitch, microtile;
Dave Airlieffbbf7a2005-08-20 17:40:04 +10001513 u32 offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 RING_LOCALS;
1515
1516 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
1517
1518 if ( radeon_check_and_fixup_offset( dev_priv, filp_priv, &tex->offset ) ) {
1519 DRM_ERROR( "Invalid destination offset\n" );
1520 return DRM_ERR( EINVAL );
1521 }
1522
1523 dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1524
1525 /* Flush the pixel cache. This ensures no pixel data gets mixed
1526 * up with the texture data from the host data blit, otherwise
1527 * part of the texture image may be corrupted.
1528 */
1529 BEGIN_RING( 4 );
1530 RADEON_FLUSH_CACHE();
1531 RADEON_WAIT_UNTIL_IDLE();
1532 ADVANCE_RING();
1533
Linus Torvalds1da177e2005-04-16 15:20:36 -07001534 /* The compiler won't optimize away a division by a variable,
1535 * even if the only legal values are powers of two. Thus, we'll
1536 * use a shift instead.
1537 */
1538 switch ( tex->format ) {
1539 case RADEON_TXFORMAT_ARGB8888:
1540 case RADEON_TXFORMAT_RGBA8888:
1541 format = RADEON_COLOR_FORMAT_ARGB8888;
1542 tex_width = tex->width * 4;
1543 blit_width = image->width * 4;
1544 break;
1545 case RADEON_TXFORMAT_AI88:
1546 case RADEON_TXFORMAT_ARGB1555:
1547 case RADEON_TXFORMAT_RGB565:
1548 case RADEON_TXFORMAT_ARGB4444:
1549 case RADEON_TXFORMAT_VYUY422:
1550 case RADEON_TXFORMAT_YVYU422:
1551 format = RADEON_COLOR_FORMAT_RGB565;
1552 tex_width = tex->width * 2;
1553 blit_width = image->width * 2;
1554 break;
1555 case RADEON_TXFORMAT_I8:
1556 case RADEON_TXFORMAT_RGB332:
1557 format = RADEON_COLOR_FORMAT_CI8;
1558 tex_width = tex->width * 1;
1559 blit_width = image->width * 1;
1560 break;
1561 default:
1562 DRM_ERROR( "invalid texture format %d\n", tex->format );
1563 return DRM_ERR(EINVAL);
1564 }
Dave Airlieffbbf7a2005-08-20 17:40:04 +10001565 spitch = blit_width >> 6;
1566 if (spitch == 0 && image->height > 1)
1567 return DRM_ERR(EINVAL);
1568
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569 texpitch = tex->pitch;
1570 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1571 microtile = 1;
1572 if (tex_width < 64) {
1573 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1574 /* we got tiled coordinates, untile them */
1575 image->x *= 2;
1576 }
1577 }
1578 else microtile = 0;
1579
1580 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
1581
1582 do {
1583 DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1584 tex->offset >> 10, tex->pitch, tex->format,
1585 image->x, image->y, image->width, image->height );
1586
1587 /* Make a copy of some parameters in case we have to
1588 * update them for a multi-pass texture blit.
1589 */
1590 height = image->height;
1591 data = (const u8 __user *)image->data;
1592
1593 size = height * blit_width;
1594
1595 if ( size > RADEON_MAX_TEXTURE_SIZE ) {
1596 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1597 size = height * blit_width;
1598 } else if ( size < 4 && size > 0 ) {
1599 size = 4;
1600 } else if ( size == 0 ) {
1601 return 0;
1602 }
1603
1604 buf = radeon_freelist_get( dev );
1605 if ( 0 && !buf ) {
1606 radeon_do_cp_idle( dev_priv );
1607 buf = radeon_freelist_get( dev );
1608 }
1609 if ( !buf ) {
1610 DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
1611 if (DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ))
1612 return DRM_ERR(EFAULT);
1613 return DRM_ERR(EAGAIN);
1614 }
1615
1616
1617 /* Dispatch the indirect buffer.
1618 */
1619 buffer = (u32*)((char*)dev->agp_buffer_map->handle + buf->offset);
1620 dwords = size / 4;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621
1622 if (microtile) {
1623 /* texture micro tiling in use, minimum texture width is thus 16 bytes.
1624 however, we cannot use blitter directly for texture width < 64 bytes,
1625 since minimum tex pitch is 64 bytes and we need this to match
1626 the texture width, otherwise the blitter will tile it wrong.
1627 Thus, tiling manually in this case. Additionally, need to special
1628 case tex height = 1, since our actual image will have height 2
1629 and we need to ensure we don't read beyond the texture size
1630 from user space. */
1631 if (tex->height == 1) {
1632 if (tex_width >= 64 || tex_width <= 16) {
1633 if (DRM_COPY_FROM_USER(buffer, data,
1634 tex_width * sizeof(u32))) {
1635 DRM_ERROR("EFAULT on pad, %d bytes\n",
1636 tex_width);
1637 return DRM_ERR(EFAULT);
1638 }
1639 } else if (tex_width == 32) {
1640 if (DRM_COPY_FROM_USER(buffer, data, 16)) {
1641 DRM_ERROR("EFAULT on pad, %d bytes\n",
1642 tex_width);
1643 return DRM_ERR(EFAULT);
1644 }
1645 if (DRM_COPY_FROM_USER(buffer + 8, data + 16, 16)) {
1646 DRM_ERROR("EFAULT on pad, %d bytes\n",
1647 tex_width);
1648 return DRM_ERR(EFAULT);
1649 }
1650 }
1651 } else if (tex_width >= 64 || tex_width == 16) {
1652 if (DRM_COPY_FROM_USER(buffer, data,
1653 dwords * sizeof(u32))) {
1654 DRM_ERROR("EFAULT on data, %d dwords\n",
1655 dwords);
1656 return DRM_ERR(EFAULT);
1657 }
1658 } else if (tex_width < 16) {
1659 for (i = 0; i < tex->height; i++) {
1660 if (DRM_COPY_FROM_USER(buffer, data, tex_width)) {
1661 DRM_ERROR("EFAULT on pad, %d bytes\n",
1662 tex_width);
1663 return DRM_ERR(EFAULT);
1664 }
1665 buffer += 4;
1666 data += tex_width;
1667 }
1668 } else if (tex_width == 32) {
1669 /* TODO: make sure this works when not fitting in one buffer
1670 (i.e. 32bytes x 2048...) */
1671 for (i = 0; i < tex->height; i += 2) {
1672 if (DRM_COPY_FROM_USER(buffer, data, 16)) {
1673 DRM_ERROR("EFAULT on pad, %d bytes\n",
1674 tex_width);
1675 return DRM_ERR(EFAULT);
1676 }
1677 data += 16;
1678 if (DRM_COPY_FROM_USER(buffer + 8, data, 16)) {
1679 DRM_ERROR("EFAULT on pad, %d bytes\n",
1680 tex_width);
1681 return DRM_ERR(EFAULT);
1682 }
1683 data += 16;
1684 if (DRM_COPY_FROM_USER(buffer + 4, data, 16)) {
1685 DRM_ERROR("EFAULT on pad, %d bytes\n",
1686 tex_width);
1687 return DRM_ERR(EFAULT);
1688 }
1689 data += 16;
1690 if (DRM_COPY_FROM_USER(buffer + 12, data, 16)) {
1691 DRM_ERROR("EFAULT on pad, %d bytes\n",
1692 tex_width);
1693 return DRM_ERR(EFAULT);
1694 }
1695 data += 16;
1696 buffer += 16;
1697 }
1698 }
1699 }
1700 else {
1701 if (tex_width >= 32) {
1702 /* Texture image width is larger than the minimum, so we
1703 * can upload it directly.
1704 */
1705 if (DRM_COPY_FROM_USER(buffer, data,
1706 dwords * sizeof(u32))) {
1707 DRM_ERROR("EFAULT on data, %d dwords\n",
1708 dwords);
1709 return DRM_ERR(EFAULT);
1710 }
1711 } else {
1712 /* Texture image width is less than the minimum, so we
1713 * need to pad out each image scanline to the minimum
1714 * width.
1715 */
1716 for (i = 0 ; i < tex->height ; i++) {
1717 if (DRM_COPY_FROM_USER(buffer, data, tex_width )) {
1718 DRM_ERROR("EFAULT on pad, %d bytes\n", tex_width);
1719 return DRM_ERR(EFAULT);
1720 }
1721 buffer += 8;
1722 data += tex_width;
1723 }
1724 }
1725 }
1726
1727 buf->filp = filp;
Dave Airlieffbbf7a2005-08-20 17:40:04 +10001728 buf->used = size;
1729 offset = dev_priv->gart_buffers_offset + buf->offset;
1730 BEGIN_RING(9);
1731 OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
1732 OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
1733 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
1734 RADEON_GMC_BRUSH_NONE |
1735 (format << 8) |
1736 RADEON_GMC_SRC_DATATYPE_COLOR |
1737 RADEON_ROP3_S |
1738 RADEON_DP_SRC_SOURCE_MEMORY |
1739 RADEON_GMC_CLR_CMP_CNTL_DIS |
1740 RADEON_GMC_WR_MSK_DIS );
1741 OUT_RING((spitch << 22) | (offset >> 10));
1742 OUT_RING((texpitch << 22) | (tex->offset >> 10));
1743 OUT_RING(0);
1744 OUT_RING((image->x << 16) | image->y);
1745 OUT_RING((image->width << 16) | height);
1746 RADEON_WAIT_UNTIL_2D_IDLE();
1747 ADVANCE_RING();
1748
1749 radeon_cp_discard_buffer(dev, buf);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
1751 /* Update the input parameters for next time */
1752 image->y += height;
1753 image->height -= height;
1754 image->data = (const u8 __user *)image->data + size;
1755 } while (image->height > 0);
1756
1757 /* Flush the pixel cache after the blit completes. This ensures
1758 * the texture data is written out to memory before rendering
1759 * continues.
1760 */
1761 BEGIN_RING( 4 );
1762 RADEON_FLUSH_CACHE();
1763 RADEON_WAIT_UNTIL_2D_IDLE();
1764 ADVANCE_RING();
1765 return 0;
1766}
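/* Usage sketch (illustrative, not part of the driver): the upload above
 * is what a client triggers with DRM_IOCTL_RADEON_TEXTURE.  Field values
 * are assumptions for a 256x256 RGB565 image; pitch is taken to be in
 * the 64-byte units the blitter expects:
 *
 *	drm_radeon_tex_image_t img = {
 *		.x = 0, .y = 0, .width = 256, .height = 256,
 *		.data = texels,			// user-space pointer
 *	};
 *	drm_radeon_texture_t tex = {
 *		.offset = dst_offset,		// hypothetical destination
 *		.pitch = (256 * 2) / 64,
 *		.format = RADEON_TXFORMAT_RGB565,
 *		.width = 256, .height = 256,
 *		.image = &img,
 *	};
 *	ioctl(fd, DRM_IOCTL_RADEON_TEXTURE, &tex);
 *
 * Since image->y/height/data are advanced in place above, a caller that
 * sees EAGAIN is expected to simply retry the ioctl to resume the upload.
 */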
1767
1768
1769static void radeon_cp_dispatch_stipple( drm_device_t *dev, u32 *stipple )
1770{
1771 drm_radeon_private_t *dev_priv = dev->dev_private;
1772 int i;
1773 RING_LOCALS;
1774 DRM_DEBUG( "\n" );
1775
1776 BEGIN_RING( 35 );
1777
1778 OUT_RING( CP_PACKET0( RADEON_RE_STIPPLE_ADDR, 0 ) );
1779 OUT_RING( 0x00000000 );
1780
1781 OUT_RING( CP_PACKET0_TABLE( RADEON_RE_STIPPLE_DATA, 31 ) );
1782 for ( i = 0 ; i < 32 ; i++ ) {
1783 OUT_RING( stipple[i] );
1784 }
1785
1786 ADVANCE_RING();
1787}
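/* Usage sketch (illustrative only): the 32x32 stipple pattern arrives
 * via DRM_IOCTL_RADEON_STIPPLE as 32 dwords, one per row, which the
 * dispatch above streams into RADEON_RE_STIPPLE_DATA:
 *
 *	u32 pattern[32];
 *	int i;
 *	for (i = 0; i < 32; i++)		// checkerboard rows
 *		pattern[i] = (i & 1) ? 0x55555555 : 0xaaaaaaaa;
 *	drm_radeon_stipple_t stipple = { .mask = pattern };
 *	ioctl(fd, DRM_IOCTL_RADEON_STIPPLE, &stipple);
 */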
1788
1789static void radeon_apply_surface_regs(int surf_index, drm_radeon_private_t *dev_priv)
1790{
1791 if (!dev_priv->mmio)
1792 return;
1793
1794 radeon_do_cp_idle(dev_priv);
1795
1796 RADEON_WRITE(RADEON_SURFACE0_INFO + 16*surf_index,
1797 dev_priv->surfaces[surf_index].flags);
1798 RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16*surf_index,
1799 dev_priv->surfaces[surf_index].lower);
1800 RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16*surf_index,
1801 dev_priv->surfaces[surf_index].upper);
1802}
1803
1804
1805/* Allocates a virtual surface.
1806 * Doesn't always allocate a real hardware surface; it will stretch
1807 * an existing surface when possible.
1808 *
1809 * Note that refcount can be at most 2: if it could reach 3, a free
1810 * might force us to allocate a new surface, and a suitable one might
1811 * not always be available.
1812 * For example: we allocate three contiguous surfaces ABC. If B is
1813 * freed, we suddenly need two surfaces to store A and C, which might
1814 * not always be available.
1815 */
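/* Worked example of the stretch paths below: suppose real surface A
 * covers [0x00000,0x0ffff] with refcount 1, and a request arrives for
 * [0x10000,0x1ffff] with identical flags.  The "extend after" test
 * (new_lower == upper + 1) matches, so A's upper bound grows to
 * 0x1ffff, its refcount becomes 2, and both clients share one set of
 * hardware surface registers instead of burning a second slot.
 */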
1816static int alloc_surface(drm_radeon_surface_alloc_t* new, drm_radeon_private_t *dev_priv, DRMFILE filp)
1817{
1818 struct radeon_virt_surface *s;
1819 int i;
1820 int virt_surface_index;
1821 uint32_t new_upper, new_lower;
1822
1823 new_lower = new->address;
1824 new_upper = new_lower + new->size - 1;
1825
1826 /* sanity check */
1827 if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
1828 ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) != RADEON_SURF_ADDRESS_FIXED_MASK) ||
1829 ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
1830 return -1;
1831
1832 /* make sure there is no overlap with existing surfaces */
1833 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1834 if ((dev_priv->surfaces[i].refcount != 0) &&
1835 (((new_lower >= dev_priv->surfaces[i].lower) &&
1836 (new_lower < dev_priv->surfaces[i].upper)) ||
1837 ((new_lower < dev_priv->surfaces[i].lower) &&
1838 (new_upper > dev_priv->surfaces[i].lower))))
1839 return -1;
1840 }
1841
1842 /* find a virtual surface */
1843 for (i = 0; i < 2*RADEON_MAX_SURFACES; i++)
1844 if (dev_priv->virt_surfaces[i].filp == 0)
1845 break;
1846 if (i == 2*RADEON_MAX_SURFACES)
1847 return -1;
1848 virt_surface_index = i;
1849
1850 /* try to reuse an existing surface */
1851 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1852 /* extend before */
1853 if ((dev_priv->surfaces[i].refcount == 1) &&
1854 (new->flags == dev_priv->surfaces[i].flags) &&
1855 (new_upper + 1 == dev_priv->surfaces[i].lower)) {
1856 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1857 s->surface_index = i;
1858 s->lower = new_lower;
1859 s->upper = new_upper;
1860 s->flags = new->flags;
1861 s->filp = filp;
1862 dev_priv->surfaces[i].refcount++;
1863 dev_priv->surfaces[i].lower = s->lower;
1864 radeon_apply_surface_regs(s->surface_index, dev_priv);
1865 return virt_surface_index;
1866 }
1867
1868 /* extend after */
1869 if ((dev_priv->surfaces[i].refcount == 1) &&
1870 (new->flags == dev_priv->surfaces[i].flags) &&
1871 (new_lower == dev_priv->surfaces[i].upper + 1)) {
1872 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1873 s->surface_index = i;
1874 s->lower = new_lower;
1875 s->upper = new_upper;
1876 s->flags = new->flags;
1877 s->filp = filp;
1878 dev_priv->surfaces[i].refcount++;
1879 dev_priv->surfaces[i].upper = s->upper;
1880 radeon_apply_surface_regs(s->surface_index, dev_priv);
1881 return virt_surface_index;
1882 }
1883 }
1884
1885 /* okay, we need a new one */
1886 for (i = 0; i < RADEON_MAX_SURFACES; i++) {
1887 if (dev_priv->surfaces[i].refcount == 0) {
1888 s = &(dev_priv->virt_surfaces[virt_surface_index]);
1889 s->surface_index = i;
1890 s->lower = new_lower;
1891 s->upper = new_upper;
1892 s->flags = new->flags;
1893 s->filp = filp;
1894 dev_priv->surfaces[i].refcount = 1;
1895 dev_priv->surfaces[i].lower = s->lower;
1896 dev_priv->surfaces[i].upper = s->upper;
1897 dev_priv->surfaces[i].flags = s->flags;
1898 radeon_apply_surface_regs(s->surface_index, dev_priv);
1899 return virt_surface_index;
1900 }
1901 }
1902
1903 /* we didn't find anything */
1904 return -1;
1905}
1906
1907static int free_surface(DRMFILE filp, drm_radeon_private_t *dev_priv, int lower)
1908{
1909 struct radeon_virt_surface *s;
1910 int i;
1911 /* find the virtual surface */
1912 for (i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1913 s = &(dev_priv->virt_surfaces[i]);
1914 if (s->filp) {
1915 if ((lower == s->lower) && (filp == s->filp)) {
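/* Shrink the real surface away from the freed range: if this
 * virtual surface sat at either edge of the real one, pull
 * that bound inward so a surviving neighbour keeps coverage. */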
1916 if (dev_priv->surfaces[s->surface_index].lower == s->lower)
1917 dev_priv->surfaces[s->surface_index].lower = s->upper;
1918
1919 if (dev_priv->surfaces[s->surface_index].upper == s->upper)
1920 dev_priv->surfaces[s->surface_index].upper = s->lower;
1921
1922 dev_priv->surfaces[s->surface_index].refcount--;
1923 if (dev_priv->surfaces[s->surface_index].refcount == 0)
1924 dev_priv->surfaces[s->surface_index].flags = 0;
1925 s->filp = NULL;
1926 radeon_apply_surface_regs(s->surface_index, dev_priv);
1927 return 0;
1928 }
1929 }
1930 }
1931 return 1;
1932}
1933
1934static void radeon_surfaces_release(DRMFILE filp, drm_radeon_private_t *dev_priv)
1935{
1936 int i;
1937 for (i = 0; i < 2*RADEON_MAX_SURFACES; i++) {
1938
1939 if (dev_priv->virt_surfaces[i].filp == filp)
1940 free_surface(filp, dev_priv, dev_priv->virt_surfaces[i].lower);
1941 }
1942}
1943
1944/* ================================================================
1945 * IOCTL functions
1946 */
1947static int radeon_surface_alloc(DRM_IOCTL_ARGS)
1948{
1949 DRM_DEVICE;
1950 drm_radeon_private_t *dev_priv = dev->dev_private;
1951 drm_radeon_surface_alloc_t alloc;
1952
1953 if (!dev_priv) {
1954 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1955 return DRM_ERR(EINVAL);
1956 }
1957
1958 DRM_COPY_FROM_USER_IOCTL(alloc, (drm_radeon_surface_alloc_t __user *)data,
1959 sizeof(alloc));
1960
1961 if (alloc_surface(&alloc, dev_priv, filp) == -1)
1962 return DRM_ERR(EINVAL);
1963 else
1964 return 0;
1965}
1966
1967static int radeon_surface_free(DRM_IOCTL_ARGS)
1968{
1969 DRM_DEVICE;
1970 drm_radeon_private_t *dev_priv = dev->dev_private;
1971 drm_radeon_surface_free_t memfree;
1972
1973 if (!dev_priv) {
1974 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
1975 return DRM_ERR(EINVAL);
1976 }
1977
1978 DRM_COPY_FROM_USER_IOCTL(memfree, (drm_radeon_surface_free_t __user *)data,
1979 sizeof(memfree) );
1980
1981 if (free_surface(filp, dev_priv, memfree.address))
1982 return DRM_ERR(EINVAL);
1983 else
1984 return 0;
1985}
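/* Usage sketch (illustrative only): pairing the two ioctls above.
 * Names are assumptions; the address must respect the
 * RADEON_SURF_ADDRESS_FIXED_MASK alignment checked in alloc_surface():
 *
 *	drm_radeon_surface_alloc_t alloc = {
 *		.address = surf_offset,		// suitably aligned
 *		.size = surf_size,
 *		.flags = tiling_flags,
 *	};
 *	ioctl(fd, DRM_IOCTL_RADEON_SURF_ALLOC, &alloc);
 *	...
 *	drm_radeon_surface_free_t fr = { .address = surf_offset };
 *	ioctl(fd, DRM_IOCTL_RADEON_SURF_FREE, &fr);
 */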
1986
1987static int radeon_cp_clear( DRM_IOCTL_ARGS )
1988{
1989 DRM_DEVICE;
1990 drm_radeon_private_t *dev_priv = dev->dev_private;
1991 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
1992 drm_radeon_clear_t clear;
1993 drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
1994 DRM_DEBUG( "\n" );
1995
1996 LOCK_TEST_WITH_RETURN( dev, filp );
1997
1998 DRM_COPY_FROM_USER_IOCTL( clear, (drm_radeon_clear_t __user *)data,
1999 sizeof(clear) );
2000
2001 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2002
2003 if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
2004 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2005
2006 if ( DRM_COPY_FROM_USER( &depth_boxes, clear.depth_boxes,
2007 sarea_priv->nbox * sizeof(depth_boxes[0]) ) )
2008 return DRM_ERR(EFAULT);
2009
2010 radeon_cp_dispatch_clear( dev, &clear, depth_boxes );
2011
2012 COMMIT_RING();
2013 return 0;
2014}
2015
2016
2017/* Not sure why this isn't set all the time:
2018 */
2019static int radeon_do_init_pageflip( drm_device_t *dev )
2020{
2021 drm_radeon_private_t *dev_priv = dev->dev_private;
2022 RING_LOCALS;
2023
2024 DRM_DEBUG( "\n" );
2025
2026 BEGIN_RING( 6 );
2027 RADEON_WAIT_UNTIL_3D_IDLE();
2028 OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET_CNTL, 0 ) );
2029 OUT_RING( RADEON_READ( RADEON_CRTC_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
2030 OUT_RING( CP_PACKET0( RADEON_CRTC2_OFFSET_CNTL, 0 ) );
2031 OUT_RING( RADEON_READ( RADEON_CRTC2_OFFSET_CNTL ) | RADEON_CRTC_OFFSET_FLIP_CNTL );
2032 ADVANCE_RING();
2033
2034 dev_priv->page_flipping = 1;
2035 dev_priv->current_page = 0;
2036 dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page;
2037
2038 return 0;
2039}
2040
2041/* Called whenever a client dies, from drm_release.
2042 * NOTE: Lock isn't necessarily held when this is called!
2043 */
2044static int radeon_do_cleanup_pageflip( drm_device_t *dev )
2045{
2046 drm_radeon_private_t *dev_priv = dev->dev_private;
2047 DRM_DEBUG( "\n" );
2048
2049 if (dev_priv->current_page != 0)
2050 radeon_cp_dispatch_flip( dev );
2051
2052 dev_priv->page_flipping = 0;
2053 return 0;
2054}
2055
2056/* Swapping and flipping are different operations, need different ioctls.
2057 * They can & should be intermixed to support multiple 3d windows.
2058 */
2059static int radeon_cp_flip( DRM_IOCTL_ARGS )
2060{
2061 DRM_DEVICE;
2062 drm_radeon_private_t *dev_priv = dev->dev_private;
2063 DRM_DEBUG( "\n" );
2064
2065 LOCK_TEST_WITH_RETURN( dev, filp );
2066
2067 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2068
2069 if (!dev_priv->page_flipping)
2070 radeon_do_init_pageflip( dev );
2071
2072 radeon_cp_dispatch_flip( dev );
2073
2074 COMMIT_RING();
2075 return 0;
2076}
2077
2078static int radeon_cp_swap( DRM_IOCTL_ARGS )
2079{
2080 DRM_DEVICE;
2081 drm_radeon_private_t *dev_priv = dev->dev_private;
2082 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2083 DRM_DEBUG( "\n" );
2084
2085 LOCK_TEST_WITH_RETURN( dev, filp );
2086
2087 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2088
2089 if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS )
2090 sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;
2091
2092 radeon_cp_dispatch_swap( dev );
2093 dev_priv->sarea_priv->ctx_owner = 0;
2094
2095 COMMIT_RING();
2096 return 0;
2097}
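/* Usage sketch (illustrative only): both ioctls above take no argument
 * block.  A full-screen client flips pages each frame, while a windowed
 * one blits the back buffer through the SAREA cliprects instead:
 *
 *	if (fullscreen)
 *		ioctl(fd, DRM_IOCTL_RADEON_FLIP, NULL);
 *	else
 *		ioctl(fd, DRM_IOCTL_RADEON_SWAP, NULL);
 */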
2098
2099static int radeon_cp_vertex( DRM_IOCTL_ARGS )
2100{
2101 DRM_DEVICE;
2102 drm_radeon_private_t *dev_priv = dev->dev_private;
2103 drm_file_t *filp_priv;
2104 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2105 drm_device_dma_t *dma = dev->dma;
2106 drm_buf_t *buf;
2107 drm_radeon_vertex_t vertex;
2108 drm_radeon_tcl_prim_t prim;
2109
2110 LOCK_TEST_WITH_RETURN( dev, filp );
2111
2112 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2113
2114 DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex_t __user *)data,
2115 sizeof(vertex) );
2116
2117 DRM_DEBUG( "pid=%d index=%d count=%d discard=%d\n",
2118 DRM_CURRENTPID,
2119 vertex.idx, vertex.count, vertex.discard );
2120
2121 if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2122 DRM_ERROR( "buffer index %d (of %d max)\n",
2123 vertex.idx, dma->buf_count - 1 );
2124 return DRM_ERR(EINVAL);
2125 }
2126 if ( vertex.prim < 0 ||
2127 vertex.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2128 DRM_ERROR( "buffer prim %d\n", vertex.prim );
2129 return DRM_ERR(EINVAL);
2130 }
2131
2132 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2133 VB_AGE_TEST_WITH_RETURN( dev_priv );
2134
2135 buf = dma->buflist[vertex.idx];
2136
2137 if ( buf->filp != filp ) {
2138 DRM_ERROR( "process %d using buffer owned by %p\n",
2139 DRM_CURRENTPID, buf->filp );
2140 return DRM_ERR(EINVAL);
2141 }
2142 if ( buf->pending ) {
2143 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2144 return DRM_ERR(EINVAL);
2145 }
2146
2147 /* Build up a prim_t record:
2148 */
2149 if (vertex.count) {
2150 buf->used = vertex.count; /* not used? */
2151
2152 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2153 if ( radeon_emit_state( dev_priv, filp_priv,
2154 &sarea_priv->context_state,
2155 sarea_priv->tex_state,
2156 sarea_priv->dirty ) ) {
2157 DRM_ERROR( "radeon_emit_state failed\n" );
2158 return DRM_ERR( EINVAL );
2159 }
2160
2161 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2162 RADEON_UPLOAD_TEX1IMAGES |
2163 RADEON_UPLOAD_TEX2IMAGES |
2164 RADEON_REQUIRE_QUIESCENCE);
2165 }
2166
2167 prim.start = 0;
2168 prim.finish = vertex.count; /* unused */
2169 prim.prim = vertex.prim;
2170 prim.numverts = vertex.count;
2171 prim.vc_format = dev_priv->sarea_priv->vc_format;
2172
2173 radeon_cp_dispatch_vertex( dev, buf, &prim );
2174 }
2175
2176 if (vertex.discard) {
2177 radeon_cp_discard_buffer( dev, buf );
2178 }
2179
2180 COMMIT_RING();
2181 return 0;
2182}
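/* Usage sketch (illustrative only): after filling DMA buffer `idx`
 * (obtained e.g. via drmDMA()) with vertices, a client submits it;
 * discard=1 hands the buffer back to the freelist afterwards:
 *
 *	drm_radeon_vertex_t v = {
 *		.prim = RADEON_PRIM_TYPE_TRI_LIST,
 *		.idx = idx,
 *		.count = nr_verts,
 *		.discard = 1,
 *	};
 *	ioctl(fd, DRM_IOCTL_RADEON_VERTEX, &v);
 */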
2183
2184static int radeon_cp_indices( DRM_IOCTL_ARGS )
2185{
2186 DRM_DEVICE;
2187 drm_radeon_private_t *dev_priv = dev->dev_private;
2188 drm_file_t *filp_priv;
2189 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2190 drm_device_dma_t *dma = dev->dma;
2191 drm_buf_t *buf;
2192 drm_radeon_indices_t elts;
2193 drm_radeon_tcl_prim_t prim;
2194 int count;
2195
2196 LOCK_TEST_WITH_RETURN( dev, filp );
2197
2198 if ( !dev_priv ) {
2199 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2200 return DRM_ERR(EINVAL);
2201 }
2202
2203 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2204
2205 DRM_COPY_FROM_USER_IOCTL( elts, (drm_radeon_indices_t __user *)data,
2206 sizeof(elts) );
2207
2208 DRM_DEBUG( "pid=%d index=%d start=%d end=%d discard=%d\n",
2209 DRM_CURRENTPID,
2210 elts.idx, elts.start, elts.end, elts.discard );
2211
2212 if ( elts.idx < 0 || elts.idx >= dma->buf_count ) {
2213 DRM_ERROR( "buffer index %d (of %d max)\n",
2214 elts.idx, dma->buf_count - 1 );
2215 return DRM_ERR(EINVAL);
2216 }
2217 if ( elts.prim < 0 ||
2218 elts.prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST ) {
2219 DRM_ERROR( "buffer prim %d\n", elts.prim );
2220 return DRM_ERR(EINVAL);
2221 }
2222
2223 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2224 VB_AGE_TEST_WITH_RETURN( dev_priv );
2225
2226 buf = dma->buflist[elts.idx];
2227
2228 if ( buf->filp != filp ) {
2229 DRM_ERROR( "process %d using buffer owned by %p\n",
2230 DRM_CURRENTPID, buf->filp );
2231 return DRM_ERR(EINVAL);
2232 }
2233 if ( buf->pending ) {
2234 DRM_ERROR( "sending pending buffer %d\n", elts.idx );
2235 return DRM_ERR(EINVAL);
2236 }
2237
2238 count = (elts.end - elts.start) / sizeof(u16);
2239 elts.start -= RADEON_INDEX_PRIM_OFFSET;
2240
2241 if ( elts.start & 0x7 ) {
2242 DRM_ERROR( "misaligned buffer 0x%x\n", elts.start );
2243 return DRM_ERR(EINVAL);
2244 }
2245 if ( elts.start < buf->used ) {
2246 DRM_ERROR( "no header 0x%x - 0x%x\n", elts.start, buf->used );
2247 return DRM_ERR(EINVAL);
2248 }
2249
2250 buf->used = elts.end;
2251
2252 if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) {
2253 if ( radeon_emit_state( dev_priv, filp_priv,
2254 &sarea_priv->context_state,
2255 sarea_priv->tex_state,
2256 sarea_priv->dirty ) ) {
2257 DRM_ERROR( "radeon_emit_state failed\n" );
2258 return DRM_ERR( EINVAL );
2259 }
2260
2261 sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
2262 RADEON_UPLOAD_TEX1IMAGES |
2263 RADEON_UPLOAD_TEX2IMAGES |
2264 RADEON_REQUIRE_QUIESCENCE);
2265 }
2266
2267
2268 /* Build up a prim_t record:
2269 */
2270 prim.start = elts.start;
2271 prim.finish = elts.end;
2272 prim.prim = elts.prim;
2273 prim.offset = 0; /* offset from start of dma buffers */
2274 prim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2275 prim.vc_format = dev_priv->sarea_priv->vc_format;
2276
2277 radeon_cp_dispatch_indices( dev, buf, &prim );
2278 if (elts.discard) {
2279 radeon_cp_discard_buffer( dev, buf );
2280 }
2281
2282 COMMIT_RING();
2283 return 0;
2284}
2285
2286static int radeon_cp_texture( DRM_IOCTL_ARGS )
2287{
2288 DRM_DEVICE;
2289 drm_radeon_private_t *dev_priv = dev->dev_private;
2290 drm_radeon_texture_t tex;
2291 drm_radeon_tex_image_t image;
2292 int ret;
2293
2294 LOCK_TEST_WITH_RETURN( dev, filp );
2295
2296 DRM_COPY_FROM_USER_IOCTL( tex, (drm_radeon_texture_t __user *)data, sizeof(tex) );
2297
2298 if ( tex.image == NULL ) {
2299 DRM_ERROR( "null texture image!\n" );
2300 return DRM_ERR(EINVAL);
2301 }
2302
2303 if ( DRM_COPY_FROM_USER( &image,
2304 (drm_radeon_tex_image_t __user *)tex.image,
2305 sizeof(image) ) )
2306 return DRM_ERR(EFAULT);
2307
2308 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2309 VB_AGE_TEST_WITH_RETURN( dev_priv );
2310
2311 ret = radeon_cp_dispatch_texture( filp, dev, &tex, &image );
2312
2313 COMMIT_RING();
2314 return ret;
2315}
2316
2317static int radeon_cp_stipple( DRM_IOCTL_ARGS )
2318{
2319 DRM_DEVICE;
2320 drm_radeon_private_t *dev_priv = dev->dev_private;
2321 drm_radeon_stipple_t stipple;
2322 u32 mask[32];
2323
2324 LOCK_TEST_WITH_RETURN( dev, filp );
2325
2326 DRM_COPY_FROM_USER_IOCTL( stipple, (drm_radeon_stipple_t __user *)data,
2327 sizeof(stipple) );
2328
2329 if ( DRM_COPY_FROM_USER( &mask, stipple.mask, 32 * sizeof(u32) ) )
2330 return DRM_ERR(EFAULT);
2331
2332 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2333
2334 radeon_cp_dispatch_stipple( dev, mask );
2335
2336 COMMIT_RING();
2337 return 0;
2338}
2339
2340static int radeon_cp_indirect( DRM_IOCTL_ARGS )
2341{
2342 DRM_DEVICE;
2343 drm_radeon_private_t *dev_priv = dev->dev_private;
2344 drm_device_dma_t *dma = dev->dma;
2345 drm_buf_t *buf;
2346 drm_radeon_indirect_t indirect;
2347 RING_LOCALS;
2348
2349 LOCK_TEST_WITH_RETURN( dev, filp );
2350
2351 if ( !dev_priv ) {
2352 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2353 return DRM_ERR(EINVAL);
2354 }
2355
2356 DRM_COPY_FROM_USER_IOCTL( indirect, (drm_radeon_indirect_t __user *)data,
2357 sizeof(indirect) );
2358
2359 DRM_DEBUG( "indirect: idx=%d s=%d e=%d d=%d\n",
2360 indirect.idx, indirect.start,
2361 indirect.end, indirect.discard );
2362
2363 if ( indirect.idx < 0 || indirect.idx >= dma->buf_count ) {
2364 DRM_ERROR( "buffer index %d (of %d max)\n",
2365 indirect.idx, dma->buf_count - 1 );
2366 return DRM_ERR(EINVAL);
2367 }
2368
2369 buf = dma->buflist[indirect.idx];
2370
2371 if ( buf->filp != filp ) {
2372 DRM_ERROR( "process %d using buffer owned by %p\n",
2373 DRM_CURRENTPID, buf->filp );
2374 return DRM_ERR(EINVAL);
2375 }
2376 if ( buf->pending ) {
2377 DRM_ERROR( "sending pending buffer %d\n", indirect.idx );
2378 return DRM_ERR(EINVAL);
2379 }
2380
2381 if ( indirect.start < buf->used ) {
2382 DRM_ERROR( "reusing indirect: start=0x%x actual=0x%x\n",
2383 indirect.start, buf->used );
2384 return DRM_ERR(EINVAL);
2385 }
2386
2387 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2388 VB_AGE_TEST_WITH_RETURN( dev_priv );
2389
2390 buf->used = indirect.end;
2391
2392 /* Wait for the 3D stream to idle before the indirect buffer
2393 * containing 2D acceleration commands is processed.
2394 */
2395 BEGIN_RING( 2 );
2396
2397 RADEON_WAIT_UNTIL_3D_IDLE();
2398
2399 ADVANCE_RING();
2400
2401 /* Dispatch the indirect buffer full of commands from the
2402 * X server. This is insecure and is thus only available to
2403 * privileged clients.
2404 */
2405 radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end );
2406 if (indirect.discard) {
2407 radeon_cp_discard_buffer( dev, buf );
2408 }
2409
2410
2411 COMMIT_RING();
2412 return 0;
2413}
2414
2415static int radeon_cp_vertex2( DRM_IOCTL_ARGS )
2416{
2417 DRM_DEVICE;
2418 drm_radeon_private_t *dev_priv = dev->dev_private;
2419 drm_file_t *filp_priv;
2420 drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv;
2421 drm_device_dma_t *dma = dev->dma;
2422 drm_buf_t *buf;
2423 drm_radeon_vertex2_t vertex;
2424 int i;
2425 unsigned char laststate;
2426
2427 LOCK_TEST_WITH_RETURN( dev, filp );
2428
2429 if ( !dev_priv ) {
2430 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2431 return DRM_ERR(EINVAL);
2432 }
2433
2434 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2435
2436 DRM_COPY_FROM_USER_IOCTL( vertex, (drm_radeon_vertex2_t __user *)data,
2437 sizeof(vertex) );
2438
2439 DRM_DEBUG( "pid=%d index=%d discard=%d\n",
2440 DRM_CURRENTPID,
2441 vertex.idx, vertex.discard );
2442
2443 if ( vertex.idx < 0 || vertex.idx >= dma->buf_count ) {
2444 DRM_ERROR( "buffer index %d (of %d max)\n",
2445 vertex.idx, dma->buf_count - 1 );
2446 return DRM_ERR(EINVAL);
2447 }
2448
2449 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2450 VB_AGE_TEST_WITH_RETURN( dev_priv );
2451
2452 buf = dma->buflist[vertex.idx];
2453
2454 if ( buf->filp != filp ) {
2455 DRM_ERROR( "process %d using buffer owned by %p\n",
2456 DRM_CURRENTPID, buf->filp );
2457 return DRM_ERR(EINVAL);
2458 }
2459
2460 if ( buf->pending ) {
2461 DRM_ERROR( "sending pending buffer %d\n", vertex.idx );
2462 return DRM_ERR(EINVAL);
2463 }
2464
2465 if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
2466 return DRM_ERR(EINVAL);
2467
2468 for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) {
2469 drm_radeon_prim_t prim;
2470 drm_radeon_tcl_prim_t tclprim;
2471
2472 if ( DRM_COPY_FROM_USER( &prim, &vertex.prim[i], sizeof(prim) ) )
2473 return DRM_ERR(EFAULT);
2474
2475 if ( prim.stateidx != laststate ) {
2476 drm_radeon_state_t state;
2477
2478 if ( DRM_COPY_FROM_USER( &state,
2479 &vertex.state[prim.stateidx],
2480 sizeof(state) ) )
2481 return DRM_ERR(EFAULT);
2482
2483 if ( radeon_emit_state2( dev_priv, filp_priv, &state ) ) {
2484 DRM_ERROR( "radeon_emit_state2 failed\n" );
2485 return DRM_ERR( EINVAL );
2486 }
2487
2488 laststate = prim.stateidx;
2489 }
2490
2491 tclprim.start = prim.start;
2492 tclprim.finish = prim.finish;
2493 tclprim.prim = prim.prim;
2494 tclprim.vc_format = prim.vc_format;
2495
2496 if ( prim.prim & RADEON_PRIM_WALK_IND ) {
2497 tclprim.offset = prim.numverts * 64;
2498 tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */
2499
2500 radeon_cp_dispatch_indices( dev, buf, &tclprim );
2501 } else {
2502 tclprim.numverts = prim.numverts;
2503 tclprim.offset = 0; /* not used */
2504
2505 radeon_cp_dispatch_vertex( dev, buf, &tclprim );
2506 }
2507
2508 if (sarea_priv->nbox == 1)
2509 sarea_priv->nbox = 0;
2510 }
2511
2512 if ( vertex.discard ) {
2513 radeon_cp_discard_buffer( dev, buf );
2514 }
2515
2516 COMMIT_RING();
2517 return 0;
2518}
2519
2520
2521static int radeon_emit_packets(
2522 drm_radeon_private_t *dev_priv,
2523 drm_file_t *filp_priv,
2524 drm_radeon_cmd_header_t header,
2525 drm_radeon_cmd_buffer_t *cmdbuf )
2526{
2527 int id = (int)header.packet.packet_id;
2528 int sz, reg;
2529 int *data = (int *)cmdbuf->buf;
2530 RING_LOCALS;
2531
2532 if (id >= RADEON_MAX_STATE_PACKETS)
2533 return DRM_ERR(EINVAL);
2534
2535 sz = packet[id].len;
2536 reg = packet[id].start;
2537
2538 if (sz * sizeof(int) > cmdbuf->bufsz) {
2539 DRM_ERROR( "Packet size provided larger than data provided\n" );
2540 return DRM_ERR(EINVAL);
2541 }
2542
2543 if ( radeon_check_and_fixup_packets( dev_priv, filp_priv, id, data ) ) {
2544 DRM_ERROR( "Packet verification failed\n" );
2545 return DRM_ERR( EINVAL );
2546 }
2547
2548 BEGIN_RING(sz+1);
2549 OUT_RING( CP_PACKET0( reg, (sz-1) ) );
2550 OUT_RING_TABLE( data, sz );
2551 ADVANCE_RING();
2552
2553 cmdbuf->buf += sz * sizeof(int);
2554 cmdbuf->bufsz -= sz * sizeof(int);
2555 return 0;
2556}
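/* What lands in the ring above is a type-0 CP packet: one header dword
 * encoding the base register and a count of N-1, then N data dwords
 * that the CP writes to consecutive registers:
 *
 *	[CP_PACKET0(reg, sz-1)][data[0]]...[data[sz-1]]
 */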
2557
2558static __inline__ int radeon_emit_scalars(
2559 drm_radeon_private_t *dev_priv,
2560 drm_radeon_cmd_header_t header,
2561 drm_radeon_cmd_buffer_t *cmdbuf )
2562{
2563 int sz = header.scalars.count;
2564 int start = header.scalars.offset;
2565 int stride = header.scalars.stride;
2566 RING_LOCALS;
2567
2568 BEGIN_RING( 3+sz );
2569 OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2570 OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2571 OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2572 OUT_RING_TABLE( cmdbuf->buf, sz );
2573 ADVANCE_RING();
2574 cmdbuf->buf += sz * sizeof(int);
2575 cmdbuf->bufsz -= sz * sizeof(int);
2576 return 0;
2577}
2578
2579/* God this is ugly
2580 */
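/* Likely reason, noted here as an aside: the offset field in the
 * scalars command header is only a byte wide, so it cannot address
 * the upper bank of scalar registers; this variant rebases the
 * start index by 0x100 to reach it.
 */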
2581static __inline__ int radeon_emit_scalars2(
2582 drm_radeon_private_t *dev_priv,
2583 drm_radeon_cmd_header_t header,
2584 drm_radeon_cmd_buffer_t *cmdbuf )
2585{
2586 int sz = header.scalars.count;
2587 int start = ((unsigned int)header.scalars.offset) + 0x100;
2588 int stride = header.scalars.stride;
2589 RING_LOCALS;
2590
2591 BEGIN_RING( 3+sz );
2592 OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
2593 OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
2594 OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
2595 OUT_RING_TABLE( cmdbuf->buf, sz );
2596 ADVANCE_RING();
2597 cmdbuf->buf += sz * sizeof(int);
2598 cmdbuf->bufsz -= sz * sizeof(int);
2599 return 0;
2600}
2601
2602static __inline__ int radeon_emit_vectors(
2603 drm_radeon_private_t *dev_priv,
2604 drm_radeon_cmd_header_t header,
2605 drm_radeon_cmd_buffer_t *cmdbuf )
2606{
2607 int sz = header.vectors.count;
2608 int start = header.vectors.offset;
2609 int stride = header.vectors.stride;
2610 RING_LOCALS;
2611
2612 BEGIN_RING( 3+sz );
2613 OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) );
2614 OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
2615 OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) );
2616 OUT_RING_TABLE( cmdbuf->buf, sz );
2617 ADVANCE_RING();
2618
2619 cmdbuf->buf += sz * sizeof(int);
2620 cmdbuf->bufsz -= sz * sizeof(int);
2621 return 0;
2622}
2623
2624
2625static int radeon_emit_packet3( drm_device_t *dev,
2626 drm_file_t *filp_priv,
2627 drm_radeon_cmd_buffer_t *cmdbuf )
2628{
2629 drm_radeon_private_t *dev_priv = dev->dev_private;
2630 unsigned int cmdsz;
2631 int ret;
2632 RING_LOCALS;
2633
2634 DRM_DEBUG("\n");
2635
2636 if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
2637 cmdbuf, &cmdsz ) ) ) {
2638 DRM_ERROR( "Packet verification failed\n" );
2639 return ret;
2640 }
2641
2642 BEGIN_RING( cmdsz );
2643 OUT_RING_TABLE( cmdbuf->buf, cmdsz );
2644 ADVANCE_RING();
2645
2646 cmdbuf->buf += cmdsz * 4;
2647 cmdbuf->bufsz -= cmdsz * 4;
2648 return 0;
2649}
2650
2651
2652static int radeon_emit_packet3_cliprect( drm_device_t *dev,
2653 drm_file_t *filp_priv,
2654 drm_radeon_cmd_buffer_t *cmdbuf,
2655 int orig_nbox )
2656{
2657 drm_radeon_private_t *dev_priv = dev->dev_private;
2658 drm_clip_rect_t box;
2659 unsigned int cmdsz;
2660 int ret;
2661 drm_clip_rect_t __user *boxes = cmdbuf->boxes;
2662 int i = 0;
2663 RING_LOCALS;
2664
2665 DRM_DEBUG("\n");
2666
2667 if ( ( ret = radeon_check_and_fixup_packet3( dev_priv, filp_priv,
2668 cmdbuf, &cmdsz ) ) ) {
2669 DRM_ERROR( "Packet verification failed\n" );
2670 return ret;
2671 }
2672
2673 if (!orig_nbox)
2674 goto out;
2675
2676 do {
2677 if ( i < cmdbuf->nbox ) {
2678 if (DRM_COPY_FROM_USER( &box, &boxes[i], sizeof(box) ))
2679 return DRM_ERR(EFAULT);
2680 /* FIXME The second and subsequent times round
2681 * this loop, send a WAIT_UNTIL_3D_IDLE before
2682 * calling emit_clip_rect(). This fixes a
2683 * lockup on fast machines when sending
2684 * several cliprects with a cmdbuf, as when
2685 * waving a 2D window over a 3D
2686 * window. Something in the commands from user
2687 * space seems to hang the card when they're
2688 * sent several times in a row. That would be
2689 * the correct place to fix it but this works
2690 * around it until I can figure that out - Tim
2691 * Smith */
2692 if ( i ) {
2693 BEGIN_RING( 2 );
2694 RADEON_WAIT_UNTIL_3D_IDLE();
2695 ADVANCE_RING();
2696 }
2697 radeon_emit_clip_rect( dev_priv, &box );
2698 }
2699
2700 BEGIN_RING( cmdsz );
2701 OUT_RING_TABLE( cmdbuf->buf, cmdsz );
2702 ADVANCE_RING();
2703
2704 } while ( ++i < cmdbuf->nbox );
2705 if (cmdbuf->nbox == 1)
2706 cmdbuf->nbox = 0;
2707
2708 out:
2709 cmdbuf->buf += cmdsz * 4;
2710 cmdbuf->bufsz -= cmdsz * 4;
2711 return 0;
2712}
2713
2714
2715static int radeon_emit_wait( drm_device_t *dev, int flags )
2716{
2717 drm_radeon_private_t *dev_priv = dev->dev_private;
2718 RING_LOCALS;
2719
2720 DRM_DEBUG("%s: %x\n", __FUNCTION__, flags);
2721 switch (flags) {
2722 case RADEON_WAIT_2D:
2723 BEGIN_RING( 2 );
2724 RADEON_WAIT_UNTIL_2D_IDLE();
2725 ADVANCE_RING();
2726 break;
2727 case RADEON_WAIT_3D:
2728 BEGIN_RING( 2 );
2729 RADEON_WAIT_UNTIL_3D_IDLE();
2730 ADVANCE_RING();
2731 break;
2732 case RADEON_WAIT_2D|RADEON_WAIT_3D:
2733 BEGIN_RING( 2 );
2734 RADEON_WAIT_UNTIL_IDLE();
2735 ADVANCE_RING();
2736 break;
2737 default:
2738 return DRM_ERR(EINVAL);
2739 }
2740
2741 return 0;
2742}
2743
2744static int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
2745{
2746 DRM_DEVICE;
2747 drm_radeon_private_t *dev_priv = dev->dev_private;
2748 drm_file_t *filp_priv;
2749 drm_device_dma_t *dma = dev->dma;
2750 drm_buf_t *buf = NULL;
2751 int idx;
2752 drm_radeon_cmd_buffer_t cmdbuf;
2753 drm_radeon_cmd_header_t header;
2754 int orig_nbox, orig_bufsz;
2755 char *kbuf = NULL;
2756
2757 LOCK_TEST_WITH_RETURN( dev, filp );
2758
2759 if ( !dev_priv ) {
2760 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2761 return DRM_ERR(EINVAL);
2762 }
2763
2764 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2765
2766 DRM_COPY_FROM_USER_IOCTL( cmdbuf, (drm_radeon_cmd_buffer_t __user *)data,
2767 sizeof(cmdbuf) );
2768
2769 RING_SPACE_TEST_WITH_RETURN( dev_priv );
2770 VB_AGE_TEST_WITH_RETURN( dev_priv );
2771
2772 if (cmdbuf.bufsz > 64*1024 || cmdbuf.bufsz < 0) {
2773 return DRM_ERR(EINVAL);
2774 }
2775
2776 /* Allocate an in-kernel area and copy in the cmdbuf. Do this to avoid
2777 * races between checking values and using those values in other code,
2778 * and simply to avoid a lot of function calls to copy in data.
2779 */
2780 orig_bufsz = cmdbuf.bufsz;
2781 if (orig_bufsz != 0) {
2782 kbuf = drm_alloc(cmdbuf.bufsz, DRM_MEM_DRIVER);
2783 if (kbuf == NULL)
2784 return DRM_ERR(ENOMEM);
2785 if (DRM_COPY_FROM_USER(kbuf, cmdbuf.buf, cmdbuf.bufsz)) {
2786 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2787 return DRM_ERR(EFAULT);
2788 }
2789 cmdbuf.buf = kbuf;
2790 }
2791
2792 orig_nbox = cmdbuf.nbox;
2793
2794 if (dev_priv->microcode_version == UCODE_R300) {
2795 int temp;
2796 temp = r300_do_cp_cmdbuf(dev, filp, filp_priv, &cmdbuf);
2797
2798 if (orig_bufsz != 0)
2799 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2800
2801 return temp;
2802 }
2803
2804 /* microcode_version != r300 */
2805 while ( cmdbuf.bufsz >= sizeof(header) ) {
2806
2807 header.i = *(int *)cmdbuf.buf;
2808 cmdbuf.buf += sizeof(header);
2809 cmdbuf.bufsz -= sizeof(header);
2810
2811 switch (header.header.cmd_type) {
2812 case RADEON_CMD_PACKET:
2813 DRM_DEBUG("RADEON_CMD_PACKET\n");
2814 if (radeon_emit_packets( dev_priv, filp_priv, header, &cmdbuf )) {
2815 DRM_ERROR("radeon_emit_packets failed\n");
2816 goto err;
2817 }
2818 break;
2819
2820 case RADEON_CMD_SCALARS:
2821 DRM_DEBUG("RADEON_CMD_SCALARS\n");
2822 if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
2823 DRM_ERROR("radeon_emit_scalars failed\n");
2824 goto err;
2825 }
2826 break;
2827
2828 case RADEON_CMD_VECTORS:
2829 DRM_DEBUG("RADEON_CMD_VECTORS\n");
2830 if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
2831 DRM_ERROR("radeon_emit_vectors failed\n");
2832 goto err;
2833 }
2834 break;
2835
2836 case RADEON_CMD_DMA_DISCARD:
2837 DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
2838 idx = header.dma.buf_idx;
2839 if ( idx < 0 || idx >= dma->buf_count ) {
2840 DRM_ERROR( "buffer index %d (of %d max)\n",
2841 idx, dma->buf_count - 1 );
2842 goto err;
2843 }
2844
2845 buf = dma->buflist[idx];
2846 if ( buf->filp != filp || buf->pending ) {
2847 DRM_ERROR( "bad buffer %p %p %d\n",
2848 buf->filp, filp, buf->pending);
2849 goto err;
2850 }
2851
2852 radeon_cp_discard_buffer( dev, buf );
2853 break;
2854
2855 case RADEON_CMD_PACKET3:
2856 DRM_DEBUG("RADEON_CMD_PACKET3\n");
2857 if (radeon_emit_packet3( dev, filp_priv, &cmdbuf )) {
2858 DRM_ERROR("radeon_emit_packet3 failed\n");
2859 goto err;
2860 }
2861 break;
2862
2863 case RADEON_CMD_PACKET3_CLIP:
2864 DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
2865 if (radeon_emit_packet3_cliprect( dev, filp_priv, &cmdbuf, orig_nbox )) {
2866 DRM_ERROR("radeon_emit_packet3_clip failed\n");
2867 goto err;
2868 }
2869 break;
2870
2871 case RADEON_CMD_SCALARS2:
2872 DRM_DEBUG("RADEON_CMD_SCALARS2\n");
2873 if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
2874 DRM_ERROR("radeon_emit_scalars2 failed\n");
2875 goto err;
2876 }
2877 break;
2878
2879 case RADEON_CMD_WAIT:
2880 DRM_DEBUG("RADEON_CMD_WAIT\n");
2881 if (radeon_emit_wait( dev, header.wait.flags )) {
2882 DRM_ERROR("radeon_emit_wait failed\n");
2883 goto err;
2884 }
2885 break;
2886 default:
2887 DRM_ERROR("bad cmd_type %d at %p\n",
2888 header.header.cmd_type,
2889 cmdbuf.buf - sizeof(header));
2890 goto err;
2891 }
2892 }
2893
2894 if (orig_bufsz != 0)
2895 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2896
2897 DRM_DEBUG("DONE\n");
2898 COMMIT_RING();
2899 return 0;
2900
2901err:
2902 if (orig_bufsz != 0)
2903 drm_free(kbuf, orig_bufsz, DRM_MEM_DRIVER);
2904 return DRM_ERR(EINVAL);
2905}
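/* Illustrative stream layout (not a complete encoder): the loop above
 * consumes a packed sequence of 32-bit drm_radeon_cmd_header_t words,
 * each followed by its payload.  emit() is a hypothetical helper that
 * appends bytes to the buffer later passed as cmdbuf.buf/bufsz:
 *
 *	drm_radeon_cmd_header_t h;
 *
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_PACKET;
 *	h.packet.packet_id = RADEON_EMIT_PP_MISC;
 *	emit(&h, sizeof(h));
 *	emit(pp_misc_values, packet_len);	// verified by
 *						// radeon_check_and_fixup_packets()
 *
 *	h.i = 0;
 *	h.header.cmd_type = RADEON_CMD_WAIT;
 *	h.wait.flags = RADEON_WAIT_3D;
 *	emit(&h, sizeof(h));			// no payload
 */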
2906
2907
2908
2909static int radeon_cp_getparam( DRM_IOCTL_ARGS )
2910{
2911 DRM_DEVICE;
2912 drm_radeon_private_t *dev_priv = dev->dev_private;
2913 drm_radeon_getparam_t param;
2914 int value;
2915
2916 if ( !dev_priv ) {
2917 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2918 return DRM_ERR(EINVAL);
2919 }
2920
2921 DRM_COPY_FROM_USER_IOCTL( param, (drm_radeon_getparam_t __user *)data,
2922 sizeof(param) );
2923
2924 DRM_DEBUG( "pid=%d\n", DRM_CURRENTPID );
2925
2926 switch( param.param ) {
2927 case RADEON_PARAM_GART_BUFFER_OFFSET:
2928 value = dev_priv->gart_buffers_offset;
2929 break;
2930 case RADEON_PARAM_LAST_FRAME:
2931 dev_priv->stats.last_frame_reads++;
2932 value = GET_SCRATCH( 0 );
2933 break;
2934 case RADEON_PARAM_LAST_DISPATCH:
2935 value = GET_SCRATCH( 1 );
2936 break;
2937 case RADEON_PARAM_LAST_CLEAR:
2938 dev_priv->stats.last_clear_reads++;
2939 value = GET_SCRATCH( 2 );
2940 break;
2941 case RADEON_PARAM_IRQ_NR:
2942 value = dev->irq;
2943 break;
2944 case RADEON_PARAM_GART_BASE:
2945 value = dev_priv->gart_vm_start;
2946 break;
2947 case RADEON_PARAM_REGISTER_HANDLE:
2948 value = dev_priv->mmio_offset;
2949 break;
2950 case RADEON_PARAM_STATUS_HANDLE:
2951 value = dev_priv->ring_rptr_offset;
2952 break;
2953#if BITS_PER_LONG == 32
2954 /*
2955 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
2956 * pointer which can't fit into an int-sized variable. According to
2957 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
2958 * not supporting it shouldn't be a problem. If the same functionality
2959 * is needed on 64-bit platforms, a new ioctl() would have to be added,
2960 * so backwards-compatibility for the embedded platforms can be
2961 * maintained. --davidm 4-Feb-2004.
2962 */
2963 case RADEON_PARAM_SAREA_HANDLE:
2964 /* The lock is the first dword in the sarea. */
2965 value = (long)dev->lock.hw_lock;
2966 break;
2967#endif
2968 case RADEON_PARAM_GART_TEX_HANDLE:
2969 value = dev_priv->gart_textures_offset;
2970 break;
2971 default:
2972 return DRM_ERR(EINVAL);
2973 }
2974
2975 if ( DRM_COPY_TO_USER( param.value, &value, sizeof(int) ) ) {
2976 DRM_ERROR( "copy_to_user\n" );
2977 return DRM_ERR(EFAULT);
2978 }
2979
2980 return 0;
2981}
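/* Usage sketch (illustrative only): every parameter above is returned
 * as a single int through the user pointer in the request:
 *
 *	int irq_nr;
 *	drm_radeon_getparam_t gp = {
 *		.param = RADEON_PARAM_IRQ_NR,
 *		.value = &irq_nr,
 *	};
 *	if (ioctl(fd, DRM_IOCTL_RADEON_GETPARAM, &gp) == 0)
 *		printf("radeon irq %d\n", irq_nr);
 */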
2982
2983static int radeon_cp_setparam( DRM_IOCTL_ARGS ) {
2984 DRM_DEVICE;
2985 drm_radeon_private_t *dev_priv = dev->dev_private;
2986 drm_file_t *filp_priv;
2987 drm_radeon_setparam_t sp;
2988 struct drm_radeon_driver_file_fields *radeon_priv;
2989
2990 if ( !dev_priv ) {
2991 DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ );
2992 return DRM_ERR( EINVAL );
2993 }
2994
2995 DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
2996
2997 DRM_COPY_FROM_USER_IOCTL( sp, ( drm_radeon_setparam_t __user * )data,
2998 sizeof( sp ) );
2999
3000 switch( sp.param ) {
3001 case RADEON_SETPARAM_FB_LOCATION:
3002 radeon_priv = filp_priv->driver_priv;
3003 radeon_priv->radeon_fb_delta = dev_priv->fb_location - sp.value;
3004 break;
3005 case RADEON_SETPARAM_SWITCH_TILING:
3006 if (sp.value == 0) {
3007 DRM_DEBUG( "color tiling disabled\n" );
3008 dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3009 dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
3010 dev_priv->sarea_priv->tiling_enabled = 0;
3011 }
3012 else if (sp.value == 1) {
3013 DRM_DEBUG( "color tiling enabled\n" );
3014 dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
3015 dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
3016 dev_priv->sarea_priv->tiling_enabled = 1;
3017 }
3018 break;
3019 default:
3020 DRM_DEBUG( "Invalid parameter %d\n", sp.param );
3021 return DRM_ERR( EINVAL );
3022 }
3023
3024 return 0;
3025}
3026
3027/* When a client dies:
3028 * - Check for and clean up flipped page state
3029 * - Free any alloced GART memory.
3030 *
3031 * DRM infrastructure takes care of reclaiming dma buffers.
3032 */
3033void radeon_driver_prerelease(drm_device_t *dev, DRMFILE filp)
3034{
3035 if ( dev->dev_private ) {
3036 drm_radeon_private_t *dev_priv = dev->dev_private;
3037 if ( dev_priv->page_flipping ) {
3038 radeon_do_cleanup_pageflip( dev );
3039 }
3040 radeon_mem_release( filp, dev_priv->gart_heap );
3041 radeon_mem_release( filp, dev_priv->fb_heap );
3042 radeon_surfaces_release(filp, dev_priv);
3043 }
3044}
3045
3046void radeon_driver_pretakedown(drm_device_t *dev)
3047{
3048 radeon_do_release(dev);
3049}
3050
3051int radeon_driver_open_helper(drm_device_t *dev, drm_file_t *filp_priv)
3052{
3053 drm_radeon_private_t *dev_priv = dev->dev_private;
3054 struct drm_radeon_driver_file_fields *radeon_priv;
3055
3056 radeon_priv = (struct drm_radeon_driver_file_fields *)drm_alloc(sizeof(*radeon_priv), DRM_MEM_FILES);
3057
3058 if (!radeon_priv)
3059 return -ENOMEM;
3060
3061 filp_priv->driver_priv = radeon_priv;
3062 if ( dev_priv )
3063 radeon_priv->radeon_fb_delta = dev_priv->fb_location;
3064 else
3065 radeon_priv->radeon_fb_delta = 0;
3066 return 0;
3067}
3068
3069
3070void radeon_driver_free_filp_priv(drm_device_t *dev, drm_file_t *filp_priv)
3071{
3072 struct drm_radeon_driver_file_fields *radeon_priv = filp_priv->driver_priv;
3073
3074 drm_free(radeon_priv, sizeof(*radeon_priv), DRM_MEM_FILES);
3075}
3076
3077drm_ioctl_desc_t radeon_ioctls[] = {
3078 [DRM_IOCTL_NR(DRM_RADEON_CP_INIT)] = { radeon_cp_init, 1, 1 },
3079 [DRM_IOCTL_NR(DRM_RADEON_CP_START)] = { radeon_cp_start, 1, 1 },
3080 [DRM_IOCTL_NR(DRM_RADEON_CP_STOP)] = { radeon_cp_stop, 1, 1 },
3081 [DRM_IOCTL_NR(DRM_RADEON_CP_RESET)] = { radeon_cp_reset, 1, 1 },
3082 [DRM_IOCTL_NR(DRM_RADEON_CP_IDLE)] = { radeon_cp_idle, 1, 0 },
3083 [DRM_IOCTL_NR(DRM_RADEON_CP_RESUME)] = { radeon_cp_resume, 1, 0 },
3084 [DRM_IOCTL_NR(DRM_RADEON_RESET)] = { radeon_engine_reset, 1, 0 },
3085 [DRM_IOCTL_NR(DRM_RADEON_FULLSCREEN)] = { radeon_fullscreen, 1, 0 },
3086 [DRM_IOCTL_NR(DRM_RADEON_SWAP)] = { radeon_cp_swap, 1, 0 },
3087 [DRM_IOCTL_NR(DRM_RADEON_CLEAR)] = { radeon_cp_clear, 1, 0 },
3088 [DRM_IOCTL_NR(DRM_RADEON_VERTEX)] = { radeon_cp_vertex, 1, 0 },
3089 [DRM_IOCTL_NR(DRM_RADEON_INDICES)] = { radeon_cp_indices, 1, 0 },
3090 [DRM_IOCTL_NR(DRM_RADEON_TEXTURE)] = { radeon_cp_texture, 1, 0 },
3091 [DRM_IOCTL_NR(DRM_RADEON_STIPPLE)] = { radeon_cp_stipple, 1, 0 },
3092 [DRM_IOCTL_NR(DRM_RADEON_INDIRECT)] = { radeon_cp_indirect, 1, 1 },
3093 [DRM_IOCTL_NR(DRM_RADEON_VERTEX2)] = { radeon_cp_vertex2, 1, 0 },
3094 [DRM_IOCTL_NR(DRM_RADEON_CMDBUF)] = { radeon_cp_cmdbuf, 1, 0 },
3095 [DRM_IOCTL_NR(DRM_RADEON_GETPARAM)] = { radeon_cp_getparam, 1, 0 },
3096 [DRM_IOCTL_NR(DRM_RADEON_FLIP)] = { radeon_cp_flip, 1, 0 },
3097 [DRM_IOCTL_NR(DRM_RADEON_ALLOC)] = { radeon_mem_alloc, 1, 0 },
3098 [DRM_IOCTL_NR(DRM_RADEON_FREE)] = { radeon_mem_free, 1, 0 },
3099 [DRM_IOCTL_NR(DRM_RADEON_INIT_HEAP)] = { radeon_mem_init_heap,1, 1 },
3100 [DRM_IOCTL_NR(DRM_RADEON_IRQ_EMIT)] = { radeon_irq_emit, 1, 0 },
3101 [DRM_IOCTL_NR(DRM_RADEON_IRQ_WAIT)] = { radeon_irq_wait, 1, 0 },
3102 [DRM_IOCTL_NR(DRM_RADEON_SETPARAM)] = { radeon_cp_setparam, 1, 0 },
3103 [DRM_IOCTL_NR(DRM_RADEON_SURF_ALLOC)] = { radeon_surface_alloc,1, 0 },
3104 [DRM_IOCTL_NR(DRM_RADEON_SURF_FREE)] = { radeon_surface_free, 1, 0 }
3105};
3106
3107int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);