SPARC assembly optimizations from David Miller.
diff --git a/src/mesa/math/m_debug_util.h b/src/mesa/math/m_debug_util.h
index dc8024a..efedda9 100644
--- a/src/mesa/math/m_debug_util.h
+++ b/src/mesa/math/m_debug_util.h
@@ -1,4 +1,4 @@
-/* $Id: m_debug_util.h,v 1.3 2001/03/30 14:44:43 gareth Exp $ */
+/* $Id: m_debug_util.h,v 1.4 2001/05/23 14:27:03 brianp Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -38,7 +38,9 @@
  * NOTE: it works only on CPUs which know the 'rdtsc' command (586 or higher)
  * (hope, you don't try to debug Mesa on a 386 ;)
  */
-#if defined(__GNUC__) && defined(__i386__) && defined(USE_X86_ASM)
+#if defined(__GNUC__) && \
+    ((defined(__i386__) && defined(USE_X86_ASM)) || \
+     (defined(__sparc__) && defined(USE_SPARC_ASM)))
 #define  RUN_DEBUG_BENCHMARK
 #endif
 
@@ -67,6 +69,8 @@
  * It is assumed that all calculations are done in the cache.
  */
 
+#if defined(__i386__)
+
 #if 1 /* PPro, PII, PIII version */
 
 /* Profiling on the P6 architecture requires a little more work, due to
@@ -183,6 +187,30 @@
 
 #endif
 
+#elif defined(__sparc__)
+/* SPARC: the raced code is timed with two reads of the %tick register. */
+#define  INIT_COUNTER()	\
+	 do { counter_overhead = 5; } while(0)
+/* Opens the 10-pass loop that END_RACE closes; x starts at LONG_MAX. */
+#define  BEGIN_RACE(x)                                                        \
+x = LONG_MAX;                                                                 \
+for (cycle_i = 0; cycle_i <10; cycle_i++) {                                   \
+   register long cycle_tmp1 __asm__("l0");				      \
+   register long cycle_tmp2 __asm__("l1");				      \
+   /* rd %tick, %l0 (raw opcode, hence the variable pinned to %l0) */	      \
+   __asm__ __volatile__ (".word 0xa1410000" : "=r" (cycle_tmp1));  /*  save timestamp   */
+/* Second sample; x keeps the smallest delta, then counter_overhead is removed. */
+#define END_RACE(x)                                                           \
+   /* rd %tick, %l1 (raw opcode, hence the variable pinned to %l1) */	      \
+   __asm__ __volatile__ (".word 0xa3410000" : "=r" (cycle_tmp2));	      \
+   if (x > (cycle_tmp2-cycle_tmp1)) x = cycle_tmp2 - cycle_tmp1;              \
+}                                                                             \
+x -= counter_overhead;
+
+#else
+#error Your processor is not supported for RUN_DEBUG_BENCHMARK
+#endif
+
 #else
 
 #define BEGIN_RACE(x)
diff --git a/src/mesa/math/m_xform.c b/src/mesa/math/m_xform.c
index 6943084..c043b09 100644
--- a/src/mesa/math/m_xform.c
+++ b/src/mesa/math/m_xform.c
@@ -1,4 +1,4 @@
-/* $Id: m_xform.c,v 1.13 2001/05/21 16:33:41 gareth Exp $ */
+/* $Id: m_xform.c,v 1.14 2001/05/23 14:27:03 brianp Exp $ */
 
 /*
  * Mesa 3-D graphics library
@@ -56,6 +56,10 @@
 #include "X86/common_x86_asm.h"
 #endif
 
+#ifdef USE_SPARC_ASM
+#include "SPARC/sparc.h"
+#endif
+
 clip_func _mesa_clip_tab[5];
 clip_func _mesa_clip_np_tab[5];
 dotprod_func _mesa_dotprod_tab[5];
@@ -206,6 +210,9 @@
 #ifdef USE_X86_ASM
    _mesa_init_all_x86_transform_asm();
 #endif
+#ifdef USE_SPARC_ASM
+   _mesa_init_all_sparc_transform_asm();
+#endif
 }
 
 void
diff --git a/src/mesa/sparc/clip.S b/src/mesa/sparc/clip.S
new file mode 100644
index 0000000..a569428
--- /dev/null
+++ b/src/mesa/sparc/clip.S
@@ -0,0 +1,234 @@
+/* $Id: clip.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+#if defined(__arch64__) || defined(__sparcv9)	/* 64-bit ABI: 8-byte pointers */
+#define LDPTR		ldx
+#define V4F_DATA	0x00
+#define V4F_START	0x08
+#define V4F_COUNT	0x10
+#define V4F_STRIDE	0x14
+#define V4F_SIZE	0x18
+#define V4F_FLAGS	0x1c
+#else		/* 32-bit ABI, including v8plus builds that target a V9 CPU */
+#define LDPTR		ld
+#define V4F_DATA	0x00
+#define V4F_START	0x04
+#define V4F_COUNT	0x08
+#define V4F_STRIDE	0x0c
+#define V4F_SIZE	0x10
+#define V4F_FLAGS	0x14
+#endif
+/* V4F_* are byte offsets into GLvector4f; VEC_SIZE_* are OR-ed into its flags. */
+#define VEC_SIZE_1   	1
+#define VEC_SIZE_2   	3
+#define VEC_SIZE_3   	7
+#define VEC_SIZE_4   	15
+
+	.text
+	.align		64
+
+one_dot_zero:
+	.word		0x3f800000	/* 1.0f */
+	/* clip_table must stay 4 bytes after one_dot_zero: users add 4 to find it. */
+	/* This trick is shamelessly stolen from the x86
+	 * Mesa asm.  Very clever, and we can do it too
+	 * since we have the necessary add with carry
+	 * instructions on Sparc.  128 entries, one per 7-bit addx-built index.
+	 */
+clip_table:
+	.byte	 0,  1,  0,  2,  4,  5,  4,  6
+	.byte	 0,  1,  0,  2,  8,  9,  8, 10
+	.byte	32, 33, 32, 34, 36, 37, 36, 38
+	.byte	32, 33, 32, 34, 40, 41, 40, 42
+	.byte	 0,  1,  0,  2,  4,  5,  4,  6
+	.byte	 0,  1,  0,  2,  8,  9,  8, 10
+	.byte	16, 17, 16, 18, 20, 21, 20, 22
+	.byte	16, 17, 16, 18, 24, 25, 24, 26
+	.byte	63, 61, 63, 62, 55, 53, 55, 54
+	.byte	63, 61, 63, 62, 59, 57, 59, 58
+	.byte	47, 45, 47, 46, 39, 37, 39, 38
+	.byte	47, 45, 47, 46, 43, 41, 43, 42
+	.byte	63, 61, 63, 62, 55, 53, 55, 54
+	.byte	63, 61, 63, 62, 59, 57, 59, 58
+	.byte	31, 29, 31, 30, 23, 21, 23, 22
+	.byte	31, 29, 31, 30, 27, 25, 27, 26
+
+/* GLvector4f *clip_vec, GLvector4f *proj_vec, 
+   GLubyte clipMask[], GLubyte *orMask, GLubyte *andMask */
+
+	.align		64
+__pc_tramp:
+	retl				! returns at once; callers only want the call address in %o7
+	 nop				! delay slot
+
+	/* GLvector4f *_mesa_sparc_cliptest_points4(clip_vec, proj_vec, clipMask,
+	 * orMask, andMask): store one clip-mask byte per vertex; unclipped
+	 * vertices are projected to (x/w, y/w, z/w, 1/w), clipped ones are
+	 * written as (0, 0, 0, 1).  Updates *orMask / *andMask, returns proj_vec.
+	 */
+	.globl		_mesa_sparc_cliptest_points4
+_mesa_sparc_cliptest_points4:
+	save		%sp, -192, %sp		! big enough for the 32- and 64-bit window save area
+	call		__pc_tramp		! leaves the address of this call in %o7
+	 sub		%o7, (. - one_dot_zero - 4), %g1	! delay slot: g1 = address of one_dot_zero
+	ld		[%g1 + 0x0], %f4	! f4 = 1.0f
+	add		%g1, 0x4, %g1		! g1 = clip_table (4 bytes after one_dot_zero)
+	ld		[%i0 + V4F_STRIDE], %l1
+	ld		[%i0 + V4F_COUNT], %l3	! count lives in %l3: %g7 is reserved for the system
+	LDPTR		[%i0 + V4F_START], %i0
+	LDPTR		[%i1 + V4F_START], %i5
+	ldub		[%i3], %g2		! tmpOrMask
+	ldub		[%i4], %g3		! tmpAndMask
+	sll		%g3, 8, %g3
+	or		%g2, %g3, %g2
+	ld		[%i1 + V4F_FLAGS], %g3
+	or		%g3, VEC_SIZE_4, %g3
+	st		%g3, [%i1 + V4F_FLAGS]
+	mov		3, %g3			! NOTE(review): size 3 next to the 4-component flag bits - confirm intended
+	st		%g3, [%i1 + V4F_SIZE]
+	st		%l3, [%i1 + V4F_COUNT]
+	clr		%l2
+	cmp		%l3, 0			! empty vector: skip the loop, which always runs once
+	be		4f
+	 clr		%l0			! delay slot
+	/* l0:	i			l3:	count
+	 * l1:	stride			l2:	c
+	 * g2:	(tmpAndMask << 8) | tmpOrMask
+	 * g1:	clip_table
+	 * i0:	from[stride][i]		i2:	clipMask
+	 * i5:	vProj[4][i]
+	 */
+1:	ld		[%i0 + 0x0c], %f3	! LSU	Group
+	ld		[%i0 + 0x0c], %g5	! LSU	Group
+	ld		[%i0 + 0x08], %g4	! LSU	Group
+	fdivs		%f4, %f3, %f8		! FGM
+	addcc		%g5, %g5, %g5		! IEU1	Group
+	addx		%g0, 0x0, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	ld		[%i0 + 0x04], %g4	! LSU	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	ld		[%i0 + 0x00], %g4	! LSU	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	ldub		[%g1 + %g3], %g3	! LSU	Group
+	cmp		%g3, 0			! IEU1	Group, stall
+	be		2f			! CTI
+	 stb		%g3, [%i2]		! LSU
+	sll		%g3, 8, %g4		! IEU1	Group
+	add		%l2, 1, %l2		! IEU0
+	st		%g0, [%i5 + 0x00]	! LSU
+	or		%g4, 0xff, %g4		! IEU0	Group
+	or		%g2, %g3, %g2		! IEU1
+	st		%g0, [%i5 + 0x04]	! LSU
+	and		%g2, %g4, %g2		! IEU0	Group
+	st		%g0, [%i5 + 0x08]	! LSU
+	b		3f			! CTI
+	 st		%f4, [%i5 + 0x0c]	! LSU	Group
+2:	ld		[%i0 + 0x00], %f0	! LSU	Group
+	ld		[%i0 + 0x04], %f1	! LSU	Group
+	ld		[%i0 + 0x08], %f2	! LSU	Group
+	fmuls		%f0, %f8, %f0		! FGM
+	st		%f0, [%i5 + 0x00]	! LSU	Group
+	fmuls		%f1, %f8, %f1		! FGM
+	st		%f1, [%i5 + 0x04]	! LSU	Group
+	fmuls		%f2, %f8, %f2		! FGM
+	st		%f2, [%i5 + 0x08]	! LSU	Group
+	st		%f8, [%i5 + 0x0c]	! LSU	Group
+3:	add		%i5, 0x10, %i5		! IEU1
+	add		%l0, 1, %l0		! IEU0	Group
+	add		%i2, 1, %i2		! IEU0	Group
+	cmp		%l0, %l3		! IEU1	Group
+	bne		1b			! CTI
+	 add		%i0, %l1, %i0		! IEU0	Group
+4:	stb		%g2, [%i3]		! LSU
+	srl		%g2, 8, %g3		! IEU0	Group
+	cmp		%l2, %l3		! IEU1	Group
+	bl,a		1f			! CTI
+	 clr		%g3			! IEU0
+1:	stb		%g3, [%i4]		! LSU	Group
+	ret					! CTI	Group
+	 restore	%i1, 0x0, %o0
+
+	/* GLvector4f *_mesa_sparc_cliptest_points4_np(clip_vec, proj_vec, clipMask,
+	 * orMask, andMask): same mask computation as _mesa_sparc_cliptest_points4
+	 * but no vertex data is written, only clipMask[], *orMask and *andMask.
+	 * NOTE(review): proj_vec is still tagged (flags/size/count) and returned
+	 * although nothing is stored through it here - confirm callers expect that.
+	 */
+	.globl		_mesa_sparc_cliptest_points4_np
+_mesa_sparc_cliptest_points4_np:
+	save		%sp, -192, %sp		! big enough for the 32- and 64-bit window save area
+	call		__pc_tramp		! leaves the address of this call in %o7
+	 sub		%o7, (. - one_dot_zero - 4), %g1	! delay slot: g1 = address of one_dot_zero
+	add		%g1, 0x4, %g1		! g1 = clip_table (4 bytes after one_dot_zero)
+	ld		[%i0 + V4F_STRIDE], %l1
+	ld		[%i0 + V4F_COUNT], %l3	! count lives in %l3: %g7 is reserved for the system
+	LDPTR		[%i0 + V4F_START], %i0
+	ldub		[%i3], %g2		! tmpOrMask
+	ldub		[%i4], %g3		! tmpAndMask
+	sll		%g3, 8, %g3
+	or		%g2, %g3, %g2
+	ld		[%i1 + V4F_FLAGS], %g3
+	or		%g3, VEC_SIZE_4, %g3
+	st		%g3, [%i1 + V4F_FLAGS]
+	mov		3, %g3			! NOTE(review): size 3 next to the 4-component flag bits - confirm intended
+	st		%g3, [%i1 + V4F_SIZE]
+	st		%l3, [%i1 + V4F_COUNT]
+	clr		%l2
+	cmp		%l3, 0			! empty vector: skip the loop, which always runs once
+	be		3f
+	 clr		%l0			! delay slot
+	/* l0:	i			l3:	count
+	 * l1:	stride			l2:	c
+	 * g2:	(tmpAndMask << 8) | tmpOrMask
+	 * g1:	clip_table
+	 * i0:	from[stride][i]
+	 * i2:	clipMask
+	 */
+1:	ld		[%i0 + 0x0c], %g5	! LSU	Group
+	ld		[%i0 + 0x08], %g4	! LSU	Group
+	addcc		%g5, %g5, %g5		! IEU1	Group
+	addx		%g0, 0x0, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	ld		[%i0 + 0x04], %g4	! LSU	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	ld		[%i0 + 0x00], %g4	! LSU	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	addcc		%g4, %g4, %g4		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	subcc		%g5, %g4, %g0		! IEU1	Group
+	addx		%g3, %g3, %g3		! IEU1	Group
+	ldub		[%g1 + %g3], %g3	! LSU	Group
+	cmp		%g3, 0			! IEU1	Group, stall
+	be		2f			! CTI
+	 stb		%g3, [%i2]		! LSU
+	sll		%g3, 8, %g4		! IEU1	Group
+	add		%l2, 1, %l2		! IEU0
+	or		%g4, 0xff, %g4		! IEU0	Group
+	or		%g2, %g3, %g2		! IEU1
+	and		%g2, %g4, %g2		! IEU0	Group
+2:	add		%l0, 1, %l0		! IEU0	Group
+	add		%i2, 1, %i2		! IEU0	Group
+	cmp		%l0, %l3		! IEU1	Group
+	bne		1b			! CTI
+	 add		%i0, %l1, %i0		! IEU0	Group
+3:	stb		%g2, [%i3]		! LSU
+	srl		%g2, 8, %g3		! IEU0	Group
+	cmp		%l2, %l3		! IEU1	Group
+	bl,a		1f			! CTI
+	 clr		%g3			! IEU0
+1:	stb		%g3, [%i4]		! LSU	Group
+	ret					! CTI	Group
+	 restore	%i1, 0x0, %o0
diff --git a/src/mesa/sparc/sparc.c b/src/mesa/sparc/sparc.c
new file mode 100644
index 0000000..83741cf
--- /dev/null
+++ b/src/mesa/sparc/sparc.c
@@ -0,0 +1,109 @@
+/* $Id: sparc.c,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.1
+ * 
+ * Copyright (C) 1999  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S. Miller
+ */
+
+
+#include "glheader.h"
+#include "context.h"
+#include "math/m_vertices.h"
+#include "math/m_xform.h"
+#include "tnl/t_context.h"
+#include "sparc.h"
+
+#ifdef DEBUG
+#include "math/m_debug.h"
+#endif
+
+#define XFORM_ARGS 	GLvector4f *to_vec, 		\
+			const GLfloat m[16], 		\
+			const GLvector4f *from_vec
+/* Declares the seven per-matrix-type entry points _mesa_<pfx>_transform_points<sz>_*. */
+#define DECLARE_XFORM_GROUP(pfx, sz)					   \
+ extern void _mesa_##pfx##_transform_points##sz##_general(XFORM_ARGS);     \
+ extern void _mesa_##pfx##_transform_points##sz##_identity(XFORM_ARGS);    \
+ extern void _mesa_##pfx##_transform_points##sz##_3d_no_rot(XFORM_ARGS);   \
+ extern void _mesa_##pfx##_transform_points##sz##_perspective(XFORM_ARGS); \
+ extern void _mesa_##pfx##_transform_points##sz##_2d(XFORM_ARGS);          \
+ extern void _mesa_##pfx##_transform_points##sz##_2d_no_rot(XFORM_ARGS);   \
+ extern void _mesa_##pfx##_transform_points##sz##_3d(XFORM_ARGS);
+/* Stores those seven entry points into row sz of _mesa_transform_tab; ends in ';', so use without one. */
+#define ASSIGN_XFORM_GROUP(pfx, sz)					\
+   _mesa_transform_tab[sz][MATRIX_GENERAL] =				\
+      _mesa_##pfx##_transform_points##sz##_general;			\
+   _mesa_transform_tab[sz][MATRIX_IDENTITY] =				\
+      _mesa_##pfx##_transform_points##sz##_identity;			\
+   _mesa_transform_tab[sz][MATRIX_3D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_3d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_PERSPECTIVE] =			\
+      _mesa_##pfx##_transform_points##sz##_perspective;			\
+   _mesa_transform_tab[sz][MATRIX_2D] =					\
+      _mesa_##pfx##_transform_points##sz##_2d;				\
+   _mesa_transform_tab[sz][MATRIX_2D_NO_ROT] =				\
+      _mesa_##pfx##_transform_points##sz##_2d_no_rot;			\
+   _mesa_transform_tab[sz][MATRIX_3D] =					\
+      _mesa_##pfx##_transform_points##sz##_3d;
+
+
+#ifdef USE_SPARC_ASM
+DECLARE_XFORM_GROUP(sparc, 1)
+DECLARE_XFORM_GROUP(sparc, 2)
+DECLARE_XFORM_GROUP(sparc, 3)
+DECLARE_XFORM_GROUP(sparc, 4)
+/* Clip tester from clip.S; installed in _mesa_clip_tab[4] below. */
+extern GLvector4f  *_mesa_sparc_cliptest_points4(GLvector4f *clip_vec,
+						 GLvector4f *proj_vec,
+						 GLubyte clipMask[],
+						 GLubyte *orMask,
+						 GLubyte *andMask);
+/* Variant from clip.S; installed in _mesa_clip_np_tab[4] below. */
+extern GLvector4f  *_mesa_sparc_cliptest_points4_np(GLvector4f *clip_vec,
+						    GLvector4f *proj_vec,
+						    GLubyte clipMask[],
+						    GLubyte *orMask,
+						    GLubyte *andMask);
+#endif
+
+void _mesa_init_all_sparc_transform_asm(void)
+{
+#if defined(USE_SPARC_ASM)
+   /* Point the shared dispatch tables at the SPARC assembly routines. */
+   _mesa_clip_np_tab[4] = _mesa_sparc_cliptest_points4_np;
+   _mesa_clip_tab[4] = _mesa_sparc_cliptest_points4;
+   ASSIGN_XFORM_GROUP(sparc, 4)
+   ASSIGN_XFORM_GROUP(sparc, 3)
+   ASSIGN_XFORM_GROUP(sparc, 2)
+   ASSIGN_XFORM_GROUP(sparc, 1)
+
+#if defined(DEBUG)
+   /* Debug builds exercise the routines once every slot above is filled. */
+   _math_test_all_transform_functions("sparc");
+   _math_test_all_cliptest_functions("sparc");
+#endif /* DEBUG */
+#endif /* USE_SPARC_ASM */
+}
diff --git a/src/mesa/sparc/sparc.h b/src/mesa/sparc/sparc.h
new file mode 100644
index 0000000..64422a3
--- /dev/null
+++ b/src/mesa/sparc/sparc.h
@@ -0,0 +1,37 @@
+/* $Id: sparc.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+/*
+ * Mesa 3-D graphics library
+ * Version:  3.1
+ * 
+ * Copyright (C) 1999  Brian Paul   All Rights Reserved.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Sparc assembly code by David S. Miller
+ */
+
+
+#ifndef SPARC_H
+#define SPARC_H
+/* Installs the SPARC asm routines into the transform and clip tables (sparc.c). */
+extern void _mesa_init_all_sparc_transform_asm(void);
+
+#endif /* !(SPARC_H) */
diff --git a/src/mesa/sparc/sparc_matrix.h b/src/mesa/sparc/sparc_matrix.h
new file mode 100644
index 0000000..4b452fd
--- /dev/null
+++ b/src/mesa/sparc/sparc_matrix.h
@@ -0,0 +1,277 @@
+/* $Id: sparc_matrix.h,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+#define M0		%f16	/* Mn = FP register loaded with matrix element m[n] */
+#define M1		%f17
+#define M2		%f18
+#define M3		%f19
+#define M4		%f20
+#define M5		%f21
+#define M6		%f22
+#define M7		%f23
+#define M8		%f24
+#define M9		%f25
+#define M10		%f26
+#define M11		%f27
+#define M12		%f28
+#define M13		%f29
+#define M14		%f30
+#define M15		%f31
+/* When defined, the LDMATRIX_* macros below use ldd to fill two registers per load. */
+/* Seems to work, disable if unaligned traps begin to appear... -DaveM */
+#define USE_LD_DOUBLE
+
+#ifndef USE_LD_DOUBLE
+/* Single-word variants: LDMATRIX_<list>(BASE) issues one ld per listed element. */
+#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)	\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 3 * 0x4)], M3;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14;	\
+	ld	[BASE + (15 * 0x4)], M15
+
+#define LDMATRIX_0_1_12_13(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13
+
+#define LDMATRIX_0_12_13(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13
+
+#define LDMATRIX_0_1_2_12_13_14(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_12_13_14(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 3 * 0x4)], M3;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ld	[BASE + ( 7 * 0x4)], M7;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14;	\
+	ld	[BASE + (15 * 0x4)], M15
+
+#define LDMATRIX_0_1_4_5_12_13(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13
+
+#define LDMATRIX_0_5_12_13(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13
+
+#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE)	\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 3 * 0x4)], M3;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_12_13_14(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 3 * 0x4)], M3;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ld	[BASE + ( 7 * 0x4)], M7;	\
+	ld	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + ( 9 * 0x4)], M9;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (11 * 0x4)], M11;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14;	\
+	ld	[BASE + (15 * 0x4)], M15
+
+/* LDMATRIX_0_5_12_13 is already defined earlier in this branch of the
+ * USE_LD_DOUBLE conditional.  The identical second definition that used
+ * to sit here was redundant and has been dropped, so that the macro has
+ * a single place to edit and the two copies cannot drift apart.
+ */
+
+#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 1 * 0x4)], M1;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ld	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ld	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + ( 9 * 0x4)], M9;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+/* Loads elements 0, 5, 10 and 12..14 only; the other M registers are left untouched. */
+#define LDMATRIX_0_5_10_12_13_14(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (13 * 0x4)], M13;	\
+	ld	[BASE + (14 * 0x4)], M14
+/* Loads elements 0, 5, 8, 9, 10 and 14 only. */
+#define LDMATRIX_0_5_8_9_10_14(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + ( 9 * 0x4)], M9;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#else /* !(USE_LD_DOUBLE) */
+/* ldd variants: one ldd fills an even/odd pair (M0:M1, ...); BASE must be 8-byte aligned. */
+#define LDMATRIX_0_1_2_3_12_13_14_15(BASE)	\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ldd	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_12_13(BASE)		\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_12_13(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12
+/* Elements whose pair partner is not wanted keep a single-word ld. */
+#define LDMATRIX_0_1_2_12_13_14(BASE)		\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_12_13_14(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(BASE) \
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + ( 4 * 0x4)], M4;	\
+	ldd	[BASE + ( 6 * 0x4)], M6;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ldd	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_12_13(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ldd	[BASE + (12 * 0x4)], M12
+
+#define LDMATRIX_0_1_2_3_4_5_6_12_13_14(BASE)	\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_12_13_14(BASE)		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_5_14(BASE)			\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(BASE) \
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + ( 4 * 0x4)], M4;	\
+	ldd	[BASE + ( 6 * 0x4)], M6;	\
+	ldd	[BASE + ( 8 * 0x4)], M8;	\
+	ldd	[BASE + (10 * 0x4)], M10;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ldd	[BASE + (14 * 0x4)], M14
+
+#define LDMATRIX_0_1_4_5_12_13(BASE) 		\
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ldd	[BASE + ( 4 * 0x4)], M4;	\
+	ldd	[BASE + (12 * 0x4)], M12
+
+/* LDMATRIX_0_5_12_13 is already defined earlier in this branch of the
+ * USE_LD_DOUBLE conditional; the identical second definition that used
+ * to sit here was redundant and has been dropped.
+ */
+
+#define LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(BASE) \
+	ldd	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 2 * 0x4)], M2;	\
+	ldd	[BASE + ( 4 * 0x4)], M4;	\
+	ld	[BASE + ( 6 * 0x4)], M6;	\
+	ldd	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+/* Only the 12/13 pair is adjacent here, so it is the only ldd. */
+#define LDMATRIX_0_5_10_12_13_14(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ldd	[BASE + (12 * 0x4)], M12;	\
+	ld	[BASE + (14 * 0x4)], M14
+/* Only the 8/9 pair is adjacent here, so it is the only ldd. */
+#define LDMATRIX_0_5_8_9_10_14(BASE) 		\
+	ld	[BASE + ( 0 * 0x4)], M0;	\
+	ld	[BASE + ( 5 * 0x4)], M5;	\
+	ldd	[BASE + ( 8 * 0x4)], M8;	\
+	ld	[BASE + (10 * 0x4)], M10;	\
+	ld	[BASE + (14 * 0x4)], M14
+
+#endif /* USE_LD_DOUBLE */
diff --git a/src/mesa/sparc/xform.S b/src/mesa/sparc/xform.S
new file mode 100644
index 0000000..368fdd9
--- /dev/null
+++ b/src/mesa/sparc/xform.S
@@ -0,0 +1,1410 @@
+/* $Id: xform.S,v 1.1 2001/05/23 14:27:03 brianp Exp $ */
+
+	/* TODO
+	 *
+	 * 1) It would be nice if load/store double could be used
+	 *    at least for the matrix parts.  I think for the matrices
+	 *    it is safe, but for the vertices it probably is not due to
+	 *    things like glInterleavedArrays etc.
+	 *
+	 *    UPDATE: Trying this now in sparc_matrix.h -DaveM_990624
+	 *
+	 * 2) One extremely slick trick would be if we could enclose
+	 *    groups of xform calls on the same vertices such that
+	 *    we just load the matrix into f16-->f31 before the calls
+	 *    and then we would not have to do them here.  This may be
+	 *    tricky and not much of a gain though.
+	 */
+
+#if defined(__arch64__) || defined(__sparcv9)	/* 64-bit ABI: 8-byte pointers */
+#define LDPTR		ldx
+#define V4F_DATA	0x00
+#define V4F_START	0x08
+#define V4F_COUNT	0x10
+#define V4F_STRIDE	0x14
+#define V4F_SIZE	0x18
+#define V4F_FLAGS	0x1c
+#else		/* 32-bit ABI, including v8plus builds that target a V9 CPU */
+#define LDPTR		ld
+#define V4F_DATA	0x00
+#define V4F_START	0x04
+#define V4F_COUNT	0x08
+#define V4F_STRIDE	0x0c
+#define V4F_SIZE	0x10
+#define V4F_FLAGS	0x14
+#endif
+/* V4F_* are byte offsets into GLvector4f; VEC_SIZE_* are OR-ed into its flags. */
+#define VEC_SIZE_1   	1
+#define VEC_SIZE_2   	3
+#define VEC_SIZE_3   	7
+#define VEC_SIZE_4   	15
+
+	.text
+	.align	64
+
+__set_v4f_1:				! branch target: mark the vector in %o0 as 1 component wide
+	ld	[%o0 + V4F_FLAGS], %g2	! g2 = current flags
+	mov	1, %g1
+	st	%g1, [%o0 + V4F_SIZE]	! size = 1
+	or	%g2, VEC_SIZE_1, %g2
+	retl				! returns through %o7, i.e. to the caller of the routine that branched here
+	 st	%g2, [%o0 + V4F_FLAGS]	! delay slot: write the updated flags
+__set_v4f_2:				! branch target: mark the vector in %o0 as 2 components wide
+	ld	[%o0 + V4F_FLAGS], %g2	! g2 = current flags
+	mov	2, %g1
+	st	%g1, [%o0 + V4F_SIZE]	! size = 2
+	or	%g2, VEC_SIZE_2, %g2
+	retl				! returns through %o7, i.e. to the caller of the routine that branched here
+	 st	%g2, [%o0 + V4F_FLAGS]	! delay slot: write the updated flags
+__set_v4f_3:				! branch target: mark the vector in %o0 as 3 components wide
+	ld	[%o0 + V4F_FLAGS], %g2	! g2 = current flags
+	mov	3, %g1
+	st	%g1, [%o0 + V4F_SIZE]	! size = 3
+	or	%g2, VEC_SIZE_3, %g2
+	retl				! returns through %o7, i.e. to the caller of the routine that branched here
+	 st	%g2, [%o0 + V4F_FLAGS]	! delay slot: write the updated flags
+__set_v4f_4:				! branch target: mark the vector in %o0 as 4 components wide
+	ld	[%o0 + V4F_FLAGS], %g2	! g2 = current flags
+	mov	4, %g1
+	st	%g1, [%o0 + V4F_SIZE]	! size = 4
+	or	%g2, VEC_SIZE_4, %g2
+	retl				! returns through %o7, i.e. to the caller of the routine that branched here
+	 st	%g2, [%o0 + V4F_FLAGS]	! delay slot: write the updated flags
+
+#include "sparc_matrix.h"
+
+	/* First the raw versions. */
+
+	.globl	_mesa_sparc_transform_points1_general
+_mesa_sparc_transform_points1_general:	! out = (m0, m1, m2, m3) * x + (m12, m13, m14, m15)
+	ld	[%o2 + V4F_STRIDE], %o5	! o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+	/* %o0 = to_vec, %o1 = m[16], %o2 = from_vec; only x of each source vertex is read. */
+	LDMATRIX_0_1_2_3_12_13_14_15(%o1)
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0, matrix pointer no longer needed
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop handles two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f8	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group	1-cycle stall on %f0
+	fmuls	%f0, M1, %f2		! FGM	Group
+	fmuls	%f0, M2, %f3		! FGM	Group
+	fmuls	%f0, M3, %f4		! FGM	Group
+	fmuls	%f8, M0, %f9		! FGM	Group	f1 available
+	fadds	%f1, M12, %f1		! FGA
+	st	%f1, [%g2 + 0x00]	! LSU
+	fmuls	%f8, M1, %f10		! FGM	Group	f2 available
+	fadds	%f2, M13, %f2		! FGA
+	st	%f2, [%g2 + 0x04]	! LSU
+	fmuls	%f8, M2, %f11		! FGM	Group	f3 available
+	fadds	%f3, M14, %f3		! FGA
+	st	%f3, [%g2 + 0x08]	! LSU
+	fmuls	%f8, M3, %f12		! FGM	Group	f4 available
+	fadds	%f4, M15, %f4		! FGA
+	st	%f4, [%g2 + 0x0c]	! LSU
+	fadds	%f9, M12, %f9		! FGA	Group	f9 available
+	st	%f9, [%g2 + 0x10]	! LSU
+	fadds	%f10, M13, %f10		! FGA	Group	f10 available
+	st	%f10, [%g2 + 0x14]	! LSU
+	fadds	%f11, M14, %f11		! FGA	Group	f11 available
+	st	%f11, [%g2 + 0x18]	! LSU
+	fadds	%f12, M15, %f12		! FGA	Group	f12 available
+	st	%f12, [%g2 + 0x1c]	! LSU
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	fmuls	%f0, M0, %f1		! FGM	Group	1-cycle stall on %f0
+	fmuls	%f0, M1, %f2		! FGM	Group
+	fmuls	%f0, M2, %f3		! FGM	Group
+	fmuls	%f0, M3, %f4		! FGM	Group
+	fadds	%f1, M12, %f1		! FGA	Group
+	st	%f1, [%g2 + 0x00]	! LSU
+	fadds	%f2, M13, %f2		! FGA	Group
+	st	%f2, [%g2 + 0x04]	! LSU
+	fadds	%f3, M14, %f3		! FGA	Group
+	st	%f3, [%g2 + 0x08]	! LSU
+	fadds	%f4, M15, %f4		! FGA	Group
+	st	%f4, [%g2 + 0x0c]	! LSU
+
+3:
+	ba	__set_v4f_4		! finish by tagging the destination as 4 components wide
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_identity
+_mesa_sparc_transform_points1_identity:	! copies x of every source vertex; the matrix is not read
+	cmp	%o0, %o2		! same vector for source and destination?
+	be	4f			! yes: return without touching it
+	 ld	[%o2 + V4F_STRIDE], %o5	! delay slot: o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop copies two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f1	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	st	%f0, [%g2 + 0x00]	! LSU	Group
+	cmp	%o1, %o2		! IEU1
+	st	%f1, [%g2 + 0x10]	! LSU	Group
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	addx	%g0, %g0, %g0		! result discarded into %g0; presumably a scheduling filler
+	st	%f0, [%g2 + 0x00]
+
+3:
+	ba	__set_v4f_1		! finish by tagging the destination as 1 component wide
+	 nop
+
+4:	retl				! in-place call: nothing to do
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_2d
+_mesa_sparc_transform_points1_2d:	! out = (m0, m1) * x + (m12, m13)
+	ld	[%o2 + V4F_STRIDE], %o5	! o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+	/* %o0 = to_vec, %o1 = m[16], %o2 = from_vec; only x of each source vertex is read. */
+	LDMATRIX_0_1_12_13(%o1)
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0, matrix pointer no longer needed
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop handles two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f8	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group
+	fmuls	%f0, M1, %f2		! FGM	Group
+	fmuls	%f8, M0, %f9		! FGM	Group
+	fmuls	%f8, M1, %f10		! FGM	Group
+	fadds	%f1, M12, %f3		! FGA	Group	f1 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f2, M13, %f4		! FGA	Group	f2 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	fadds	%f9, M12, %f11		! FGA	Group	f9 available
+	st	%f11, [%g2 + 0x10]	! LSU
+	fadds	%f10, M13, %f12		! FGA	Group	f10 available
+	st	%f12, [%g2 + 0x14]	! LSU
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	fmuls	%f0, M0, %f1
+	fmuls	%f0, M1, %f2
+	fadds	%f1, M12, %f3
+	st	%f3, [%g2 + 0x00]
+	fadds	%f2, M13, %f4
+	st	%f4, [%g2 + 0x04]
+
+3:
+	ba	__set_v4f_2		! finish by tagging the destination as 2 components wide
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_2d_no_rot
+_mesa_sparc_transform_points1_2d_no_rot:	! out = (m0 * x + m12, m13)
+	ld	[%o2 + V4F_STRIDE], %o5	! o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+	/* %o0 = to_vec, %o1 = m[16], %o2 = from_vec; only x of each source vertex is read. */
+	LDMATRIX_0_12_13(%o1)
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0, matrix pointer no longer needed
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop handles two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f4	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group
+	fmuls	%f4, M0, %f5		! FGM	Group
+	fadds	%f1, M12, %f3		! FGA	Group, 2 cycle stall, f1 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	st	M13, [%g2 + 0x04]	! LSU	Group, f5 available
+	fadds	%f5, M12, %f6		! FGA
+	st	%f6, [%g2 + 0x10]	! LSU	Group
+	st	M13, [%g2 + 0x14]	! LSU	Group
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	fmuls	%f0, M0, %f1
+	fadds	%f1, M12, %f3
+	st	%f3, [%g2 + 0x00]
+	st	M13, [%g2 + 0x04]
+
+3:
+	ba	__set_v4f_2		! finish by tagging the destination as 2 components wide
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_3d
+_mesa_sparc_transform_points1_3d:	! out = (m0, m1, m2) * x + (m12, m13, m14)
+	ld	[%o2 + V4F_STRIDE], %o5	! o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+	/* %o0 = to_vec, %o1 = m[16], %o2 = from_vec; only x of each source vertex is read. */
+	LDMATRIX_0_1_2_12_13_14(%o1)
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0, matrix pointer no longer needed
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop handles two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f4	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group
+	fmuls	%f0, M1, %f2		! FGM	Group
+	fmuls	%f0, M2, %f3		! FGM	Group
+	fmuls	%f4, M0, %f5		! FGM	Group
+	fadds	%f1, M12, %f1		! FGA	Group, f1 available
+	st	%f1, [%g2 + 0x00]	! LSU
+	fmuls	%f4, M1, %f6		! FGM
+	fadds	%f2, M13, %f2		! FGA	Group, f2 available
+	st	%f2, [%g2 + 0x04]	! LSU
+	fmuls	%f4, M2, %f7		! FGM
+	fadds	%f3, M14, %f3		! FGA	Group, f3 available
+	st	%f3, [%g2 + 0x08]	! LSU
+	fadds	%f5, M12, %f5		! FGA	Group, f5 available
+	st	%f5, [%g2 + 0x10]	! LSU
+	fadds	%f6, M13, %f6		! FGA	Group, f6 available
+	st	%f6, [%g2 + 0x14]	! LSU
+	fadds	%f7, M14, %f7		! FGA	Group, f7 available
+	st	%f7, [%g2 + 0x18]	! LSU
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	fmuls	%f0, M0, %f1
+	fmuls	%f0, M1, %f2
+	fmuls	%f0, M2, %f3
+	fadds	%f1, M12, %f1
+	st	%f1, [%g2 + 0x00]
+	fadds	%f2, M13, %f2
+	st	%f2, [%g2 + 0x04]
+	fadds	%f3, M14, %f3
+	st	%f3, [%g2 + 0x08]
+
+3:
+	ba	__set_v4f_3		! finish by tagging the destination as 3 components wide
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_3d_no_rot
+_mesa_sparc_transform_points1_3d_no_rot:	! out = (m0 * x + m12, m13, m14)
+	ld	[%o2 + V4F_STRIDE], %o5	! o5 = source stride in bytes
+	LDPTR	[%o2 + V4F_START], %g1	! g1 = source cursor
+	LDPTR	[%o0 + V4F_START], %g2	! g2 = destination cursor, 16 bytes per vertex
+	ld	[%o2 + V4F_COUNT], %g3	! g3 = vertex count
+	/* %o0 = to_vec, %o1 = m[16], %o2 = from_vec; only x of each source vertex is read. */
+	LDMATRIX_0_12_13_14(%o1)
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	! destination count = source count
+	bl	3f			! count < 1: no vertices
+	 clr	%o1			! delay slot: i = 0, matrix pointer no longer needed
+
+	be	2f			! count == 1: single-vertex tail only
+	 andn	%g3, 1, %o2		! delay slot: o2 = count with bit 0 cleared
+	/* Main loop handles two vertices per pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f2	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group
+	fmuls	%f2, M0, %f3		! FGM	Group
+	fadds	%f1, M12, %f1		! FGA	Group, 2 cycle stall, f1 available
+	st	%f1, [%g2 + 0x00]	! LSU
+	fadds	%f3, M12, %f3		! FGA	Group, f3 available
+	st	M13, [%g2 + 0x04]	! LSU
+	st	M14, [%g2 + 0x08]	! LSU	Group
+	st	%f3, [%g2 + 0x10]	! LSU	Group
+	st	M13, [%g2 + 0x14]	! LSU	Group
+	st	M14, [%g2 + 0x18]	! LSU	Group
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+
+	cmp	%o1, %g3		! even count: nothing left over
+	be	3f
+	 nop
+	/* Tail: the one remaining vertex of an odd count, or the only vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	fmuls	%f0, M0, %f1
+	fadds	%f1, M12, %f1
+	st	%f1, [%g2 + 0x00]
+	st	M13, [%g2 + 0x04]
+	st	M14, [%g2 + 0x08]
+
+3:
+	ba	__set_v4f_3		! finish by tagging the destination as 3 components wide
+	 nop
+
+	.globl	_mesa_sparc_transform_points1_perspective
+_mesa_sparc_transform_points1_perspective:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0; out[1] = 0; out[2] = M14; out[3] = 0 (zero words come from %g0). */
+	LDMATRIX_0_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	bl	3f	/* count < 1: no vertices to transform */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* count == 1 skips to the single-vertex tail; otherwise %o2 = count with bit 0 cleared. */
+	be	2f
+	 andn	%g3, 1, %o2
+	/* Main loop: two source vertices per pass, written to two 0x10-byte dest slots. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	ld	[%g1 + 0x00], %f2	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f1		! FGM	Group
+	st	%f1, [%g2 + 0x00]	! LSU
+	fmuls	%f2, M0, %f3		! FGM	Group
+	st	%g0, [%g2 + 0x04]	! LSU
+	st	M14, [%g2 + 0x08]	! LSU	Group
+	st	%g0, [%g2 + 0x0c]	! LSU	Group
+	st	%f3, [%g2 + 0x10]	! LSU	Group
+	st	%g0, [%g2 + 0x14]	! LSU	Group
+	st	M14, [%g2 + 0x18]	! LSU	Group
+	st	%g0, [%g2 + 0x1c]	! LSU	Group
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+	/* Even count: all vertices are done. Odd count: one vertex remains for the tail. */
+	cmp	%o1, %g3
+	be	3f
+	 nop
+	/* Tail: transform a single vertex. */
+2:	ld	[%g1 + 0x00], %f0
+	fmuls	%f0, M0, %f1
+	st	%f1, [%g2 + 0x00]
+	st	%g0, [%g2 + 0x04]
+	st	M14, [%g2 + 0x08]
+	st	%g0, [%g2 + 0x0c]
+
+3:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_general
+_mesa_sparc_transform_points2_general:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = (in[0] * M<k> + M<12+k>) + in[1] * M<4+k> for k = 0..3. */
+	LDMATRIX_0_1_2_3_4_5_6_7_12_13_14_15(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f2		! FGM	Group
+	fmuls	%f0, M1, %f3		! FGM	Group
+	fmuls	%f0, M2, %f4		! FGM	Group
+	fmuls	%f0, M3, %f5		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group	f2 available
+	fmuls	%f1, M4, %f6		! FGM
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	fmuls	%f1, M5, %f7		! FGM
+	fadds	%f4, M14, %f4		! FGA	Group	f4 available
+	fmuls	%f1, M6, %f8		! FGM
+	fadds	%f5, M15, %f5		! FGA	Group	f5 available
+	fmuls	%f1, M7, %f9		! FGM
+	fadds	%f2, %f6, %f2		! FGA	Group	f6 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, %f7, %f3		! FGA	Group	f7 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	fadds	%f4, %f8, %f4		! FGA	Group	f8 available
+	st	%f4, [%g2 + 0x08]	! LSU
+	fadds	%f5, %f9, %f5		! FGA	Group	f9 available
+	st	%f5, [%g2 + 0x0c]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_identity
+_mesa_sparc_transform_points2_identity:	/* args used: %o0 = dest vector, %o2 = source vector; copies in[0..1] to out[0..1] */
+	cmp	%o2, %o0	/* are source and dest the same vector? */
+	be	3f	/* yes: return without copying anything */
+	 ld	[%o2 + V4F_STRIDE], %o5	/* delay slot, runs on both paths: %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; out[2] and out[3] are not written. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%g1, %o5, %g1		! IEU0
+	cmp	%o1, %g3		! IEU1
+	st	%f0, [%g2 + 0x00]	! LSU	Group
+	st	%f1, [%g2 + 0x04]	! LSU	Group
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0
+2:
+	ba	__set_v4f_2	/* leaves via __set_v4f_2 (defined outside this chunk; presumably records a 2-component result) */
+	 nop
+
+3:	retl	/* same-vector case: plain leaf return, __set_v4f_2 is not reached */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_2d
+_mesa_sparc_transform_points2_2d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = (in[0] * M<k> + M<12+k>) + in[1] * M<4+k> for k = 0..1; out[2..3] are not written. */
+	LDMATRIX_0_1_4_5_12_13(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	bl	3f	/* count < 1: no vertices to transform */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* count == 1 skips to the single-vertex tail; otherwise %o2 = count with bit 0 cleared. */
+	be	2f
+	 andn	%g3, 1, %o2
+	/* Main loop: two source vertices per pass (%f0/%f1 and %f8/%f9), two 0x10-byte dest slots. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f2		! FGM
+	ld	[%g1 + 0x00], %f8	! LSU	Group
+	fmuls	%f0, M1, %f3		! FGM
+	ld	[%g1 + 0x04], %f9	! LSU	Group
+	fmuls	%f1, M4, %f6		! FGM
+	fmuls	%f1, M5, %f7		! FGM	Group
+	add	%g1, %o5, %g1		! IEU0
+	fmuls	%f8, M0, %f10		! FGM	Group	f2 available
+	fadds	%f2, M12, %f2		! FGA
+	fmuls	%f8, M1, %f11		! FGM	Group	f3 available
+	fadds	%f3, M13, %f3		! FGA
+	fmuls	%f9, M4, %f12		! FGM	Group
+	fmuls	%f9, M5, %f13		! FGM	Group
+	fadds	%f10, M12, %f10		! FGA	Group	f2, f10 available
+	fadds	%f2, %f6, %f2		! FGA	Group	f3, f11 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f11, M13, %f11		! FGA	Group	f12 available
+	fadds	%f3, %f7, %f3		! FGA	Group	f13 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	fadds	%f10, %f12, %f10	! FGA	Group	f10 available
+	st	%f10, [%g2 + 0x10]	! LSU
+	fadds	%f11, %f13, %f11	! FGA	Group	f11 available
+	st	%f11, [%g2 + 0x14]	! LSU
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+	/* Even count: all vertices are done. Odd count: one vertex remains for the tail. */
+	cmp	%o1, %g3
+	be	3f
+	 nop
+	/* Tail: transform a single vertex. */
+2:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM	Group
+	fmuls	%f0, M1, %f3		! FGM	Group
+	fmuls	%f1, M4, %f6		! FGM	Group
+	fmuls	%f1, M5, %f7		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group	f2 available
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	fadds	%f2, %f6, %f2		! FGA	Group	2 cycle stall, f2 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, %f7, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x04]	! LSU
+
+3:
+	ba	__set_v4f_2	/* leaves via __set_v4f_2 (defined outside this chunk; presumably records a 2-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_2d_no_rot
+_mesa_sparc_transform_points2_2d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0 + M12; out[1] = in[1] * M5 + M13; out[2..3] are not written. */
+	LDMATRIX_0_5_12_13(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	bl	3f	/* count < 1: no vertices to transform */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* count == 1 skips to the single-vertex tail; otherwise %o2 = count with bit 0 cleared. */
+	be	2f
+	 andn	%g3, 1, %o2
+	/* Main loop: two source vertices per pass (%f0/%f1 and %f4/%f5), two 0x10-byte dest slots. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	ld	[%g1 + 0x00], %f4	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM
+	ld	[%g1 + 0x04], %f5	! LSU	Group
+	fmuls	%f1, M5, %f3		! FGM
+	fmuls	%f4, M0, %f6		! FGM	Group
+	add	%g1, %o5, %g1		! IEU0
+	fmuls	%f5, M5, %f7		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group	f2 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	fadds	%f6, M12, %f6		! FGA	Group	f6 available
+	st	%f6, [%g2 + 0x10]	! LSU
+	fadds	%f7, M13, %f7		! FGA	Group	f7 available
+	st	%f7, [%g2 + 0x14]	! LSU
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+	/* Even count: all vertices are done. Odd count: one vertex remains for the tail. */
+	cmp	%o1, %g3
+	be	3f
+	 nop
+	/* Tail: transform a single vertex. */
+2:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM	Group
+	fmuls	%f1, M5, %f3		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group, 2 cycle stall, f2 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x04]	! LSU
+
+3:
+	ba	__set_v4f_2	/* leaves via __set_v4f_2 (defined outside this chunk; presumably records a 2-component result) */
+	 nop
+
+	/* orig: 12 cycles */
+	.globl	_mesa_sparc_transform_points2_3d
+_mesa_sparc_transform_points2_3d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer; LDPTR (not ld) so the full pointer is loaded, as in the sibling routines */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer; LDPTR for the same reason */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = (in[0] * M<k> + M<12+k>) + in[1] * M<4+k> for k = 0..2; out[3] is not written. */
+	LDMATRIX_0_1_2_3_4_5_6_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	bl	3f	/* count < 1: no vertices to transform */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* count == 1 skips to the single-vertex tail; otherwise %o2 = count with bit 0 cleared. */
+	be	2f
+	 andn	%g3, 1, %o2
+	/* Main loop: two source vertices per pass (%f0/%f1 and %f9/%f10); %f0 is recycled as a temporary late in the pass. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%o1, 2, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	ld	[%g1 + 0x00], %f9	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM
+	ld	[%g1 + 0x04], %f10	! LSU	Group
+	fmuls	%f0, M1, %f3		! FGM
+	fmuls	%f0, M2, %f4		! FGM	Group
+	add	%g1, %o5, %g1		! IEU0
+	fmuls	%f1, M4, %f6		! FGM	Group
+	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
+	fadds	%f2, M12, %f2		! FGA
+	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
+	fadds	%f3, M13, %f3		! FGA
+	fmuls	%f9, M0, %f11		! FGM	Group	f4 available
+	fadds	%f4, M14, %f4		! FGA
+	fmuls	%f9, M1, %f12		! FGM	Group	f6 available
+	fmuls	%f9, M2, %f13		! FGM	Group	f2, f7 available
+	fadds	%f2, %f6, %f2		! FGA
+	st	%f2, [%g2 + 0x00]	! LSU
+	fmuls	%f10, M4, %f14		! FGM	Group	f3, f8 available
+	fadds	%f3, %f7, %f3		! FGA
+	st	%f3, [%g2 + 0x04]	! LSU
+	fmuls	%f10, M5, %f15		! FGM	Group	f4, f11 available
+	fadds	%f11, M12, %f11		! FGA
+	fmuls	%f10, M6, %f0		! FGM	Group	f12 available
+	fadds	%f12, M13, %f12		! FGA
+	fadds	%f13, M14, %f13		! FGA	Group	f13 available
+	fadds	%f4, %f8, %f4		! FGA	Group	f14 available
+	st	%f4, [%g2 + 0x08]	! LSU
+	fadds	%f11, %f14, %f11	! FGA	Group	f15, f11 available
+	st	%f11, [%g2 + 0x10]	! LSU
+	fadds	%f12, %f15, %f12	! FGA	Group	f0, f12 available
+	st	%f12, [%g2 + 0x14]	! LSU
+	fadds	%f13, %f0, %f13		! FGA	Group	f13 available
+	st	%f13, [%g2 + 0x18]	! LSU
+
+	cmp	%o1, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+	/* Even count: all vertices are done. Odd count: one vertex remains for the tail. */
+	cmp	%o1, %g3
+	be	3f
+	 nop
+	/* Tail: transform a single vertex. */
+2:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM	Group
+	fmuls	%f0, M1, %f3		! FGM	Group
+	fmuls	%f0, M2, %f4		! FGM	Group
+	fmuls	%f1, M4, %f6		! FGM	Group
+	fmuls	%f1, M5, %f7		! FGM	Group	f2 available
+	fadds	%f2, M12, %f2		! FGA
+	fmuls	%f1, M6, %f8		! FGM	Group	f3 available
+	fadds	%f3, M13, %f3		! FGA
+	fadds	%f4, M14, %f4		! FGA	Group	f4 available
+	fadds	%f2, %f6, %f2		! FGA	Group	stall, f2, f6, f7 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, %f7, %f3		! FGA	Group	f3, f8 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	fadds	%f4, %f8, %f4		! FGA	Group	f4 available
+	st	%f4, [%g2 + 0x08]	! LSU
+
+3:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_3d_no_rot
+_mesa_sparc_transform_points2_3d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0 + M12; out[1] = in[1] * M5 + M13; out[2] = M14; out[3] is not written. */
+	LDMATRIX_0_5_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 1
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	bl	3f	/* count < 1: no vertices to transform */
+	 clr	%o3	/* delay slot: the counter is %o3 here because %o1 is dereferenced again at the exit */
+	/* count == 1 skips to the single-vertex tail; otherwise %o2 = count with bit 0 cleared. */
+	be	2f
+	 andn	%g3, 1, %o2
+	/* Main loop: two source vertices per pass (%f0/%f1 and %f4/%f5), two 0x10-byte dest slots. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	add	%o3, 2, %o3		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	ld	[%g1 + 0x00], %f4	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM
+	ld	[%g1 + 0x04], %f5	! LSU	Group
+	fmuls	%f1, M5, %f3		! FGM
+	fmuls	%f4, M0, %f6		! FGM	Group
+	add	%g1, %o5, %g1		! IEU0
+	fmuls	%f5, M5, %f7		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group	f2 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	fadds	%f6, M12, %f6		! FGA	Group	f6 available
+	st	M14, [%g2 + 0x08]	! LSU
+	fadds	%f7, M13, %f7		! FGA	Group	f7 available
+	st	%f6, [%g2 + 0x10]	! LSU
+	st	%f7, [%g2 + 0x14]	! LSU	Group
+	st	M14, [%g2 + 0x18]	! LSU	Group
+	cmp	%o3, %o2		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x20, %g2		! IEU0	Group
+	/* Even count: all vertices are done. Odd count: one vertex remains for the tail. */
+	cmp	%o3, %g3
+	be	3f
+	 nop
+	/* Tail: transform a single vertex. */
+2:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	fmuls	%f0, M0, %f2		! FGM	Group
+	fmuls	%f1, M5, %f3		! FGM	Group
+	fadds	%f2, M12, %f2		! FGA	Group, 2 cycle stall, f2 available
+	st	%f2, [%g2 + 0x00]	! LSU
+	fadds	%f3, M13, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x04]	! LSU
+	st	M14, [%g2 + 0x08]	! LSU	Group
+
+3:	ld	[%o1 + (14 * 0x4)], %g3	/* raw 32-bit word at byte offset 56 from %o1 (presumably the element loaded as M14; assumes LDMATRIX_* left %o1 intact) */
+	cmp	%g3, 0	/* integer compare of the bits: only an all-zero word selects the __set_v4f_2 exit */
+	bne	__set_v4f_3	/* non-zero word: leave via __set_v4f_3 (defined outside this chunk) */
+	 nop
+	ba	__set_v4f_2	/* zero word: leave via __set_v4f_2 (defined outside this chunk) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points2_perspective
+_mesa_sparc_transform_points2_perspective:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0; out[1] = in[1] * M5; out[2] = M14; out[3] = 0 (zero word from %g0). */
+	LDMATRIX_0_5_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0
+	ld	[%g1 + 0x04], %f1
+	add	%o1, 1, %o1
+	add	%g1, %o5, %g1
+	fmuls	%f0, M0, %f2
+	st	%f2, [%g2 + 0x00]
+	fmuls	%f1, M5, %f3
+	st	%f3, [%g2 + 0x04]
+	st	M14, [%g2 + 0x08]
+	st	%g0, [%g2 + 0x0c]
+	cmp	%o1, %g3
+	bne	1b
+	 add	%g2, 0x10, %g2	/* delay slot: advance dest to the next 0x10-byte slot */
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_general
+_mesa_sparc_transform_points3_general:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = ((in[0] * M<k> + in[1] * M<4+k>) + M<12+k>) + in[2] * M<8+k> for k = 0..3. */
+	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; %f7..%f10 first hold the in[1] products, then are reused for the in[2] products. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f1, M4, %f7		! FGM	Group
+	fmuls	%f0, M1, %f4		! FGM	Group
+	fmuls	%f1, M5, %f8		! FGM	Group
+	fmuls	%f0, M2, %f5		! FGM	Group	f3 available
+	fmuls	%f1, M6, %f9		! FGM	Group	f7 available
+	fadds	%f3, %f7, %f3		! FGA
+	fmuls	%f0, M3, %f6		! FGM	Group	f4 available
+	fmuls	%f1, M7, %f10		! FGM	Group	f8 available
+	fadds	%f4, %f8, %f4		! FGA
+	fmuls	%f2, M8, %f7		! FGM	Group	f5 available
+	fmuls	%f2, M9, %f8		! FGM	Group	f9,f3 available
+	fadds	%f5, %f9, %f5		! FGA
+	fmuls	%f2, M10, %f9		! FGM	Group	f6 available
+	fadds	%f6, %f10, %f6		! FGA	Group	f10,f4 available
+	fmuls	%f2, M11, %f10		! FGM
+	fadds	%f3, M12, %f3		! FGA	Group	f7 available
+	fadds	%f4, M13, %f4		! FGA	Group	f8,f5 available
+	fadds	%f5, M14, %f5		! FGA	Group	f9 available
+	fadds	%f6, M15, %f6		! FGA	Group	f10,f6 available
+	fadds	%f3, %f7, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, %f8, %f4		! FGA	Group	f4 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	fadds	%f5, %f9, %f5		! FGA	Group	f5 available
+	st	%f5, [%g2 + 0x08]	! LSU
+	fadds	%f6, %f10, %f6		! FGA	Group	f6 available
+	st	%f6, [%g2 + 0x0c]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_identity
+_mesa_sparc_transform_points3_identity:	/* args used: %o0 = dest vector, %o2 = source vector; copies in[0..2] to out[0..2] */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* NOTE(review): no dest == source early return here, unlike _mesa_sparc_transform_points2_identity -- confirm this is intended. */
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; out[3] is not written. */
+1:	ld	[%g1 + 0x00], %f0
+	ld	[%g1 + 0x04], %f1
+	ld	[%g1 + 0x08], %f2
+	add	%o1, 1, %o1
+	add	%g1, %o5, %g1
+	cmp	%o1, %g3	/* condition codes are set early; the stores below do not change them */
+	st	%f0, [%g2 + 0x00]
+	st	%f1, [%g2 + 0x04]
+	st	%f2, [%g2 + 0x08]
+	bne	1b
+	 add	%g2, 0x10, %g2	/* delay slot: advance dest to the next 0x10-byte slot */
+2:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_2d
+_mesa_sparc_transform_points3_2d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = (in[0] * M<k> + M<12+k>) + in[1] * M<4+k> for k = 0..1; out[2] = in[2]; out[3] is not written. */
+	LDMATRIX_0_1_4_5_12_13(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f0, M1, %f4		! FGM	Group
+	fmuls	%f1, M4, %f6		! FGM	Group
+	fmuls	%f1, M5, %f7		! FGM	Group
+	fadds	%f3, M12, %f3		! FGA	Group	f3 available
+	fadds	%f4, M13, %f4		! FGA	Group	f4 available
+	fadds	%f3, %f6, %f3		! FGA	Group	f6 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, %f7, %f4		! FGA	Group	f7 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	st	%f2, [%g2 + 0x08]	! LSU	Group
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_2d_no_rot
+_mesa_sparc_transform_points3_2d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0 + M12; out[1] = in[1] * M5 + M13; out[2] = in[2]; out[3] is not written. */
+	LDMATRIX_0_5_12_13(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f1, M5, %f4		! FGM	Group
+	st	%f2, [%g2 + 0x08]	! LSU
+	fadds	%f3, M12, %f3		! FGA	Group
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, M13, %f4		! FGA	Group
+	st	%f4, [%g2 + 0x04]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_3d
+_mesa_sparc_transform_points3_3d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = ((in[0] * M<k> + in[1] * M<4+k>) + in[2] * M<8+k>) + M<12+k> for k = 0..2; out[3] is not written. */
+	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f1, M4, %f6		! FGM	Group
+	fmuls	%f0, M1, %f4		! FGM	Group
+	fmuls	%f1, M5, %f7		! FGM	Group
+	fmuls	%f0, M2, %f5		! FGM	Group	f3 available
+	fmuls	%f1, M6, %f8		! FGM	Group	f6 available
+	fadds	%f3, %f6, %f3		! FGA
+	fmuls	%f2, M8, %f9		! FGM	Group	f4 available
+	fmuls	%f2, M9, %f10		! FGM	Group	f7 available
+	fadds	%f4, %f7, %f4		! FGA
+	fmuls	%f2, M10, %f11		! FGM	Group	f5 available
+	fadds	%f5, %f8, %f5		! FGA	Group	f8, f3 available
+	fadds	%f3, %f9, %f3		! FGA	Group	f9 available
+	fadds	%f4, %f10, %f4		! FGA	Group	f10, f4 available
+	fadds	%f5, %f11, %f5		! FGA	Group	stall, f11, f5 available
+	fadds	%f3, M12, %f3		! FGA	Group	f3 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, M13, %f4		! FGA	Group	f4 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	fadds	%f5, M14, %f5		! FGA	Group	f5 available
+	st	%f5, [%g2 + 0x08]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_3d_no_rot
+_mesa_sparc_transform_points3_3d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0] * M0 + M12; out[1] = in[1] * M5 + M13; out[2] = in[2] * M10 + M14; out[3] is not written. */
+	LDMATRIX_0_5_10_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; the loop-end compare is issued early and consumed by the bne at the bottom. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	cmp	%o1, %g3		! IEU1	Group
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f1, M5, %f4		! FGM	Group
+	fmuls	%f2, M10, %f5		! FGM	Group
+	fadds	%f3, M12, %f3		! FGA	Group, stall, f3 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, M13, %f4		! FGA	Group, f4 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	fadds	%f5, M14, %f5		! FGA	Group, f5 available
+	st	%f5, [%g2 + 0x08]	! LSU
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_3	/* leaves via __set_v4f_3 (defined outside this chunk; presumably records a 3-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points3_perspective
+_mesa_sparc_transform_points3_perspective:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0]*M0 + in[2]*M8; out[1] = in[1]*M5 + in[2]*M9; out[2] = in[2]*M10 + M14; out[3] = -in[2]. */
+	LDMATRIX_0_5_8_9_10_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; %f6 is reused at the end to hold the negated in[2]. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f3		! FGM
+	fmuls	%f2, M8, %f6		! FGM	Group
+	fmuls	%f1, M5, %f4		! FGM	Group
+	fmuls	%f2, M9, %f7		! FGM	Group
+	fmuls	%f2, M10, %f5		! FGM	Group	f3 available
+	fadds	%f3, %f6, %f3		! FGA	Group	f6 available
+	st	%f3, [%g2 + 0x00]	! LSU
+	fadds	%f4, %f7, %f4		! FGA	Group	stall, f4, f7 available
+	st	%f4, [%g2 + 0x04]	! LSU
+	fadds	%f5, M14, %f5		! FGA	Group
+	st	%f5, [%g2 + 0x08]	! LSU
+	fnegs	%f2, %f6		! FGA	Group
+	st	%f6, [%g2 + 0x0c]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_general
+_mesa_sparc_transform_points4_general:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = ((in[0]*M<k> + in[1]*M<4+k>) + in[2]*M<8+k>) + in[3]*M<12+k> for k = 0..3. */
+	LDMATRIX_0_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; %f8..%f11 first hold the in[1] products, then are reused for the in[3] products. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	ld	[%g1 + 0x0c], %f3	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f4		! FGM	Group
+	fmuls	%f1, M4, %f8		! FGM	Group
+	fmuls	%f0, M1, %f5		! FGM	Group
+	fmuls	%f1, M5, %f9		! FGM	Group
+	fmuls	%f0, M2, %f6		! FGM	Group	f4 available
+	fmuls	%f1, M6, %f10		! FGM	Group	f8 available
+	fadds	%f4, %f8, %f4		! FGA
+	fmuls	%f0, M3, %f7		! FGM	Group	f5 available
+	fmuls	%f1, M7, %f11		! FGM	Group	f9 available
+	fadds	%f5, %f9, %f5		! FGA
+	fmuls	%f2, M8, %f12		! FGM	Group	f6 available
+	fmuls	%f2, M9, %f13		! FGM	Group	f10, f4 available
+	fadds	%f6, %f10, %f6		! FGA
+	fmuls	%f2, M10, %f14		! FGM	Group	f7 available
+	fmuls	%f2, M11, %f15		! FGM	Group	f11, f5 available
+	fadds	%f7, %f11, %f7		! FGA
+	fmuls	%f3, M12, %f8		! FGM	Group	f12 available
+	fadds	%f4, %f12, %f4		! FGA
+	fmuls	%f3, M13, %f9		! FGM	Group	f13, f6 available
+	fadds	%f5, %f13, %f5		! FGA
+	fmuls	%f3, M14, %f10		! FGM	Group	f14 available
+	fadds	%f6, %f14, %f6		! FGA
+	fmuls	%f3, M15, %f11		! FGM	Group	f15, f7 available
+	fadds	%f7, %f15, %f7		! FGA
+	fadds	%f4, %f8, %f4		! FGA	Group	f8, f4 available
+	st	%f4, [%g2 + 0x00]	! LSU
+	fadds	%f5, %f9, %f5		! FGA	Group	f9, f5 available
+	st	%f5, [%g2 + 0x04]	! LSU
+	fadds	%f6, %f10, %f6		! FGA	Group	f10, f6 available
+	st	%f6, [%g2 + 0x08]	! LSU
+	fadds	%f7, %f11, %f7		! FGA	Group	f11, f7 available
+	st	%f7, [%g2 + 0x0c]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_identity
+_mesa_sparc_transform_points4_identity:	/* args used: %o0 = dest vector, %o2 = source vector; copies in[0..3] to out[0..3] */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* NOTE(review): no dest == source early return here, unlike _mesa_sparc_transform_points2_identity -- confirm this is intended. */
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0
+	ld	[%g1 + 0x04], %f1
+	ld	[%g1 + 0x08], %f2
+	add	%o1, 1, %o1
+	ld	[%g1 + 0x0c], %f3
+	add	%g1, %o5, %g1
+	st	%f0, [%g2 + 0x00]
+	st	%f1, [%g2 + 0x04]
+	st	%f2, [%g2 + 0x08]
+	cmp	%o1, %g3	/* condition codes are set before the last store, which does not change them */
+	st	%f3, [%g2 + 0x0c]
+	bne	1b
+	 add	%g2, 0x10, %g2	/* delay slot: advance dest to the next 0x10-byte slot */
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_2d
+_mesa_sparc_transform_points4_2d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = (in[0]*M<k> + in[1]*M<4+k>) + in[3]*M<12+k> for k = 0..1; out[2] = in[2]; out[3] = in[3]. */
+	LDMATRIX_0_1_4_5_12_13(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	ld	[%g1 + 0x0c], %f3	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f4		! FGM
+	fmuls	%f1, M4, %f8		! FGM	Group
+	fmuls	%f0, M1, %f5		! FGM	Group
+	fmuls	%f1, M5, %f9		! FGM	Group	f4 available
+	fmuls	%f3, M12, %f12		! FGM	Group
+	fmuls	%f3, M13, %f13		! FGM	Group	f8 available
+	fadds	%f4, %f8, %f4		! FGA
+	fadds	%f5, %f9, %f5		! FGA	Group	stall, f5, f9 available
+	fadds	%f4, %f12, %f4		! FGA	Group	2 cycle stall, f4, f12, f13 avail
+	st	%f4, [%g2 + 0x00]	! LSU
+	fadds	%f5, %f13, %f5		! FGA	Group	f5 available
+	st	%f5, [%g2 + 0x04]	! LSU
+	st	%f2, [%g2 + 0x08]	! LSU	Group
+	st	%f3, [%g2 + 0x0c]	! LSU	Group
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_2d_no_rot
+_mesa_sparc_transform_points4_2d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0]*M0 + in[3]*M12; out[1] = in[1]*M5 + in[3]*M13; out[2] = in[2]; out[3] = in[3]. */
+	LDMATRIX_0_5_12_13(%o1)	/* only M0, M5, M12, M13 are read below, same macro as the other 2d_no_rot routines */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0
+	ld	[%g1 + 0x04], %f1
+	ld	[%g1 + 0x08], %f2
+	ld	[%g1 + 0x0c], %f3
+	add	%o1, 1, %o1
+	add	%g1, %o5, %g1
+	fmuls	%f0, M0, %f4
+	fmuls	%f3, M12, %f8
+	fmuls	%f1, M5, %f5
+	fmuls	%f3, M13, %f9
+	fadds	%f4, %f8, %f4
+	st	%f4, [%g2 + 0x00]
+	fadds	%f5, %f9, %f5
+	st	%f5, [%g2 + 0x04]
+	st	%f2, [%g2 + 0x08]
+	st	%f3, [%g2 + 0x0c]
+	cmp	%o1, %g3
+	bne	1b
+	 add	%g2, 0x10, %g2	/* delay slot: advance dest to the next 0x10-byte slot */
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_3d
+_mesa_sparc_transform_points4_3d:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[k] = ((in[0]*M<k> + in[1]*M<4+k>) + in[2]*M<8+k>) + in[3]*M<12+k> for k = 0..2; out[3] = in[3]. */
+	LDMATRIX_0_1_2_4_5_6_8_9_10_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	ld	[%g1 + 0x0c], %f3	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f4		! FGM
+	fmuls	%f1, M4, %f7		! FGM	Group
+	fmuls	%f0, M1, %f5		! FGM	Group
+	fmuls	%f1, M5, %f8		! FGM	Group
+	fmuls	%f0, M2, %f6		! FGM	Group	f4 available
+	fmuls	%f1, M6, %f9		! FGM	Group	f7 available
+	fadds	%f4, %f7, %f4		! FGA
+	fmuls	%f2, M8, %f10		! FGM	Group	f5 available
+	fmuls	%f2, M9, %f11		! FGM	Group	f8 available
+	fadds	%f5, %f8, %f5		! FGA
+	fmuls	%f2, M10, %f12		! FGM	Group	f6 available
+	fmuls	%f3, M12, %f13		! FGM	Group	f9, f4 available
+	fadds	%f6, %f9, %f6		! FGA
+	fmuls	%f3, M13, %f14		! FGM	Group	f10 available
+	fadds	%f4, %f10, %f4		! FGA
+	fmuls	%f3, M14, %f15		! FGM	Group	f11, f5 available
+	fadds	%f5, %f11, %f5		! FGA
+	fadds	%f6, %f12, %f6		! FGA	Group	stall, f12, f13, f6 available
+	fadds	%f4, %f13, %f4		! FGA	Group	f14, f4 available
+	st	%f4, [%g2 + 0x00]	! LSU
+	fadds	%f5, %f14, %f5		! FGA	Group	f15, f5 available
+	st	%f5, [%g2 + 0x04]	! LSU
+	fadds	%f6, %f15, %f6		! FGA	Group	f6 available
+	st	%f6, [%g2 + 0x08]	! LSU
+	st	%f3, [%g2 + 0x0c]	! LSU	Group
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_3d_no_rot
+_mesa_sparc_transform_points4_3d_no_rot:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0]*M0 + in[3]*M12; out[1] = in[1]*M5 + in[3]*M13; out[2] = in[2]*M10 + in[3]*M14; out[3] = in[3]. */
+	LDMATRIX_0_5_10_12_13_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one source vertex per pass, one 0x10-byte dest slot. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	ld	[%g1 + 0x0c], %f3	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f4		! FGM
+	fmuls	%f3, M12, %f7		! FGM	Group
+	fmuls	%f1, M5, %f5		! FGM	Group
+	fmuls	%f3, M13, %f8		! FGM	Group
+	fmuls	%f2, M10, %f6		! FGM	Group	f4 available
+	fmuls	%f3, M14, %f9		! FGM	Group	f7 available
+	fadds	%f4, %f7, %f4		! FGA
+	st	%f4, [%g2 + 0x00]	! LSU
+	fadds	%f5, %f8, %f5		! FGA	Group	stall, f5, f8 available
+	st	%f5, [%g2 + 0x04]	! LSU
+	fadds	%f6, %f9, %f6		! FGA	Group	stall, f6, f9 available
+	st	%f6, [%g2 + 0x08]	! LSU
+	st	%f3, [%g2 + 0x0c]	! LSU	Group
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop
+
+	.globl	_mesa_sparc_transform_points4_perspective
+_mesa_sparc_transform_points4_perspective:	/* args: %o0 = dest vector, %o1 = matrix, %o2 = source vector */
+	ld	[%o2 + V4F_STRIDE], %o5	/* %o5 = byte step between source vertices */
+	LDPTR	[%o2 + V4F_START], %g1	/* %g1 = source read pointer */
+	LDPTR	[%o0 + V4F_START], %g2	/* %g2 = dest write pointer */
+	ld	[%o2 + V4F_COUNT], %g3	/* %g3 = source vertex count */
+	/* Per vertex: out[0] = in[0]*M0 + in[2]*M8; out[1] = in[1]*M5 + in[2]*M9; out[2] = in[2]*M10 + in[3]*M14; out[3] = -in[2]. */
+	LDMATRIX_0_5_8_9_10_14(%o1)	/* fills the M<n> registers used below; macro defined outside this chunk */
+
+	cmp	%g3, 0
+	st	%g3, [%o0 + V4F_COUNT]	/* dest count = source count */
+	be	2f	/* count == 0: skip the loop */
+	 clr	%o1	/* delay slot: %o1 becomes the processed-vertex counter */
+	/* Loop: one vertex per pass; %f7 is reused at the end to hold the negated in[2]. */
+1:	ld	[%g1 + 0x00], %f0	! LSU	Group
+	ld	[%g1 + 0x04], %f1	! LSU	Group
+	ld	[%g1 + 0x08], %f2	! LSU	Group
+	ld	[%g1 + 0x0c], %f3	! LSU	Group
+	add	%o1, 1, %o1		! IEU0
+	add	%g1, %o5, %g1		! IEU1
+	fmuls	%f0, M0, %f4		! FGM
+	fmuls	%f2, M8, %f7		! FGM	Group
+	fmuls	%f1, M5, %f5		! FGM	Group
+	fmuls	%f2, M9, %f8		! FGM	Group
+	fmuls	%f2, M10, %f6		! FGM	Group	f4 available
+	fmuls	%f3, M14, %f9		! FGM	Group	f7 available
+	fadds	%f4, %f7, %f4		! FGA
+	st	%f4, [%g2 + 0x00]	! LSU
+	fadds	%f5, %f8, %f5		! FGA	Group	stall, f5, f8 available
+	st	%f5, [%g2 + 0x04]	! LSU
+	fadds	%f6, %f9, %f6		! FGA	Group	stall, f6, f9 available
+	st	%f6, [%g2 + 0x08]	! LSU
+	fnegs	%f2, %f7		! FGA	Group
+	st	%f7, [%g2 + 0x0c]	! LSU
+	cmp	%o1, %g3		! IEU1
+	bne	1b			! CTI
+	 add	%g2, 0x10, %g2		! IEU0	Group
+2:
+	ba	__set_v4f_4	/* leaves via __set_v4f_4 (defined outside this chunk; presumably records a 4-component result) */
+	 nop