Blame - lib/Target/CellSPU/SPUMathInstr.td - fp2-dev/platform/external/llvm

blob: 38279a0a9f81b19c3c8d62581c886a845740c7ef [file] [log] [blame]

Scott Michel	02d711b	2008-12-30 23:28:25 +0000	[diff] [blame^]	1	//======--- SPUMathInstr.td - Cell SPU math operations -- tablegen ---======//
				2	//
				3	// Cell SPU math operations
				4	//
				5	// This target description file contains instruction sequences for various
				6	// math operations, such as vector multiplies, i32 multiply, etc., for the
				7	// SPU's i32, i16, i8 and corresponding vector types.
				8	//
				9	// Any resemblance to libsimdmath or the Cell SDK simdmath library is
				10	// purely and completely coincidental.
				11	//
				12	// Primary author: Scott Michel (scottm@aero.org)
				13	//===----------------------------------------------------------------------===//
				14
				15	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				16	// v16i8 multiply instruction sequence:
				17	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				18
				19	def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
					// Anonymous pattern: a `mul` of two v16i8 VECREG operands is replaced by
					// the instruction tree below, whose root is an ORv4i32 of two sub-results.
					// NOTE(review): the opcodes used here (SELB*, FSMBI*, ROTMA*, SHL*, MPY*)
					// are defined outside this view; the comments below only restate the
					// shape of the tree and do not assert their semantics.
				20	(ORv4i32
					// Sub-result 1: a SELBv4i32 result combined via ANDv4i32 with the
					// constant built by (ILAv4i32 0x0000ffff).
				21	(ANDv4i32
					// SELB operand 1: MPYv8i16 of the unmodified inputs.
				22	(SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
					// SELB operand 2: both inputs go through ROTMAHIv8i16 with immediate 8,
					// are multiplied with MPYv8i16, and the product goes through SHLHIv8i16
					// with immediate 8.
				23	(SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
				24	(ROTMAHIv8i16 VECREG:$rB, 8)), 8),
					// SELB operand 3 (the selector): FSMBIv8i16 of immediate 0x2222.
				25	(FSMBIv8i16 0x2222)),
				26	(ILAv4i32 0x0000ffff)),
					// Sub-result 2: a second SELBv4i32 with the same selector immediate,
					// passed through SHLIv4i32 with immediate 16. Its inputs are pre-shifted
					// with ROTMAIv4i32_i32 by 16 (operand 1) and by 8 (operand 2).
					// NOTE(review): presumably this fills the half of each 32-bit word that
					// the 0x0000ffff mask clears in sub-result 1 -- TODO confirm.
				27	(SHLIv4i32
				28	(SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
				29	(ROTMAIv4i32_i32 VECREG:$rB, 16)),
				30	(SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
				31	(ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
				32	(FSMBIv8i16 0x2222)), 16))>;
				33
				34	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				35	// v8i16 multiply instruction sequence:
				36	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				37
				38	def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
					// Anonymous pattern: a `mul` of two v8i16 VECREG operands becomes a single
					// SELBv8i16 over two products.
					// SELB operand 1: MPYv8i16 of the inputs.
				39	(SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
					// SELB operand 2: MPYHHv8i16 of the inputs, passed through SHLIv4i32 with
					// immediate 16.
				40	(SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
					// SELB operand 3 (the selector): FSMBIv8i16 of immediate 0xcccc.
					// NOTE(review): presumably the mask alternates between halfwords so each
					// 16-bit lane takes its product from the matching multiply -- confirm
					// against the mpy/mpyhh/fsmbi/selb definitions.
				41	(FSMBIv8i16 0xcccc))>;
				42
				43	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				44	// v4i32, i32 multiply instruction sequence:
				45	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				46
				47	def MPYv4i32:
					// Named pattern: a `mul` of two v4i32 VECREG operands is expanded into
					// three multiplies combined by two nested Av4i32 nodes:
					//   MPYHv4i32($rA, $rB), MPYHv4i32($rB, $rA) and MPYUv4i32($rA, $rB).
					// The two MPYH nodes use the same opcode with the operands swapped.
					// NOTE(review): presumably this builds the 32-bit product from 16-bit
					// partial products (two cross terms plus the low term) -- confirm
					// against the MPYH/MPYU/A instruction definitions.
				48	Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
				49	(Av4i32
				50	(Av4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB),
				51	(MPYHv4i32 VECREG:$rB, VECREG:$rA)),
				52	(MPYUv4i32 VECREG:$rA, VECREG:$rB))>;
				53
				54	def MPYi32:
					// Named pattern: a `mul` of two R32C register operands is expanded into
					// three multiplies combined by two nested Ar32 nodes:
					//   MPYHr32($rA, $rB), MPYHr32($rB, $rA) and MPYUr32($rA, $rB).
					// The two MPYH nodes use the same opcode with the operands swapped.
					// NOTE(review): presumably this builds the 32-bit product from 16-bit
					// partial products (two cross terms plus the low term) -- confirm
					// against the MPYH/MPYU/A instruction definitions.
				55	Pat<(mul R32C:$rA, R32C:$rB),
				56	(Ar32
				57	(Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
				58	(MPYHr32 R32C:$rB, R32C:$rA)),
				59	(MPYUr32 R32C:$rA, R32C:$rB))>;
				60
				61	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				62	// f32, v4f32 divide instruction sequence:
				63	//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
				64
				65	// Reciprocal estimate and interpolation
					// Fragment shape: FRESTf32 is applied to the R32FP operand named $rB, and
					// its result is passed, together with $rB itself, to FIf32.
					// The fragment only names $rB; it is meant to be embedded (through its
					// .Fragment field) in a larger pattern that binds that operand.
					// NOTE(review): CodeFrag is declared outside this view -- confirm how
					// .Fragment is spliced.
				66	def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
				67	// Division estimate
					// Fragment shape: FMf32 of the R32FP operand named $rA and the
					// Interpf32 fragment (which is built from $rB).
					// NOTE(review): presumably FMf32 is a floating-point multiply, making
					// this $rA times the estimated reciprocal of $rB -- confirm.
				68	def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
				69	// Newton-Raphson iteration
					// Fragment shape: FMAf32 with three operands:
					//   1. FNMSf32(DivEstf32, $rB, $rA)
					//   2. Interpf32
					//   3. DivEstf32
					// DivEstf32.Fragment and Interpf32.Fragment each appear more than once in
					// the expanded tree.
					// NOTE(review): assuming FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c,
					// this is q + (rA - q*rB) * r, with q the division estimate and r the
					// reciprocal estimate -- confirm against the instruction definitions.
				70	def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
				71	Interpf32.Fragment,
				72	DivEstf32.Fragment)>;
				73	// Epsilon addition
					// Fragment shape: AIf32 of the NRaphf32 fragment and the immediate 1.
					// NOTE(review): presumably AIf32 is an integer add-immediate applied to
					// the float's bit pattern, i.e. a one-unit bump of the refined quotient
					// -- confirm against the AIf32 definition.
				74	def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;
				75
				76	def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
					// Anonymous pattern: an `fdiv` of two R32FP operands becomes a
					// SELBf32_cond over two candidate quotients:
					//   operand 1: the NRaphf32 fragment,
					//   operand 2: the Epsilonf32 fragment (NRaphf32 with AIf32 ..., 1),
					//   operand 3 (selector): CGTIf32(..., -1) applied to
					//              FNMSf32($rB, Epsilonf32, $rA).
					// The $rA/$rB names inside the fragments match the names bound here.
					// NOTE(review): assuming FNMS(a, b, c) = c - a*b, the selector tests the
					// residual rA - rB*q' of the bumped quotient q' against -1; presumably
					// this picks the bumped quotient when that residual is non-negative --
					// confirm the SELB operand order and the CGTI comparison semantics.
				77	(SELBf32_cond NRaphf32.Fragment,
				78	Epsilonf32.Fragment,
				79	(CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;
				80
				81	// Reciprocal estimate and interpolation
					// Fragment shape: FRESTv4f32 is applied to the v4f32 VECREG operand named
					// $rB, and its result is passed, together with $rB itself, to FIv4f32.
					// The fragment only names $rB; it is meant to be embedded (through its
					// .Fragment field) in a larger pattern that binds that operand.
					// NOTE(review): CodeFrag is declared outside this view -- confirm how
					// .Fragment is spliced.
				82	def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
				83	// Division estimate
					// Fragment shape: FMv4f32 of the v4f32 VECREG operand named $rA and the
					// Interpv4f32 fragment (which is built from $rB).
					// NOTE(review): presumably FMv4f32 is a floating-point multiply, making
					// this $rA times the estimated reciprocal of $rB -- confirm.
				84	def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
				85	// Newton-Raphson iteration
					// Fragment shape: FMAv4f32 with three operands:
					//   1. FNMSv4f32(DivEstv4f32, $rB, $rA)
					//   2. Interpv4f32
					//   3. DivEstv4f32
					// DivEstv4f32.Fragment and Interpv4f32.Fragment each appear more than once
					// in the expanded tree.
					// NOTE(review): assuming FNMS(a, b, c) = c - a*b and FMA(a, b, c) = a*b + c,
					// this is q + (rA - q*rB) * r, with q the division estimate and r the
					// reciprocal estimate -- confirm against the instruction definitions.
				86	def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
				87	(v4f32 VECREG:$rB),
				88	(v4f32 VECREG:$rA)),
				89	Interpv4f32.Fragment,
				90	DivEstv4f32.Fragment)>;
				91	// Epsilon addition
					// Fragment shape: AIv4f32 of the NRaphv4f32 fragment and the immediate 1.
					// NOTE(review): presumably AIv4f32 is an integer add-immediate applied to
					// each float's bit pattern, i.e. a one-unit bump of the refined quotient
					// -- confirm against the AIv4f32 definition.
				92	def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;
				93
				94	def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
					// Anonymous pattern: an `fdiv` of two v4f32 VECREG operands becomes a
					// SELBv4f32_cond over two candidate quotients:
					//   operand 1: the NRaphv4f32 fragment,
					//   operand 2: the Epsilonv4f32 fragment (NRaphv4f32 with AIv4f32 ..., 1),
					//   operand 3 (selector): CGTIv4f32(..., -1) applied to
					//              FNMSv4f32($rB, Epsilonv4f32, $rA).
					// The $rA/$rB names inside the fragments match the names bound here.
					// NOTE(review): assuming FNMS(a, b, c) = c - a*b, the selector tests the
					// residual rA - rB*q' of the bumped quotient q' against -1; presumably
					// this picks the bumped quotient when that residual is non-negative --
					// confirm the SELB operand order and the CGTI comparison semantics.
				95	(SELBv4f32_cond NRaphv4f32.Fragment,
				96	Epsilonv4f32.Fragment,
				97	(CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
				98	Epsilonv4f32.Fragment,
				99	(v4f32 VECREG:$rA)), -1))>;