Blame - arch/m68k/ifpsp060/src/fpsp.S - kernel/msm-4.9

blob: 9bbffebe3eb504833ed0937670bd4168751d61a4 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				2	MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
				3	M68000 Hi-Performance Microprocessor Division
				4	M68060 Software Package
				5	Production Release P1.00 -- October 10, 1994
				6
Jan Engelhardt	96de0e2	2007-10-19 23:21:04 +0200	[diff] [blame]	7	M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	8
				9	THE SOFTWARE is provided on an "AS IS" basis and without warranty.
				10	To the maximum extent permitted by applicable law,
				11	MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
				12	INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
				13	and any warranty against infringement with regard to the SOFTWARE
				14	(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
				15
				16	To the maximum extent permitted by applicable law,
				17	IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
				18	(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
				19	BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
				20	ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
				21	Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
				22
				23	You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
				24	so long as this entire notice is retained without alteration in any modified and/or
				25	redistributed versions, and that such modified versions are clearly identified as such.
				26	No licenses are granted by implication, estoppel or otherwise under any patents
				27	or trademarks of Motorola, Inc.
				28	~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
				29	#
				30	# freal.s:
				31	# This file is appended to the top of the 060FPSP package
				32	# and contains the entry points into the package. The user, in
				33	# effect, branches to one of the branch table entries located
				34	# after _060FPSP_TABLE.
				35	# Also, subroutine stubs exist in this file (_fpsp_done for
				36	# example) that are referenced by the FPSP package itself in order
				37	# to call a given routine. The stub routine actually performs the
				38	# callout. The FPSP code does a "bsr" to the stub routine. This
				39	# extra layer of hierarchy adds a slight performance penalty but
				40	# it makes the FPSP code easier to read and more maintainable.
				41	#
				42
				43	set _off_bsun, 0x00 # byte offsets of the longword slots in the callout table at _060FPSP_TABLE-0x80; this slot: _real_bsun
				44	set _off_snan, 0x04 # slot read by _real_snan
				45	set _off_operr, 0x08 # slot read by _real_operr
				46	set _off_ovfl, 0x0c # slot read by _real_ovfl
				47	set _off_unfl, 0x10 # slot read by _real_unfl
				48	set _off_dz, 0x14 # slot read by _real_dz
				49	set _off_inex, 0x18 # slot read by _real_inex
				50	set _off_fline, 0x1c # slot read by _real_fline
				51	set _off_fpu_dis, 0x20 # slot read by _real_fpu_disabled
				52	set _off_trap, 0x24 # slot read by _real_trap
				53	set _off_trace, 0x28 # slot read by _real_trace
				54	set _off_access, 0x2c # slot read by _real_access
				55	set _off_done, 0x30 # slot read by _fpsp_done
				56
				57	set _off_imr, 0x40 # memory-access callout slots start here; this slot: _imem_read
				58	set _off_dmr, 0x44 # slot read by _dmem_read
				59	set _off_dmw, 0x48 # slot read by _dmem_write
				60	set _off_irw, 0x4c # slot read by _imem_read_word
				61	set _off_irl, 0x50 # slot read by _imem_read_long
				62	set _off_drb, 0x54 # slot read by _dmem_read_byte
				63	set _off_drw, 0x58 # slot read by _dmem_read_word
				64	set _off_drl, 0x5c # slot read by _dmem_read_long
				65	set _off_dwb, 0x60 # slot read by _dmem_write_byte
				66	set _off_dww, 0x64 # slot read by _dmem_write_word
				67	set _off_dwl, 0x68 # slot read by _dmem_write_long
				68
				69	_060FPSP_TABLE:
				70
				71	###############################################################
				72
				73	# Here's the table of ENTRY POINTS for those linking the package.
				74	bra.l _fpsp_snan # entry 0: each entry is a long branch followed by a zero pad word
				75	short 0x0000
				76	bra.l _fpsp_operr # entry 1
				77	short 0x0000
				78	bra.l _fpsp_ovfl # entry 2
				79	short 0x0000
				80	bra.l _fpsp_unfl # entry 3
				81	short 0x0000
				82	bra.l _fpsp_dz # entry 4
				83	short 0x0000
				84	bra.l _fpsp_inex # entry 5
				85	short 0x0000
				86	bra.l _fpsp_fline # entry 6
				87	short 0x0000
				88	bra.l _fpsp_unsupp # entry 7
				89	short 0x0000
				90	bra.l _fpsp_effadd # entry 8
				91	short 0x0000
				92
				93	space 56 # padding after the nine entries (with 8-byte entries this rounds the table up to 0x80 bytes)
				94
				95	###############################################################
				96	global _fpsp_done # callout stub for the 060FPSP exit: jumps to the routine registered in the _off_done slot
				97	_fpsp_done:
				98	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				99	mov.l (_060FPSP_TABLE-0x80+_off_done,%pc),%d0 # d0 = longword stored in the _off_done callout slot
				100	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				101	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				102	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				103
				104	global _real_ovfl # callout stub for overflow-enabled code: jumps to the routine registered in the _off_ovfl slot
				105	_real_ovfl:
				106	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				107	mov.l (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0 # d0 = longword stored in the _off_ovfl callout slot
				108	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				109	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				110	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				111
				112	global _real_unfl # callout stub: jumps to the routine registered in the _off_unfl slot
				113	_real_unfl:
				114	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				115	mov.l (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0 # d0 = longword stored in the _off_unfl callout slot
				116	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				117	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				118	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				119
				120	global _real_inex # callout stub for inexact-enabled code: jumps to the routine registered in the _off_inex slot
				121	_real_inex:
				122	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				123	mov.l (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0 # d0 = longword stored in the _off_inex callout slot
				124	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				125	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				126	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				127
				128	global _real_bsun # callout stub: jumps to the routine registered in the _off_bsun slot
				129	_real_bsun:
				130	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				131	mov.l (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0 # d0 = longword stored in the _off_bsun callout slot
				132	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				133	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				134	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				135
				136	global _real_operr # callout stub: jumps to the routine registered in the _off_operr slot
				137	_real_operr:
				138	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				139	mov.l (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0 # d0 = longword stored in the _off_operr callout slot
				140	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				141	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				142	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				143
				144	global _real_snan # callout stub: jumps to the routine registered in the _off_snan slot
				145	_real_snan:
				146	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				147	mov.l (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0 # d0 = longword stored in the _off_snan callout slot
				148	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				149	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				150	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				151
				152	global _real_dz # callout stub: jumps to the routine registered in the _off_dz slot
				153	_real_dz:
				154	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				155	mov.l (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0 # d0 = longword stored in the _off_dz callout slot
				156	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				157	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				158	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				159
				160	global _real_fline # callout stub: jumps to the routine registered in the _off_fline slot
				161	_real_fline:
				162	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				163	mov.l (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0 # d0 = longword stored in the _off_fline callout slot
				164	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				165	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				166	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				167
				168	global _real_fpu_disabled # callout stub: jumps to the routine registered in the _off_fpu_dis slot
				169	_real_fpu_disabled:
				170	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				171	mov.l (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0 # d0 = longword stored in the _off_fpu_dis callout slot
				172	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				173	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				174	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				175
				176	global _real_trap # callout stub: jumps to the routine registered in the _off_trap slot
				177	_real_trap:
				178	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				179	mov.l (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0 # d0 = longword stored in the _off_trap callout slot
				180	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				181	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				182	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				183
				184	global _real_trace # callout stub for trace exception code: jumps to the routine registered in the _off_trace slot
				185	_real_trace:
				186	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				187	mov.l (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0 # d0 = longword stored in the _off_trace callout slot
				188	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				189	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				190	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				191
				192	global _real_access # callout stub: jumps to the routine registered in the _off_access slot
				193	_real_access:
				194	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				195	mov.l (_060FPSP_TABLE-0x80+_off_access,%pc),%d0 # d0 = longword stored in the _off_access callout slot
				196	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				197	mov.l 0x4(%sp),%d0 # reload the d0 saved above
				198	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				199
				200	#######################################
				201
				202	global _imem_read # callout stub: jumps to the routine registered in the _off_imr slot
				203	_imem_read:
				204	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				205	mov.l (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0 # d0 = longword stored in the _off_imr callout slot
				206	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				207	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				208	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				209
				210	global _dmem_read # callout stub: jumps to the routine registered in the _off_dmr slot
				211	_dmem_read:
				212	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				213	mov.l (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0 # d0 = longword stored in the _off_dmr callout slot
				214	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				215	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				216	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				217
				218	global _dmem_write # callout stub: jumps to the routine registered in the _off_dmw slot
				219	_dmem_write:
				220	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				221	mov.l (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0 # d0 = longword stored in the _off_dmw callout slot
				222	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				223	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				224	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				225
				226	global _imem_read_word # callout stub: jumps to the routine registered in the _off_irw slot
				227	_imem_read_word:
				228	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				229	mov.l (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0 # d0 = longword stored in the _off_irw callout slot
				230	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				231	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				232	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				233
				234	global _imem_read_long # callout stub for reading an instruction longword: jumps to the routine registered in the _off_irl slot
				235	_imem_read_long:
				236	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				237	mov.l (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0 # d0 = longword stored in the _off_irl callout slot
				238	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				239	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				240	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				241
				242	global _dmem_read_byte # callout stub: jumps to the routine registered in the _off_drb slot
				243	_dmem_read_byte:
				244	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				245	mov.l (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0 # d0 = longword stored in the _off_drb callout slot
				246	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				247	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				248	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				249
				250	global _dmem_read_word # callout stub: jumps to the routine registered in the _off_drw slot
				251	_dmem_read_word:
				252	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				253	mov.l (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0 # d0 = longword stored in the _off_drw callout slot
				254	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				255	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				256	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				257
				258	global _dmem_read_long # callout stub: jumps to the routine registered in the _off_drl slot
				259	_dmem_read_long:
				260	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				261	mov.l (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0 # d0 = longword stored in the _off_drl callout slot
				262	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				263	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				264	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				265
				266	global _dmem_write_byte # callout stub: jumps to the routine registered in the _off_dwb slot
				267	_dmem_write_byte:
				268	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				269	mov.l (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0 # d0 = longword stored in the _off_dwb callout slot
				270	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				271	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				272	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				273
				274	global _dmem_write_word # callout stub: jumps to the routine registered in the _off_dww slot
				275	_dmem_write_word:
				276	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				277	mov.l (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0 # d0 = longword stored in the _off_dww callout slot
				278	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				279	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				280	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				281
				282	global _dmem_write_long # callout stub: jumps to the routine registered in the _off_dwl slot
				283	_dmem_write_long:
				284	mov.l %d0,-(%sp) # save d0; it is used as scratch below
				285	mov.l (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0 # d0 = longword stored in the _off_dwl callout slot
				286	pea.l (_060FPSP_TABLE-0x80,%pc,%d0) # push (_060FPSP_TABLE-0x80)+d0 as the jump target
				287	mov.l 0x4(%sp),%d0 # reload the d0 saved above (caller's d0 reaches the callout unchanged)
				288	rtd &0x4 # jump to the pushed target and drop the saved-d0 longword
				289
				290	#
				291	# This file contains a set of define statements for constants
				292	# in order to promote readability within the corecode itself.
				293	#
				294
				295	set LOCAL_SIZE, 192 # stack frame size (bytes); used as the link.w displacement
				296	set LV, -LOCAL_SIZE # offset from a6 to the lowest byte of the local frame
				297
				298	set EXC_SR, 0x4 # stacked status register (positive offsets are above the saved a6)
				299	set EXC_PC, 0x6 # stacked pc
				300	set EXC_VOFF, 0xa # stacked vector offset
				301	set EXC_EA, 0xc # stacked <ea>
				302
				303	set EXC_FP, 0x0 # frame pointer
				304
				305	set EXC_AREGS, -68 # offset of all address regs (8 longwords, ends where EXC_FPREGS starts)
				306	set EXC_DREGS, -100 # offset of all data regs (8 longwords, ends where EXC_AREGS starts)
				307	set EXC_FPREGS, -36 # offset of all fp regs (3 x 12 bytes, ends at the saved a6)
				308
				309	set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7
				310	set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 (NOTE: same offset as EXC_A6)
				311	set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6
				312	set EXC_A5, EXC_AREGS+(5*4) # offset of saved a5
				313	set EXC_A4, EXC_AREGS+(4*4) # offset of saved a4
				314	set EXC_A3, EXC_AREGS+(3*4) # offset of saved a3
				315	set EXC_A2, EXC_AREGS+(2*4) # offset of saved a2
				316	set EXC_A1, EXC_AREGS+(1*4) # offset of saved a1
				317	set EXC_A0, EXC_AREGS+(0*4) # offset of saved a0
				318	set EXC_D7, EXC_DREGS+(7*4) # offset of saved d7
				319	set EXC_D6, EXC_DREGS+(6*4) # offset of saved d6
				320	set EXC_D5, EXC_DREGS+(5*4) # offset of saved d5
				321	set EXC_D4, EXC_DREGS+(4*4) # offset of saved d4
				322	set EXC_D3, EXC_DREGS+(3*4) # offset of saved d3
				323	set EXC_D2, EXC_DREGS+(2*4) # offset of saved d2
				324	set EXC_D1, EXC_DREGS+(1*4) # offset of saved d1
				325	set EXC_D0, EXC_DREGS+(0*4) # offset of saved d0
				326
				327	set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0
				328	set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1
				329	set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used)
				330
				331	set FP_SCR1, LV+80 # fp scratch 1 (12-byte slot; the four fp slots below are 12 bytes apart)
				332	set FP_SCR1_EX, FP_SCR1+0
				333	set FP_SCR1_SGN, FP_SCR1+2
				334	set FP_SCR1_HI, FP_SCR1+4
				335	set FP_SCR1_LO, FP_SCR1+8
				336
				337	set FP_SCR0, LV+68 # fp scratch 0
				338	set FP_SCR0_EX, FP_SCR0+0
				339	set FP_SCR0_SGN, FP_SCR0+2
				340	set FP_SCR0_HI, FP_SCR0+4
				341	set FP_SCR0_LO, FP_SCR0+8
				342
				343	set FP_DST, LV+56 # fp destination operand
				344	set FP_DST_EX, FP_DST+0
				345	set FP_DST_SGN, FP_DST+2
				346	set FP_DST_HI, FP_DST+4
				347	set FP_DST_LO, FP_DST+8
				348
				349	set FP_SRC, LV+44 # fp source operand
				350	set FP_SRC_EX, FP_SRC+0
				351	set FP_SRC_SGN, FP_SRC+2
				352	set FP_SRC_HI, FP_SRC+4
				353	set FP_SRC_LO, FP_SRC+8
				354
				355	set USER_FPIAR, LV+40 # FP instr address register
				356
				357	set USER_FPSR, LV+36 # FP status register
				358	set FPSR_CC, USER_FPSR+0 # FPSR condition codes
				359	set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte
				360	set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte
				361	set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte
				362
				363	set USER_FPCR, LV+32 # FP control register
				364	set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable
				365	set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control
				366
				367	set L_SCR3, LV+28 # integer scratch 3
				368	set L_SCR2, LV+24 # integer scratch 2
				369	set L_SCR1, LV+20 # integer scratch 1
				370
				371	set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst)
				372
				373	set EXC_TEMP2, LV+24 # temporary space (NOTE: same offset as L_SCR2)
				374	set EXC_TEMP, LV+16 # temporary space
				375
				376	set DTAG, LV+15 # destination operand type
				377	set STAG, LV+14 # source operand type
				378
				379	set SPCOND_FLG, LV+10 # flag: special case (see below)
				380
				381	set EXC_CC, LV+8 # saved condition codes
				382	set EXC_EXTWPTR, LV+4 # saved current PC (active)
				383	set EXC_EXTWORD, LV+2 # saved extension word
				384	set EXC_CMDREG, LV+2 # saved extension word (alias of EXC_EXTWORD)
				385	set EXC_OPWORD, LV+0 # saved operation word
				386
				387	################################
				388
				389	# Helpful macros
				390
				391	set FTEMP, 0 # offsets within an
				392	set FTEMP_EX, 0 # extended precision
				393	set FTEMP_SGN, 2 # value saved in memory.
				394	set FTEMP_HI, 4
				395	set FTEMP_LO, 8
				396	set FTEMP_GRS, 12
				397
				398	set LOCAL, 0 # offsets within an
				399	set LOCAL_EX, 0 # extended precision
				400	set LOCAL_SGN, 2 # value saved in memory.
				401	set LOCAL_HI, 4
				402	set LOCAL_LO, 8
				403	set LOCAL_GRS, 12
				404
				405	set DST, 0 # offsets within an
				406	set DST_EX, 0 # extended precision
				407	set DST_HI, 4 # value saved in memory.
				408	set DST_LO, 8
				409
				410	set SRC, 0 # offsets within an
				411	set SRC_EX, 0 # extended precision
				412	set SRC_HI, 4 # value saved in memory.
				413	set SRC_LO, 8
				414
				415	set SGL_LO, 0x3f81 # min sgl prec exponent (= EXT_BIAS - SGL_BIAS + 1)
				416	set SGL_HI, 0x407e # max sgl prec exponent (= EXT_BIAS + SGL_BIAS)
				417	set DBL_LO, 0x3c01 # min dbl prec exponent (= EXT_BIAS - DBL_BIAS + 1)
				418	set DBL_HI, 0x43fe # max dbl prec exponent (= EXT_BIAS + DBL_BIAS)
				419	set EXT_LO, 0x0 # min ext prec exponent
				420	set EXT_HI, 0x7ffe # max ext prec exponent
				421
				422	set EXT_BIAS, 0x3fff # extended precision bias
				423	set SGL_BIAS, 0x007f # single precision bias
				424	set DBL_BIAS, 0x03ff # double precision bias
				425
				426	set NORM, 0x00 # operand type for STAG/DTAG
				427	set ZERO, 0x01 # operand type for STAG/DTAG
				428	set INF, 0x02 # operand type for STAG/DTAG
				429	set QNAN, 0x03 # operand type for STAG/DTAG
				430	set DENORM, 0x04 # operand type for STAG/DTAG
				431	set SNAN, 0x05 # operand type for STAG/DTAG
				432	set UNNORM, 0x06 # operand type for STAG/DTAG
				433
				434	##################
				435	# FPSR/FPCR bits #
				436	##################
				437	set neg_bit, 0x3 # negative result (bit number within the condition code byte)
				438	set z_bit, 0x2 # zero result
				439	set inf_bit, 0x1 # infinite result
				440	set nan_bit, 0x0 # NAN result
				441
				442	set q_sn_bit, 0x7 # sign bit of quotient byte
				443
				444	set bsun_bit, 7 # branch on unordered (bit number within the exception byte)
				445	set snan_bit, 6 # signalling NAN
				446	set operr_bit, 5 # operand error
				447	set ovfl_bit, 4 # overflow
				448	set unfl_bit, 3 # underflow
				449	set dz_bit, 2 # divide by zero
				450	set inex2_bit, 1 # inexact result 2
				451	set inex1_bit, 0 # inexact result 1
				452
				453	set aiop_bit, 7 # accrued invalid operation bit (bit number within the accrued byte)
				454	set aovfl_bit, 6 # accrued overflow bit
				455	set aunfl_bit, 5 # accrued underflow bit
				456	set adz_bit, 4 # accrued dz bit
				457	set ainex_bit, 3 # accrued inexact bit
				458
				459	#############################
				460	# FPSR individual bit masks #
				461	#############################
				462	set neg_mask, 0x08000000 # negative bit mask (lw)
				463	set inf_mask, 0x02000000 # infinity bit mask (lw)
				464	set z_mask, 0x04000000 # zero bit mask (lw)
				465	set nan_mask, 0x01000000 # nan bit mask (lw)
				466
				467	set neg_bmask, 0x08 # negative bit mask (byte)
				468	set inf_bmask, 0x02 # infinity bit mask (byte)
				469	set z_bmask, 0x04 # zero bit mask (byte)
				470	set nan_bmask, 0x01 # nan bit mask (byte)
				471
				472	set bsun_mask, 0x00008000 # bsun exception mask
				473	set snan_mask, 0x00004000 # snan exception mask
				474	set operr_mask, 0x00002000 # operr exception mask
				475	set ovfl_mask, 0x00001000 # overflow exception mask
				476	set unfl_mask, 0x00000800 # underflow exception mask
				477	set dz_mask, 0x00000400 # dz exception mask
				478	set inex2_mask, 0x00000200 # inex2 exception mask
				479	set inex1_mask, 0x00000100 # inex1 exception mask
				480
				481	set aiop_mask, 0x00000080 # accrued illegal operation
				482	set aovfl_mask, 0x00000040 # accrued overflow
				483	set aunfl_mask, 0x00000020 # accrued underflow
				484	set adz_mask, 0x00000010 # accrued divide by zero
				485	set ainex_mask, 0x00000008 # accrued inexact
				486
				487	######################################
				488	# FPSR combinations used in the FPSP #
				489	######################################
				490	set dzinf_mask, inf_mask+dz_mask+adz_mask
				491	set opnan_mask, nan_mask+operr_mask+aiop_mask
				492	set nzi_mask, 0x01ffffff #clears N, Z, and I (AND mask: keeps only the nan bit of the cc byte)
				493	set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask
				494	set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask # like unfinx_mask but without aunfl_mask
				495	set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
				496	set inx1a_mask, inex1_mask+ainex_mask
				497	set inx2a_mask, inex2_mask+ainex_mask
				498	set snaniop_mask, nan_mask+snan_mask+aiop_mask
				499	set snaniop2_mask, snan_mask+aiop_mask # like snaniop_mask but without nan_mask
				500	set naniop_mask, nan_mask+aiop_mask
				501	set neginf_mask, neg_mask+inf_mask
				502	set infaiop_mask, inf_mask+aiop_mask
				503	set negz_mask, neg_mask+z_mask
				504	set opaop_mask, operr_mask+aiop_mask
				505	set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask # like unfinx_mask but without inex2_mask
				506	set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask # like ovfinx_mask but without inex2_mask
				507
				508	#########
				509	# misc. #
				510	#########
				511	set rnd_stky_bit, 29 # stky bit pos in longword
				512
				513	set sign_bit, 0x7 # sign bit
				514	set signan_bit, 0x6 # signalling nan bit
				515
				516	set sgl_thresh, 0x3f81 # minimum sgl exponent (same value as SGL_LO)
				517	set dbl_thresh, 0x3c01 # minimum dbl exponent (same value as DBL_LO)
				518
				519	set x_mode, 0x0 # extended precision
				520	set s_mode, 0x4 # single precision
				521	set d_mode, 0x8 # double precision
				522
				523	set rn_mode, 0x0 # round-to-nearest
				524	set rz_mode, 0x1 # round-to-zero
				525	set rm_mode, 0x2 # round-to-minus-infinity
				526	set rp_mode, 0x3 # round-to-plus-infinity
				527
				528	set mantissalen, 64 # length of mantissa in bits
				529
				530	set BYTE, 1 # len(byte) == 1 byte
				531	set WORD, 2 # len(word) == 2 bytes
				532	set LONG, 4 # len(longword) == 4 bytes
				533
				534	set BSUN_VEC, 0xc0 # bsun vector offset
				535	set INEX_VEC, 0xc4 # inexact vector offset
				536	set DZ_VEC, 0xc8 # dz vector offset
				537	set UNFL_VEC, 0xcc # unfl vector offset
				538	set OPERR_VEC, 0xd0 # operr vector offset
				539	set OVFL_VEC, 0xd4 # ovfl vector offset
				540	set SNAN_VEC, 0xd8 # snan vector offset
				541
				542	###########################
				543	# SPecial CONDition FLaGs #
				544	###########################
				545	set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception
				546	set fbsun_flg, 0x02 # flag bit: bsun exception
				547	set mia7_flg, 0x04 # flag bit: (a7)+ <ea>
				548	set mda7_flg, 0x08 # flag bit: -(a7) <ea>
				549	set fmovm_flg, 0x40 # flag bit: fmovm instruction
				550	set immed_flg, 0x80 # flag bit: &<data> <ea>
				551
				552	set ftrapcc_bit, 0x0 # bit number matching ftrapcc_flg
				553	set fbsun_bit, 0x1 # bit number matching fbsun_flg
				554	set mia7_bit, 0x2 # bit number matching mia7_flg
				555	set mda7_bit, 0x3 # bit number matching mda7_flg
				556	set immed_bit, 0x7 # bit number matching immed_flg
				557
				558	##################################
				559	# TRANSCENDENTAL "LAST-OP" FLAGS #
				560	##################################
				561	set FMUL_OP, 0x0 # fmul instr performed last
				562	set FDIV_OP, 0x1 # fdiv performed last
				563	set FADD_OP, 0x2 # fadd performed last
				564	set FMOV_OP, 0x3 # fmov performed last
				565
				566	#############
				567	# CONSTANTS #
				568	#############
				569	T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD (two longwords; presumably one double-precision value)
				570	T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL (two longwords; presumably one double-precision value)
				571
				572	PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 # pi: exponent word 0x4000, 64-bit mantissa, then a zero longword
				573	PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # pi/2: same mantissa as PI, exponent word 0x3FFF
				574
				575	TWOBYPI:
				576	long 0x3FE45F30,0x6DC9C883 # 2/pi (two longwords; presumably one double-precision value)
				577
				578	#########################################################################
				579	# XDEF **************************************************************** #
				580	# _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception. #
				581	# #
				582	# This handler should be the first code executed upon taking the #
				583	# FP Overflow exception in an operating system. #
				584	# #
				585	# XREF **************************************************************** #
				586	# _imem_read_long() - read instruction longword #
				587	# fix_skewed_ops() - adjust src operand in fsave frame #
				588	# set_tag_x() - determine optype of src/dst operands #
				589	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				590	# unnorm_fix() - change UNNORM operands to NORM or ZERO #
				591	# load_fpn2() - load dst operand from FP regfile #
				592	# fout() - emulate an opclass 3 instruction #
				593	# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
				594	# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
				595	# _real_ovfl() - "callout" for Overflow exception enabled code #
				596	# _real_inex() - "callout" for Inexact exception enabled code #
				597	# _real_trace() - "callout" for Trace exception code #
				598	# #
				599	# INPUT *************************************************************** #
				600	# - The system stack contains the FP Ovfl exception stack frame #
				601	# - The fsave frame contains the source operand #
				602	# #
				603	# OUTPUT ************************************************************** #
				604	# Overflow Exception enabled: #
				605	# - The system stack is unchanged #
				606	# - The fsave frame contains the adjusted src op for opclass 0,2 #
				607	# Overflow Exception disabled: #
				608	# - The system stack is unchanged #
				609	# - The "exception present" flag in the fsave frame is cleared #
				610	# #
				611	# ALGORITHM *********************************************************** #
				612	# On the 060, if an FP overflow is present as the result of any #
				613	# instruction, the 060 will take an overflow exception whether the #
				614	# exception is enabled or disabled in the FPCR. For the disabled case, #
				615	# This handler emulates the instruction to determine what the correct #
				616	# default result should be for the operation. This default result is #
				617	# then stored in either the FP regfile, data regfile, or memory. #
				618	# Finally, the handler exits through the "callout" _fpsp_done() #
				619	# denoting that no exceptional conditions exist within the machine. #
				620	# If the exception is enabled, then this handler must create the #
				621	# exceptional operand and place it in the fsave state frame, and store #
				622	# the default result (only if the instruction is opclass 3). For #
				623	# exceptions enabled, this handler must exit through the "callout" #
				624	# _real_ovfl() so that the operating system enabled overflow handler #
				625	# can handle this case. #
				626	# Two other conditions exist. First, if overflow was disabled #
				627	# but the inexact exception was enabled, this handler must exit #
				628	# through the "callout" _real_inex() regardless of whether the result #
				629	# was inexact. #
				630	# Also, in the case of an opclass three instruction where #
				631	# overflow was disabled and the trace exception was enabled, this #
				632	# handler must exit through the "callout" _real_trace(). #
				633	# #
				634	#########################################################################
				635
				636	global _fpsp_ovfl # FP Overflow entry point: emulate the instruction, then exit via _fpsp_done, _real_ovfl, _real_inex or _real_trace
				637	_fpsp_ovfl:
				638
				639	#$# sub.l &24,%sp # make room for src/dst
				640
				641	link.w %a6,&-LOCAL_SIZE # init stack frame
				642
				643	fsave FP_SRC(%a6) # grab the "busy" frame
				644
				645	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				646	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				647	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				648
				649	# the FPIAR holds the "current PC" of the faulting instruction
				650	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
				651	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				652	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				653	bsr.l _imem_read_long # fetch the instruction words
				654	mov.l %d0,EXC_OPWORD(%a6) # store both words: opword at EXC_OPWORD, extension word at EXC_CMDREG (EXC_OPWORD+2)
				655
				656	##############################################################################
				657
				658	btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
				659	bne.w fovfl_out # yes; handled separately
				660
				661
				662	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				663	bsr.l fix_skewed_ops # fix src op
				664
				665	# since, I believe, only NORMs and DENORMs can come through here,
				666	# maybe we can avoid the subroutine call.
				667	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				668	bsr.l set_tag_x # tag the operand type
				669	mov.b %d0,STAG(%a6) # maybe NORM,DENORM
				670
				671	# bit five of the fp extension word separates the monadic and dyadic operations
				672	# that can pass through fpsp_ovfl(). remember that fcmp, ftst, and fsincos
				673	# will never take this exception.
				674	btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
				675	beq.b fovfl_extract # monadic
				676
				677	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				678	bsr.l load_fpn2 # load dst into FP_DST
				679
				680	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				681	bsr.l set_tag_x # tag the operand type
				682	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				683	bne.b fovfl_op2_done # no
				684	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				685	fovfl_op2_done:
				686	mov.b %d0,DTAG(%a6) # save dst optype tag
				687
				688	fovfl_extract:
				689
				690	#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
				691	#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
				692	#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
				693	#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
				694	#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
				695	#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
				696
				697	clr.l %d0
				698	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
				699
				700	mov.b 1+EXC_CMDREG(%a6),%d1 # d1.b = low byte of the extension word
				701	andi.w &0x007f,%d1 # extract extension
				702
				703	andi.l &0x00ff01ff,USER_FPSR(%a6) # clear cc byte and exception byte (except INEX1); keep quotient and accrued bytes
				704
				705	fmov.l &0x0,%fpcr # zero current control regs
				706	fmov.l &0x0,%fpsr
				707
				708	lea FP_SRC(%a6),%a0 # a0 = ptr to src operand
				709	lea FP_DST(%a6),%a1 # a1 = ptr to dst operand
				710
				711	# maybe we can make these entry points ONLY the OVFL entry points of each routine.
				712	mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
				713	jsr (tbl_unsupp.l,%pc,%d1.l*1) # call it; the fetched value is used as an offset from tbl_unsupp
				714
				715	# the operation has been emulated. the result is in fp0.
				716	# the EXOP, if an exception occurred, is in fp1.
				717	# we must save the default result regardless of whether
				718	# traps are enabled or disabled.
				719	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # d0 = same 3-bit dst reg field as extracted above
				720	bsr.l store_fpreg
				721
				722	# the exceptional possibilities we have left ourselves with are ONLY overflow
				723	# and inexact. and, the inexact is such that overflow occurred and was disabled
				724	# but inexact was enabled.
				725	btst &ovfl_bit,FPCR_ENABLE(%a6) # overflow enabled in the saved FPCR?
				726	bne.b fovfl_ovfl_on
				727
				728	btst &inex2_bit,FPCR_ENABLE(%a6) # inexact (inex2) enabled in the saved FPCR?
				729	bne.b fovfl_inex_on
				730
				731	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				732	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				733	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				734
				735	unlk %a6 # tear down the frame built by link.w
				736	#$# add.l &24,%sp
				737	bra.l _fpsp_done # neither overflow nor inexact enabled: exit through the "done" callout
				738
				739	# overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
				740	# in fp1. now, simply jump to _real_ovfl()!
				741	fovfl_ovfl_on:
				742	fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
				743
				744	mov.w &0xe005,2+FP_SRC(%a6) # save exc status
				745
				746	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				747	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				748	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				749
				750	frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
				751
				752	unlk %a6 # tear down the frame built by link.w
				753
				754	bra.l _real_ovfl # exit through the overflow-enabled callout
				755
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	756	# overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	757	# we must jump to real_inex().
				758	fovfl_inex_on:
				759
				760	fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
				761
				762	mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4 (same value as INEX_VEC); rewrites the low byte of the stacked vector offset
				763	mov.w &0xe001,2+FP_SRC(%a6) # save exc status
				764
				765	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				766	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				767	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				768
				769	frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
				770
				771	unlk %a6 # tear down the frame built by link.w
				772
				773	bra.l _real_inex # exit through the inexact-enabled callout
				774
				775	########################################################################
				776	fovfl_out:
				777
				778
				779	#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
				780	#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
				781	#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
				782
				783	# the src operand is definitely a NORM(!), so tag it as such
				784	mov.b &NORM,STAG(%a6) # set src optype tag
				785
				786	clr.l %d0
				787	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
				788
				789	and.l &0xffff00ff,USER_FPSR(%a6) # clear the exception status byte; keep cc, quotient and accrued bytes
				790
				791	fmov.l &0x0,%fpcr # zero current control regs
				792	fmov.l &0x0,%fpsr
				793
				794	lea FP_SRC(%a6),%a0 # pass ptr to src operand
				795
				796	bsr.l fout # emulate the fmove out (opclass 3)
				797
				798	btst &ovfl_bit,FPCR_ENABLE(%a6) # overflow enabled in the saved FPCR?
				799	bne.w fovfl_ovfl_on
				800
				801	btst &inex2_bit,FPCR_ENABLE(%a6) # inexact (inex2) enabled in the saved FPCR?
				802	bne.w fovfl_inex_on
				803
				804	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				805	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				806	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				807
				808	unlk %a6 # tear down the frame; sp now points at the stacked SR
				809	#$# add.l &24,%sp
				810
				811	btst &0x7,(%sp) # is trace on?
				812	beq.l _fpsp_done # no
				813
				814	fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
				815	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
				816	bra.l _real_trace # exit through the trace callout
				817
				818	#########################################################################
				819	# XDEF **************************************************************** #
				820	# _fpsp_unfl(): 060FPSP entry point for FP Underflow exception. #
				821	# #
				822	# This handler should be the first code executed upon taking the #
				823	# FP Underflow exception in an operating system. #
				824	# #
				825	# XREF **************************************************************** #
				826	# _imem_read_long() - read instruction longword #
				827	# fix_skewed_ops() - adjust src operand in fsave frame #
				828	# set_tag_x() - determine optype of src/dst operands #
				829	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				830	# unnorm_fix() - change UNNORM operands to NORM or ZERO #
				831	# load_fpn2() - load dst operand from FP regfile #
				832	# fout() - emulate an opclass 3 instruction #
				833	# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
				834	# _fpsp_done() - "callout" for 060FPSP exit (all work done!) #
				835	# _real_unfl() - "callout" for Underflow exception enabled code #
				836	# _real_inex() - "callout" for Inexact exception enabled code #
				837	# _real_trace() - "callout" for Trace exception code #
				838	# #
				839	# INPUT *************************************************************** #
				840	# - The system stack contains the FP Unfl exception stack frame #
				841	# - The fsave frame contains the source operand #
				842	# #
				843	# OUTPUT ************************************************************** #
				844	# Underflow Exception enabled: #
				845	# - The system stack is unchanged #
				846	# - The fsave frame contains the adjusted src op for opclass 0,2 #
				847	# Underflow Exception disabled: #
				848	# - The system stack is unchanged #
				849	# - The "exception present" flag in the fsave frame is cleared #
				850	# #
				851	# ALGORITHM *********************************************************** #
				852	# On the 060, if an FP underflow is present as the result of any #
				853	# instruction, the 060 will take an underflow exception whether the #
				854	# exception is enabled or disabled in the FPCR. For the disabled case, #
				855	# This handler emulates the instruction to determine what the correct #
				856	# default result should be for the operation. This default result is #
				857	# then stored in either the FP regfile, data regfile, or memory. #
				858	# Finally, the handler exits through the "callout" _fpsp_done() #
				859	# denoting that no exceptional conditions exist within the machine. #
				860	# If the exception is enabled, then this handler must create the #
				861	# exceptional operand and place it in the fsave state frame, and store #
				862	# the default result (only if the instruction is opclass 3). For #
				863	# exceptions enabled, this handler must exit through the "callout" #
				864	# _real_unfl() so that the operating system enabled underflow handler #
				865	# can handle this case. #
				866	# Two other conditions exist. First, if underflow was disabled #
				867	# but the inexact exception was enabled and the result was #
				868	# inexact, this handler must exit through the "callout" #
				869	# _real_inex(). #
				870	# Also, in the case of an opclass three instruction where #
				871	# underflow was disabled and the trace exception was enabled, this #
				872	# handler must exit through the "callout" _real_trace(). #
				873	# #
				874	#########################################################################
				875
				876	global _fpsp_unfl
				877	_fpsp_unfl:
				878
				879	#$# sub.l &24,%sp # make room for src/dst
				880
				881	link.w %a6,&-LOCAL_SIZE # init stack frame
				882
				883	fsave FP_SRC(%a6) # grab the "busy" frame
				884
				885	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				886	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				887	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				888
				889	# the FPIAR holds the "current PC" of the faulting instruction
				890	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
				891	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				892	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				893	bsr.l _imem_read_long # fetch the instruction words
				894	mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
				895
				896	##############################################################################
				897
				898	btst &0x5,EXC_CMDREG(%a6) # is instr an fmove out?
				899	bne.w funfl_out # yes
				900
				901
				902	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				903	bsr.l fix_skewed_ops # fix src op
				904
				905	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				906	bsr.l set_tag_x # tag the operand type
				907	mov.b %d0,STAG(%a6) # maybe NORM,DENORM
				908
				909	# bit five of the fp ext word separates the monadic and dyadic operations
				910	# that can pass through fpsp_unfl(). remember that fcmp, and ftst
				911	# will never take this exception.
				912	btst &0x5,1+EXC_CMDREG(%a6) # is op monadic or dyadic?
				913	beq.b funfl_extract # monadic
				914
				915	# now, what's left that's not dyadic is fsincos. we can distinguish it
				916	# from all dyadics by the '0110xxx pattern
				917	btst &0x4,1+EXC_CMDREG(%a6) # is op an fsincos?
				918	bne.b funfl_extract # yes
				919
				920	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				921	bsr.l load_fpn2 # load dst into FP_DST
				922
				923	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				924	bsr.l set_tag_x # tag the operand type
				925	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				926	bne.b funfl_op2_done # no
				927	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				928	funfl_op2_done:
				929	mov.b %d0,DTAG(%a6) # save dst optype tag
				930
				931	funfl_extract:
				932
				933	#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
				934	#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
				935	#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
				936	#$# mov.l FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
				937	#$# mov.l FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
				938	#$# mov.l FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
				939
				940	clr.l %d0
				941	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
				942
				943	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch extension byte
				944	andi.w &0x007f,%d1 # extract extension
				945
				946	andi.l &0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24 and 15-9; keep the rest
				947
				948	fmov.l &0x0,%fpcr # zero current control regs
				949	fmov.l &0x0,%fpsr
				950
				951	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				952	lea FP_DST(%a6),%a1 # pass: ptr to dst op
				953
				954	# maybe we can make these entry points ONLY the UNFL entry points of each routine.
				955	mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
				956	jsr (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
				957
				958	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field
				959	bsr.l store_fpreg # store the result
				960
				961	# The `060 FPU multiplier hardware is such that if the result of a
				962	# multiply operation is the smallest possible normalized number
				963	# (0x00000000_80000000_00000000), then the machine will take an
				964	# underflow exception. Since this is incorrect, we need to check
				965	# if our emulation, after re-doing the operation, decided that
				966	# no underflow was called for. We do these checks only in
				967	# funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
				968	# special case will simply exit gracefully with the correct result.
				969
				970	# the exceptional possibilities we have left ourselves with are ONLY underflow
				971	# and inexact. and, the inexact is such that underflow occurred and was disabled
				972	# but inexact was enabled.
				973	btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
				974	bne.b funfl_unfl_on # yes
				975
				976	funfl_chkinex:
				977	btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
				978	bne.b funfl_inex_on # yes
				979
				980	funfl_exit:
				981	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				982	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				983	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				984
				985	unlk %a6
				986	#$# add.l &24,%sp
				987	bra.l _fpsp_done
				988
				989	# underflow is enabled AND the machine reported an underflow. so, we have the
				990	# EXOP in fp1 (don't forget to save fp0). what to do now?
				991	# well, we simply have to get to go to _real_unfl()!
				992	funfl_unfl_on:
				993
				994	# The `060 FPU multiplier hardware is such that if the result of a
				995	# multiply operation is the smallest possible normalized number
				996	# (0x00000000_80000000_00000000), then the machine will take an
				997	# underflow exception. Since this is incorrect, we check here to see
				998	# if our emulation, after re-doing the operation, decided that
				999	# no underflow was called for.
				1000	btst &unfl_bit,FPSR_EXCEPT(%a6) # did emulation set underflow?
				1001	beq.w funfl_chkinex # no; go check inexact instead
				1002
				1003	funfl_unfl_on2:
				1004	fmovm.x &0x40,FP_SRC(%a6) # save EXOP (fp1) to stack
				1005
				1006	mov.w &0xe003,2+FP_SRC(%a6) # save exc status
				1007
				1008	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				1009	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1010	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1011
				1012	frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
				1013
				1014	unlk %a6
				1015
				1016	bra.l _real_unfl
				1017
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	1018	# underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1019	# we must jump to real_inex().
				1020	funfl_inex_on:
				1021
				1022	# The `060 FPU multiplier hardware is such that if the result of a
				1023	# multiply operation is the smallest possible normalized number
				1024	# (0x00000000_80000000_00000000), then the machine will take an
				1025	# underflow exception.
				1026	# But, whether bogus or not, if inexact is enabled AND it occurred,
				1027	# then we have to branch to real_inex.
				1028
				1029	btst &inex2_bit,FPSR_EXCEPT(%a6) # did emulation set inexact?
				1030	beq.w funfl_exit # no; nothing to report
				1031
				1032	funfl_inex_on2:
				1033
				1034	fmovm.x &0x40,FP_SRC(%a6) # save EXOP to stack
				1035
				1036	mov.b &0xc4,1+EXC_VOFF(%a6) # vector offset = 0xc4
				1037	mov.w &0xe001,2+FP_SRC(%a6) # save exc status
				1038
				1039	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				1040	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1041	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1042
				1043	frestore FP_SRC(%a6) # do this after fmovm,other f<op>s!
				1044
				1045	unlk %a6
				1046
				1047	bra.l _real_inex
				1048
				1049	#######################################################################
				1050	funfl_out:
				1051
				1052
				1053	#$# mov.l FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
				1054	#$# mov.l FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
				1055	#$# mov.l FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
				1056
				1057	# the src operand is definitely a NORM(!), so tag it as such
				1058	mov.b &NORM,STAG(%a6) # set src optype tag
				1059
				1060	clr.l %d0
				1061	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
				1062
				1063	and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
				1064
				1065	fmov.l &0x0,%fpcr # zero current control regs
				1066	fmov.l &0x0,%fpsr
				1067
				1068	lea FP_SRC(%a6),%a0 # pass ptr to src operand
				1069
				1070	bsr.l fout # call fmove out routine
				1071
				1072	btst &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
				1073	bne.w funfl_unfl_on2 # yes
				1074
				1075	btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
				1076	bne.w funfl_inex_on2 # yes
				1077
				1078	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				1079	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1080	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1081
				1082	unlk %a6
				1083	#$# add.l &24,%sp
				1084
				1085	btst &0x7,(%sp) # is trace on?
				1086	beq.l _fpsp_done # no
				1087
				1088	fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
				1089	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
				1090	bra.l _real_trace
				1091
				1092	#########################################################################
				1093	# XDEF **************************************************************** #
				1094	# _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented #
				1095	# Data Type" exception. #
				1096	# #
				1097	# This handler should be the first code executed upon taking the #
				1098	# FP Unimplemented Data Type exception in an operating system. #
				1099	# #
				1100	# XREF **************************************************************** #
				1101	# _imem_read_{word,long}() - read instruction word/longword #
				1102	# fix_skewed_ops() - adjust src operand in fsave frame #
				1103	# set_tag_x() - determine optype of src/dst operands #
				1104	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				1105	# unnorm_fix() - change UNNORM operands to NORM or ZERO #
				1106	# load_fpn2() - load dst operand from FP regfile #
				1107	# load_fpn1() - load src operand from FP regfile #
				1108	# fout() - emulate an opclass 3 instruction #
				1109	# tbl_unsupp - add of table of emulation routines for opclass 0,2 #
				1110	# _real_inex() - "callout" to operating system inexact handler #
				1111	# _fpsp_done() - "callout" for exit; work all done #
				1112	# _real_trace() - "callout" for Trace enabled exception #
				1113	# funimp_skew() - adjust fsave src ops to "incorrect" value #
				1114	# _real_snan() - "callout" for SNAN exception #
				1115	# _real_operr() - "callout" for OPERR exception #
				1116	# _real_ovfl() - "callout" for OVFL exception #
				1117	# _real_unfl() - "callout" for UNFL exception #
				1118	# get_packed() - fetch packed operand from memory #
				1119	# #
				1120	# INPUT *************************************************************** #
				1121	# - The system stack contains the "Unimp Data Type" stk frame #
				1122	# - The fsave frame contains the src op (for UNNORM/DENORM) #
				1123	# #
				1124	# OUTPUT ************************************************************** #
				1125	# If Inexact exception (opclass 3): #
				1126	# - The system stack is changed to an Inexact exception stk frame #
				1127	# If SNAN exception (opclass 3): #
				1128	# - The system stack is changed to an SNAN exception stk frame #
				1129	# If OPERR exception (opclass 3): #
				1130	# - The system stack is changed to an OPERR exception stk frame #
				1131	# If OVFL exception (opclass 3): #
				1132	# - The system stack is changed to an OVFL exception stk frame #
				1133	# If UNFL exception (opclass 3): #
				1134	# - The system stack is changed to an UNFL exception stack frame #
				1135	# If Trace exception enabled: #
				1136	# - The system stack is changed to a Trace exception stack frame #
				1137	# Else: (normal case) #
				1138	# - Correct result has been stored as appropriate #
				1139	# #
				1140	# ALGORITHM *********************************************************** #
				1141	# Two main instruction types can enter here: (1) DENORM or UNNORM #
				1142	# unimplemented data types. These can be either opclass 0,2 or 3 #
				1143	# instructions, and (2) PACKED unimplemented data format instructions #
				1144	# also of opclasses 0,2, or 3. #
				1145	# For UNNORM/DENORM opclass 0 and 2, the handler fetches the src #
				1146	# operand from the fsave state frame and the dst operand (if dyadic) #
				1147	# from the FP register file. The instruction is then emulated by #
				1148	# choosing an emulation routine from a table of routines indexed by #
				1149	# instruction type. Once the instruction has been emulated and result #
				1150	# saved, then we check to see if any enabled exceptions resulted from #
				1151	# instruction emulation. If none, then we exit through the "callout" #
				1152	# _fpsp_done(). If there is an enabled FP exception, then we insert #
				1153	# this exception into the FPU in the fsave state frame and then exit #
				1154	# through _fpsp_done(). #
				1155	# PACKED opclass 0 and 2 is similar in how the instruction is #
				1156	# emulated and exceptions handled. The differences occur in how the #
				1157	# handler loads the packed op (by calling get_packed() routine) and #
				1158	# by the fact that a Trace exception could be pending for PACKED ops. #
				1159	# If a Trace exception is pending, then the current exception stack #
				1160	# frame is changed to a Trace exception stack frame and an exit is #
				1161	# made through _real_trace(). #
				1162	# For UNNORM/DENORM opclass 3, the actual move out to memory is #
				1163	# performed by calling the routine fout(). If no exception should occur #
				1164	# as the result of emulation, then an exit either occurs through #
				1165	# _fpsp_done() or through _real_trace() if a Trace exception is pending #
				1166	# (a Trace stack frame must be created here, too). If an FP exception #
				1167	# should occur, then we must create an exception stack frame of that #
				1168	# type and jump to either _real_snan(), _real_operr(), _real_inex(), #
				1169	# _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3 #
				1170	# emulation is performed in a similar manner. #
				1171	# #
				1172	#########################################################################
				1173
				1174	#
				1175	# (1) DENORM and UNNORM (unimplemented) data types:
				1176	#
				1177	# post-instruction
				1178	# *****************
				1179	# * EA *
				1180	# pre-instruction * *
				1181	# *************** ***************
				1182	# * 0x0 * 0x0dc * * 0x3 * 0x0dc *
				1183	# *************** ***************
				1184	# * Next * * Next *
				1185	# * PC * * PC *
				1186	# *************** ***************
				1187	# * SR * * SR *
				1188	# *************** ***************
				1189	#
				1190	# (2) PACKED format (unsupported) opclasses two and three:
				1191	# *****************
				1192	# * EA *
				1193	# * *
				1194	# *****************
				1195	# * 0x2 * 0x0dc *
				1196	# *****************
				1197	# * Next *
				1198	# * PC *
				1199	# *****************
				1200	# * SR *
				1201	# *****************
				1202	#
				1203	global _fpsp_unsupp
				1204	_fpsp_unsupp:
				1205
				1206	link.w %a6,&-LOCAL_SIZE # init stack frame
				1207
				1208	fsave FP_SRC(%a6) # save fp state
				1209
				1210	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				1211	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				1212	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				1213
				1214	btst &0x5,EXC_SR(%a6) # user or supervisor mode?
				1215	bne.b fu_s
				1216	fu_u:
				1217	mov.l %usp,%a0 # fetch user stack pointer
				1218	mov.l %a0,EXC_A7(%a6) # save on stack
				1219	bra.b fu_cont
				1220	# if the exception is an opclass zero or two unimplemented data type
				1221	# exception, then the a7' calculated here is wrong since it doesn't
				1222	# stack an ea. however, we don't need an a7' for this case anyways.
				1223	fu_s:
				1224	lea 0x4+EXC_EA(%a6),%a0 # load old a7'
				1225	mov.l %a0,EXC_A7(%a6) # save on stack
				1226
				1227	fu_cont:
				1228
				1229	# the FPIAR holds the "current PC" of the faulting instruction
				1230	# the FPIAR should be set correctly for ALL exceptions passing through
				1231	# this point.
				1232	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
				1233	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				1234	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				1235	bsr.l _imem_read_long # fetch the instruction words
				1236	mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
				1237
				1238	############################
				1239
				1240	clr.b SPCOND_FLG(%a6) # clear special condition flag
				1241
				1242	# Separate opclass three (fpn-to-mem) ops since they have a different
				1243	# stack frame and protocol.
				1244	btst &0x5,EXC_CMDREG(%a6) # is it an fmove out?
				1245	bne.w fu_out # yes
				1246
				1247	# Separate packed opclass two instructions.
				1248	bfextu EXC_CMDREG(%a6){&0:&6},%d0
				1249	cmpi.b %d0,&0x13
				1250	beq.w fu_in_pack
				1251
				1252
				1253	# I'm not sure at this point what FPSR bits are valid for this instruction.
				1254	# so, since the emulation routines re-create them anyways, zero exception field
				1255	andi.l &0x00ff00ff,USER_FPSR(%a6) # zero exception field
				1256
				1257	fmov.l &0x0,%fpcr # zero current control regs
				1258	fmov.l &0x0,%fpsr
				1259
				1260	# Opclass two w/ memory-to-fpn operation will have an incorrect extended
				1261	# precision format if the src format was single or double and the
				1262	# source data type was an INF, NAN, DENORM, or UNNORM
				1263	lea FP_SRC(%a6),%a0 # pass ptr to input
				1264	bsr.l fix_skewed_ops
				1265
				1266	# we don't know whether the src operand or the dst operand (or both) is the
				1267	# UNNORM or DENORM. call the function that tags the operand type. if the
				1268	# input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
				1269	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				1270	bsr.l set_tag_x # tag the operand type
				1271	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				1272	bne.b fu_op2 # no
				1273	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				1274
				1275	fu_op2:
				1276	mov.b %d0,STAG(%a6) # save src optype tag
				1277
				1278	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				1279
				1280	# bit five of the fp extension word separates the monadic and dyadic operations
				1281	# at this point
				1282	btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
				1283	beq.b fu_extract # monadic
				1284	cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
				1285	beq.b fu_extract # yes, so it's monadic, too
				1286
				1287	bsr.l load_fpn2 # load dst into FP_DST
				1288
				1289	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				1290	bsr.l set_tag_x # tag the operand type
				1291	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				1292	bne.b fu_op2_done # no
				1293	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				1294	fu_op2_done:
				1295	mov.b %d0,DTAG(%a6) # save dst optype tag
				1296
				1297	fu_extract:
				1298	clr.l %d0
				1299	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
				1300
				1301	bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
				1302
				1303	lea FP_SRC(%a6),%a0
				1304	lea FP_DST(%a6),%a1
				1305
				1306	mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
				1307	jsr (tbl_unsupp.l,%pc,%d1.l*1)
				1308
				1309	#
				1310	# Exceptions in order of precedence:
				1311	# BSUN : none
				1312	# SNAN : all dyadic ops
				1313	# OPERR : fsqrt(-NORM)
				1314	# OVFL : all except ftst,fcmp
				1315	# UNFL : all except ftst,fcmp
				1316	# DZ : fdiv
				1317	# INEX2 : all except ftst,fcmp
				1318	# INEX1 : none (packed doesn't go through here)
				1319	#
				1320
				1321	# we determine the highest priority exception(if any) set by the
				1322	# emulation routine that has also been enabled by the user.
				1323	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions set
				1324	bne.b fu_in_ena # some are enabled
				1325
				1326	fu_in_cont:
				1327	# fcmp and ftst do not store any result.
				1328	mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
				1329	andi.b &0x38,%d0 # extract bits 3-5
				1330	cmpi.b %d0,&0x38 # is instr fcmp or ftst?
				1331	beq.b fu_in_exit # yes
				1332
				1333	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				1334	bsr.l store_fpreg # store the result
				1335
				1336	fu_in_exit:
				1337
				1338	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1339	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1340	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1341
				1342	unlk %a6
				1343
				1344	bra.l _fpsp_done
				1345
				1346	fu_in_ena:
				1347	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
				1348	bfffo %d0{&24:&8},%d0 # find highest priority exception
				1349	bne.b fu_in_exc # there is at least one set
				1350
				1351	#
				1352	# No exceptions occurred that were also enabled. Now:
				1353	#
				1354	# if (OVFL && ovfl_disabled && inexact_enabled) {
				1355	# branch to _real_inex() (even if the result was exact!);
				1356	# } else {
				1357	# save the result in the proper fp reg (unless the op is fcmp or ftst);
				1358	# return;
				1359	# }
				1360	#
				1361	btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
				1362	beq.b fu_in_cont # no
				1363
				1364	fu_in_ovflchk:
				1365	btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
				1366	beq.b fu_in_cont # no
				1367	bra.w fu_in_exc_ovfl # go insert overflow frame
				1368
				1369	#
				1370	# An exception occurred and that exception was enabled:
				1371	#
				1372	# shift enabled exception field into lo byte of d0;
				1373	# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
				1374	# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
				1375	# /*
				1376	# * this is the case where we must call _real_inex() now or else
				1377	# * there will be no other way to pass it the exceptional operand
				1378	# */
				1379	# call _real_inex();
				1380	# } else {
				1381	# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
				1382	# }
				1383	#
				1384	fu_in_exc:
				1385	subi.l &24,%d0 # fix offset to be 0-8
				1386	cmpi.b %d0,&0x6 # is exception INEX? (6)
				1387	bne.b fu_in_exc_exit # no
				1388
				1389	# the enabled exception was inexact
				1390	btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
				1391	bne.w fu_in_exc_unfl # yes
				1392	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
				1393	bne.w fu_in_exc_ovfl # yes
				1394
				1395	# here, we insert the correct fsave status value into the fsave frame for the
				1396	# corresponding exception. the operand in the fsave frame should be the original
				1397	# src operand.
				1398	fu_in_exc_exit:
				1399	mov.l %d0,-(%sp) # save d0
				1400	bsr.l funimp_skew # skew sgl or dbl inputs
				1401	mov.l (%sp)+,%d0 # restore d0
				1402
				1403	mov.w (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
				1404
				1405	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1406	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1407	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1408
				1409	frestore FP_SRC(%a6) # restore src op
				1410
				1411	unlk %a6
				1412
				1413	bra.l _fpsp_done
				1414
				1415	tbl_except:
				1416	short 0xe000,0xe006,0xe004,0xe005
				1417	short 0xe003,0xe002,0xe001,0xe001
				1418
				1419	fu_in_exc_unfl:
				1420	mov.w &0x4,%d0
				1421	bra.b fu_in_exc_exit
				1422	fu_in_exc_ovfl:
				1423	mov.w &0x03,%d0
				1424	bra.b fu_in_exc_exit
				1425
				1426	# fix_skewed_ops(): if the instruction in EXC_CMDREG is opclass two with a
				1427	# single or double precision source, and the operand at (a0) carries the skewed
				1428	# exponent of a denorm, zero, inf, or nan, rewrite it in place as the proper
				1429	# equivalent extended precision number. in: a0 = ptr to operand. d0 is clobbered.
				1430	global fix_skewed_ops
				1431	fix_skewed_ops:
				1432	bfextu EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
				1433	cmpi.b %d0,&0x11 # is class = 2 & fmt = sgl?
				1434	beq.b fso_sgl # yes
				1435	cmpi.b %d0,&0x15 # is class = 2 & fmt = dbl?
				1436	beq.b fso_dbl # yes
				1437	rts # no
				1438
				1439	fso_sgl: # src fmt is single precision
				1440	mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
				1441	andi.w &0x7fff,%d0 # strip sign
				1442	cmpi.w %d0,&0x3f80 # is |exp| == $3f80?
				1443	beq.b fso_sgl_dnrm_zero # yes
				1444	cmpi.w %d0,&0x407f # no; is |exp| == $407f?
				1445	beq.b fso_infnan # yes
				1446	rts # no
				1447
				1448	fso_sgl_dnrm_zero: # skewed sgl denorm or zero
				1449	andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
				1450	beq.b fso_zero # it's a skewed zero
				1451	fso_sgl_dnrm:
				1452	# here, we count on norm not to alter a0...
				1453	bsr.l norm # normalize mantissa
				1454	neg.w %d0 # -shft amt
				1455	addi.w &0x3f81,%d0 # adjust new exponent
				1456	andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
				1457	or.w %d0,LOCAL_EX(%a0) # insert new exponent
				1458	rts
				1459
				1460	fso_zero: # shared by the sgl and dbl paths
				1461	andi.w &0x8000,LOCAL_EX(%a0) # clear bogus exponent
				1462	rts
				1463
				1464	fso_infnan: # shared by the sgl and dbl paths
				1465	andi.b &0x7f,LOCAL_HI(%a0) # clear j-bit
				1466	ori.w &0x7fff,LOCAL_EX(%a0) # make exponent = $7fff
				1467	rts
				1468
				1469	fso_dbl: # src fmt is double precision
				1470	mov.w LOCAL_EX(%a0),%d0 # fetch src exponent
				1471	andi.w &0x7fff,%d0 # strip sign
				1472	cmpi.w %d0,&0x3c00 # is |exp| == $3c00?
				1473	beq.b fso_dbl_dnrm_zero # yes
				1474	cmpi.w %d0,&0x43ff # no; is |exp| == $43ff?
				1475	beq.b fso_infnan # yes
				1476	rts # no
				1477
				1478	fso_dbl_dnrm_zero: # skewed dbl denorm or zero
				1479	andi.l &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
				1480	bne.b fso_dbl_dnrm # it's a skewed denorm
				1481	tst.l LOCAL_LO(%a0) # is it a zero?
				1482	beq.b fso_zero # yes
				1483	fso_dbl_dnrm:
				1484	# here, we count on norm not to alter a0...
				1485	bsr.l norm # normalize mantissa
				1486	neg.w %d0 # -shft amt
				1487	addi.w &0x3c01,%d0 # adjust new exponent
				1488	andi.w &0x8000,LOCAL_EX(%a0) # clear old exponent
				1489	or.w %d0,LOCAL_EX(%a0) # insert new exponent
				1490	rts
				1491
				1492	#################################################################
				1493
				1494	# fmove out took an unimplemented data type exception.
				1495	# the src operand is in FP_SRC. Call _fout() to write out the result and
				1496	# to determine which exceptions, if any, to take.
				1497	fu_out:
				1498
				1499	# Separate packed move outs from the UNNORM and DENORM move outs.
				1500	bfextu EXC_CMDREG(%a6){&3:&3},%d0
				1501	cmpi.b %d0,&0x3
				1502	beq.w fu_out_pack
				1503	cmpi.b %d0,&0x7
				1504	beq.w fu_out_pack
				1505
				1506
				1507	# I'm not sure at this point what FPSR bits are valid for this instruction.
				1508	# so, since the emulation routines re-create them anyways, zero exception field.
				1509	# fmove out doesn't affect ccodes.
				1510	and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
				1511
				1512	fmov.l &0x0,%fpcr # zero current control regs
				1513	fmov.l &0x0,%fpsr
				1514
				1515	# the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
				1516	# call here. just figure out what it is...
				1517	mov.w FP_SRC_EX(%a6),%d0 # get exponent
				1518	andi.w &0x7fff,%d0 # strip sign
				1519	beq.b fu_out_denorm # it's a DENORM
				1520
				1521	lea FP_SRC(%a6),%a0
				1522	bsr.l unnorm_fix # yes; fix it
				1523
				1524	mov.b %d0,STAG(%a6)
				1525
				1526	bra.b fu_out_cont
				1527	fu_out_denorm:
				1528	mov.b &DENORM,STAG(%a6)
				1529	fu_out_cont:
				1530
				1531	clr.l %d0
				1532	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
				1533
				1534	lea FP_SRC(%a6),%a0 # pass ptr to src operand
				1535
				1536	mov.l (%a6),EXC_A6(%a6) # in case a6 changes
				1537	bsr.l fout # call fmove out routine
				1538
				1539	# Exceptions in order of precedence:
				1540	# BSUN : none
				1541	# SNAN : none
				1542	# OPERR : fmove.{b,w,l} out of large UNNORM
				1543	# OVFL : fmove.{s,d}
				1544	# UNFL : fmove.{s,d,x}
				1545	# DZ : none
				1546	# INEX2 : all
				1547	# INEX1 : none (packed doesn't travel through here)
				1548
				1549	# determine the highest priority exception(if any) set by the
				1550	# emulation routine that has also been enabled by the user.
				1551	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
				1552	bne.w fu_out_ena # some are enabled
				1553
				1554	fu_out_done:
				1555
				1556	mov.l EXC_A6(%a6),(%a6) # in case a6 changed
				1557
				1558	# on extended precision opclass three instructions using pre-decrement or
				1559	# post-increment addressing mode, the address register is not updated. if the
				1560	# address register was the stack pointer used from user mode, then let's update
				1561	# it here. if it was used from supervisor mode, then we have to handle this
				1562	# as a special case.
				1563	btst &0x5,EXC_SR(%a6)
				1564	bne.b fu_out_done_s
				1565
				1566	mov.l EXC_A7(%a6),%a0 # restore a7
				1567	mov.l %a0,%usp
				1568
				1569	fu_out_done_cont:
				1570	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1571	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1572	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1573
				1574	unlk %a6
				1575
				1576	btst &0x7,(%sp) # is trace on?
				1577	bne.b fu_out_trace # yes
				1578
				1579	bra.l _fpsp_done
				1580
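				1581	# is the ea mode pre-decrement of the stack pointer from supervisor mode?
				1582	# ("fmov.x fpm,-(a7)") if so, the exception frame must be shifted down and
				1582	# the result copied onto the stack by hand below.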
				1583	fu_out_done_s:
				1584	cmpi.b SPCOND_FLG(%a6),&mda7_flg
				1585	bne.b fu_out_done_cont
				1586
				1587	# the extended precision result is still in fp0. but, we need to save it
				1588	# somewhere on the stack until we can copy it to its final resting place.
				1589	# here, we're counting on the top of the stack to be the old place-holders
				1590	# for fp0/fp1 which have already been restored. that way, we can write
				1591	# over those destinations with the shifted stack frame.
				1592	fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
				1593
				1594	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1595	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1596	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1597
				1598	mov.l (%a6),%a6 # restore frame pointer
				1599
				1600	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp)
				1601	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				1602
				1603	# now, copy the result to the proper place on the stack
				1604	mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
				1605	mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
				1606	mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
				1607
				1608	add.l &LOCAL_SIZE-0x8,%sp
				1609
				1610	btst &0x7,(%sp)
				1611	bne.b fu_out_trace
				1612
				1613	bra.l _fpsp_done
				1614
				1615	fu_out_ena:
					# entered with d0 = FPCR exception enable byte (non-zero).
				1616	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
				1617	bfffo %d0{&24:&8},%d0 # find highest priority exception
				1618	bne.b fu_out_exc # there is at least one set
				1619
				1620	# no enabled exceptions were set.
				1621	# if a disabled overflow occurred and inexact was enabled but the result
				1622	# was exact, then a branch to _real_inex() is made.
				1623	btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
				1624	beq.w fu_out_done # no
				1625
				1626	fu_out_ovflchk:
				1627	btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
				1628	beq.w fu_out_done # no
				1629	bra.w fu_inex # yes; fu_inex exits through _real_inex()
				1630
				1631	#
				1632	# The fp move out that took the "Unimplemented Data Type" exception was
				1633	# being traced. Since the stack frames are similar, get the "current" PC
				1634	# from FPIAR and put it in the trace stack frame then jump to _real_trace().
				1635	#
				1636	# UNSUPP FRAME TRACE FRAME
				1637	# *************** ***************
				1638	# * EA * * Current *
				1639	# * * * PC *
				1640	# *************** ***************
				1641	# * 0x3 * 0x0dc * * 0x2 * 0x024 *
				1642	# *************** ***************
				1643	# * Next * * Next *
				1644	# * PC * * PC *
				1645	# *************** ***************
				1646	# * SR * * SR *
				1647	# *************** ***************
				1648	#
				1649	fu_out_trace:
				1650	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
				1651	fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR; overwrites the EA slot
				1652	bra.l _real_trace
				1653
				1654	# an exception occurred and that exception was enabled.
					# on entry d0 holds the bfffo result for the enabled-and-set byte, i.e.
					# 24 + the position of the highest priority exception.
				1655	fu_out_exc:
				1656	subi.l &24,%d0 # fix offset to be 0-7
				1657
				1658	# we don't mess with the existing fsave frame. just re-insert it and
				1659	# jump to the "_real_{}()" handler...
				1660	mov.w (tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset from table
				1661	jmp (tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
				1662
				1663	swbeg &0x8
					# 8 word-sized entries, each the offset of a handler from tbl_fu_out,
					# ordered from highest to lowest exception priority.
				1664	tbl_fu_out:
				1665	short tbl_fu_out - tbl_fu_out # BSUN can't happen
				1666	short tbl_fu_out - tbl_fu_out # SNAN can't happen
				1667	short fu_operr - tbl_fu_out # OPERR
				1668	short fu_ovfl - tbl_fu_out # OVFL
				1669	short fu_unfl - tbl_fu_out # UNFL
				1670	short tbl_fu_out - tbl_fu_out # DZ can't happen
				1671	short fu_inex - tbl_fu_out # INEX2
				1672	short tbl_fu_out - tbl_fu_out # INEX1 won't make it here
				1673
				1674	# for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
				1675	# frestore it.
					# fu_snan is not reachable through tbl_fu_out; it is branched to from
					# the packed move out code (fu_snan_p/fu_snan_s_p).
				1676	fu_snan:
				1677	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1678	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1679	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1680
				1681	mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
				1682	mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
				1683
				1684	frestore FP_SRC(%a6) # restore src operand
				1685
				1686	unlk %a6 # unravel stack frame
				1687
				1688
				1689	bra.l _real_snan
				1690
				1691	fu_operr:
					# enabled OPERR on a move out: rebuild the frame for vector offset 0xd0,
					# frestore the src operand and exit through _real_operr().
					# reached via tbl_fu_out and from fu_operr_p/fu_operr_p_s.
				1692	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1693	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1694	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1695
				1696	mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
				1697	mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
				1698
				1699	frestore FP_SRC(%a6) # restore src operand
				1700
				1701	unlk %a6 # unravel stack frame
				1702
				1703
				1704	bra.l _real_operr
				1705
				1706	fu_ovfl:
					# enabled OVFL on a move out: the EXOP is in fp1; store it into the
					# fsave frame at FP_SRC, frestore it and exit through _real_ovfl().
				1707	fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
				1708
				1709	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1710	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1711	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1712
				1713	mov.w &0x30d4,EXC_VOFF(%a6) # vector offset = 0xd4
				1714	mov.w &0xe005,2+FP_SRC(%a6) # set fsave status
				1715
				1716	frestore FP_SRC(%a6) # restore EXOP
				1717
				1718	unlk %a6 # unravel stack frame
				1719
				1720	bra.l _real_ovfl
				1721
				1722	# underflow can happen for extended precision. extended precision opclass
				1723	# three instruction exceptions don't update the stack pointer. so, if the
				1724	# exception occurred from user mode, then simply update a7 and exit normally.
				1725	# if the exception occurred from supervisor mode, check if the <ea> mode was
					# -(a7); if it was, the exception frame has to be shifted "down" so the
					# result can be stored where the frame was (see fu_unfl_s).
				1726	fu_unfl:
				1727	mov.l EXC_A6(%a6),(%a6) # restore a6
				1728
				1729	btst &0x5,EXC_SR(%a6) # user or supervisor?
				1730	bne.w fu_unfl_s # supervisor
				1731
				1732	mov.l EXC_A7(%a6),%a0 # restore a7 whether we need
				1733	mov.l %a0,%usp # to or not...
				1734
				1735	fu_unfl_cont:
				1736	fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
				1737
				1738	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1739	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1740	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1741
				1742	mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
				1743	mov.w &0xe003,2+FP_SRC(%a6) # set fsave status
				1744
				1745	frestore FP_SRC(%a6) # restore EXOP
				1746
				1747	unlk %a6 # unravel stack frame
				1748
				1749	bra.l _real_unfl
				1750
				1751	fu_unfl_s:
					# enabled UNFL taken from supervisor mode.
				1752	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
				1753	bne.b fu_unfl_cont # no; handle like the user mode case
				1754
				1755	# the extended precision result is still in fp0. but, we need to save it
				1756	# somewhere on the stack until we can copy it to its final resting place
				1757	# (where the exc frame is currently). make sure it's not at the top of the
				1758	# frame or it will get overwritten when the exc stack frame is shifted "down".
				1759	fmovm.x &0x80,FP_SRC(%a6) # put answer on stack
				1760	fmovm.x &0x40,FP_DST(%a6) # put EXOP on stack
				1761
				1762	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1763	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1764	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1765
				1766	mov.w &0x30cc,EXC_VOFF(%a6) # vector offset = 0xcc
				1767	mov.w &0xe003,2+FP_DST(%a6) # set fsave status (frame is at FP_DST here)
				1768
				1769	frestore FP_DST(%a6) # restore EXOP
				1770
				1771	mov.l (%a6),%a6 # restore frame pointer
					# (no unlk: sp still points at the bottom of the local frame)
				1772
				1773	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame "down" 12 bytes
				1774	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				1775	mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # <ea> field is kept as well
				1776
				1777	# now, copy the result to the proper place on the stack
				1778	mov.l LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
				1779	mov.l LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
				1780	mov.l LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
				1781
				1782	add.l &LOCAL_SIZE-0x8,%sp # discard the local frame
				1783
				1784	bra.l _real_unfl
				1785
				1786	# fmove in and out enter here.
					# the EXOP in fp1 is stored into the fsave frame at FP_SRC, the frame is
					# frestore'd and the handler exits through _real_inex().
				1787	fu_inex:
				1788	fmovm.x &0x40,FP_SRC(%a6) # save EXOP to the stack
				1789
				1790	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1791	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1792	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1793
				1794	mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
				1795	mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
				1796
				1797	frestore FP_SRC(%a6) # restore EXOP
				1798
				1799	unlk %a6 # unravel stack frame
				1800
				1801
				1802	bra.l _real_inex
				1803
				1804	#########################################################################
				1805	#########################################################################
				1806	fu_in_pack:
					# opclass two instruction with a PACKED source operand: fetch and tag
					# the source, load and tag the destination for dyadic operations, then
					# call the emulation routine selected through tbl_unsupp.
				1807
				1808
				1809	# I'm not sure at this point what FPSR bits are valid for this instruction.
				1810	# so, since the emulation routines re-create them anyways, zero exception field
				1811	andi.l &0x0ff00ff,USER_FPSR(%a6) # zero exception field
				1812
				1813	fmov.l &0x0,%fpcr # zero current control regs
				1814	fmov.l &0x0,%fpsr
				1815
				1816	bsr.l get_packed # fetch packed src operand
				1817
				1818	lea FP_SRC(%a6),%a0 # pass ptr to src
				1819	bsr.l set_tag_x # set src optype tag
				1820
				1821	mov.b %d0,STAG(%a6) # save src optype tag
				1822
				1823	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				1824
				1825	# bit five of the fp extension word separates the monadic and dyadic operations
				1826	# at this point
				1827	btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
				1828	beq.b fu_extract_p # monadic
				1829	cmpi.b 1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
				1830	beq.b fu_extract_p # yes, so it's monadic, too
				1831
				1832	bsr.l load_fpn2 # load dst into FP_DST
				1833
				1834	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				1835	bsr.l set_tag_x # tag the operand type
				1836	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				1837	bne.b fu_op2_done_p # no
				1838	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				1839	fu_op2_done_p:
				1840	mov.b %d0,DTAG(%a6) # save dst optype tag
				1841
				1842	fu_extract_p:
				1843	clr.l %d0 # clear upper bytes before the byte load below
				1844	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
				1845
				1846	bfextu 1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension
				1847
				1848	lea FP_SRC(%a6),%a0 # pass ptr to src op
				1849	lea FP_DST(%a6),%a1 # pass ptr to dst op
				1850
				1851	mov.l (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr
				1852	jsr (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
				1853
				1854	#
				1855	# Exceptions in order of precedence:
				1856	# BSUN : none
				1857	# SNAN : all dyadic ops
				1858	# OPERR : fsqrt(-NORM)
				1859	# OVFL : all except ftst,fcmp
				1860	# UNFL : all except ftst,fcmp
				1861	# DZ : fdiv
				1862	# INEX2 : all except ftst,fcmp
				1863	# INEX1 : all
				1864	#
				1865
				1866	# we determine the highest priority exception(if any) set by the
				1867	# emulation routine that has also been enabled by the user.
				1868	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
				1869	bne.w fu_in_ena_p # some are enabled
				1870
				1871	fu_in_cont_p:
					# no enabled exception to report: store the result (if the instruction
					# produces one), update a7 and exit.
				1872	# fcmp and ftst do not store any result.
				1873	mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension
				1874	andi.b &0x38,%d0 # extract bits 3-5
				1875	cmpi.b %d0,&0x38 # is instr fcmp or ftst?
				1876	beq.b fu_in_exit_p # yes
				1877
				1878	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				1879	bsr.l store_fpreg # store the result
				1880
				1881	fu_in_exit_p:
				1882
				1883	btst &0x5,EXC_SR(%a6) # user or supervisor?
				1884	bne.w fu_in_exit_s_p # supervisor
				1885
				1886	mov.l EXC_A7(%a6),%a0 # update user a7
				1887	mov.l %a0,%usp
				1888
				1889	fu_in_exit_cont_p:
				1890	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1891	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1892	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1893
				1894	unlk %a6 # unravel stack frame
				1895
				1896	btst &0x7,(%sp) # is trace on?
				1897	bne.w fu_trace_p # yes
				1898
				1899	bra.l _fpsp_done # exit to os
				1900
				1901	# the exception occurred in supervisor mode. check to see if the
				1902	# addressing mode was (a7)+. if so, we'll need to shift the
				1903	# stack frame "up".
				1904	fu_in_exit_s_p:
				1905	btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
				1906	beq.b fu_in_exit_cont_p # no
				1907
				1908	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1909	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1910	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1911
				1912	unlk %a6 # unravel stack frame
				1913
				1914	# shift the stack frame "up". we don't really care about the <ea> field.
				1915	mov.l 0x4(%sp),0x10(%sp) # second longword of the frame moves up 12 bytes
				1916	mov.l 0x0(%sp),0xc(%sp) # then the first longword (it starts with the SR)
				1917	add.l &0xc,%sp # sp now points at the moved frame
				1918
				1919	btst &0x7,(%sp) # is trace on?
				1920	bne.w fu_trace_p # yes
				1921
				1922	bra.l _fpsp_done # exit to os
				1923
				1924	fu_in_ena_p:
					# entered with d0 = FPCR exception enable byte (non-zero).
				1925	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled & set
				1926	bfffo %d0{&24:&8},%d0 # find highest priority exception
				1927	bne.b fu_in_exc_p # at least one was set
				1928
				1929	#
				1930	# No exceptions occurred that were also enabled. Now:
				1931	#
				1932	# if (OVFL && ovfl_disabled && inexact_enabled) {
				1933	# branch to _real_inex() (even if the result was exact!);
				1934	# } else {
				1935	# save the result in the proper fp reg (unless the op is fcmp or ftst);
				1936	# return;
				1937	# }
				1938	#
				1939	btst &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
				1940	beq.w fu_in_cont_p # no
				1941
				1942	fu_in_ovflchk_p:
				1943	btst &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
				1944	beq.w fu_in_cont_p # no
				1945	bra.w fu_in_exc_ovfl_p # do _real_inex() now
				1946
				1947	#
				1948	# An exception occurred and that exception was enabled:
				1949	#
				1950	# shift enabled exception field into lo byte of d0;
				1951	# if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
				1952	# ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
				1953	# /*
				1954	# * this is the case where we must call _real_inex() now or else
				1955	# * there will be no other way to pass it the exceptional operand
				1956	# */
				1957	# call _real_inex();
				1958	# } else {
				1959	# restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
				1960	# }
				1961	#
				1962	fu_in_exc_p:
				1963	subi.l &24,%d0 # fix offset to be 0-7
				1964	cmpi.b %d0,&0x6 # is exception INEX? (6 or 7)
				1965	blt.b fu_in_exc_exit_p # no
				1966
				1967	# the enabled exception was inexact
				1968	btst &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
				1969	bne.w fu_in_exc_unfl_p # yes
				1970	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
				1971	bne.w fu_in_exc_ovfl_p # yes
					# neither: fall through with d0 still selecting the INEX entry
				1972
				1973	# here, we insert the correct fsave status value into the fsave frame for the
				1974	# corresponding exception. the operand in the fsave frame should be the original
				1975	# src operand.
				1976	# as a reminder for future predicted pain and agony, we are passing in fsave the
				1977	# "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
				1978	# this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
					# on entry d0 = index (0-7) into tbl_except_p.
				1979	fu_in_exc_exit_p:
				1980	btst &0x5,EXC_SR(%a6) # user or supervisor?
				1981	bne.w fu_in_exc_exit_s_p # supervisor
				1982
				1983	mov.l EXC_A7(%a6),%a0 # update user a7
				1984	mov.l %a0,%usp
				1985
				1986	fu_in_exc_exit_cont_p:
				1987	mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status from table
				1988
				1989	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				1990	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				1991	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				1992
				1993	frestore FP_SRC(%a6) # restore src op
				1994
				1995	unlk %a6 # unravel stack frame
				1996
				1997	btst &0x7,(%sp) # is trace enabled?
				1998	bne.w fu_trace_p # yes
				1999
				2000	bra.l _fpsp_done
				2001
					# fsave status words, indexed by exception number 0-7 in priority order
					# (index 3 is loaded for OVFL and index 4 for UNFL by the code below).
				2002	tbl_except_p:
				2003	short 0xe000,0xe006,0xe004,0xe005
				2004	short 0xe003,0xe002,0xe001,0xe001
				2005
				2006	fu_in_exc_ovfl_p:
					# overflow occurred with inexact enabled: use the OVFL status word
				2007	mov.w &0x3,%d0 # tbl_except_p index 3 (0xe005)
				2008	bra.w fu_in_exc_exit_p
				2009
				2010	fu_in_exc_unfl_p:
					# underflow occurred with inexact enabled: use the UNFL status word
				2011	mov.w &0x4,%d0 # tbl_except_p index 4 (0xe003)
				2012	bra.w fu_in_exc_exit_p
				2013
				2014	fu_in_exc_exit_s_p:
					# supervisor mode variant of fu_in_exc_exit_p: if the <ea> mode was
					# (a7)+ the exception frame must also be shifted "up" 12 bytes.
				2015	btst &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
				2016	beq.b fu_in_exc_exit_cont_p # no
				2017
				2018	mov.w (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # set fsave status from table
				2019
				2020	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2021	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2022	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2023
				2024	frestore FP_SRC(%a6) # restore src op
				2025
				2026	unlk %a6 # unravel stack frame
				2027
				2028	# shift stack frame "up". who cares about <ea> field.
				2029	mov.l 0x4(%sp),0x10(%sp) # second longword of the frame moves up 12 bytes
				2030	mov.l 0x0(%sp),0xc(%sp) # then the first longword (it starts with the SR)
				2031	add.l &0xc,%sp # sp now points at the moved frame
				2032
				2033	btst &0x7,(%sp) # is trace on?
				2034	bne.b fu_trace_p # yes
				2035
				2036	bra.l _fpsp_done # exit to os
				2037
				2038	#
				2039	# The opclass two PACKED instruction that took an "Unimplemented Data Type"
				2040	# exception was being traced. Make the "current" PC the FPIAR and put it in the
				2041	# trace stack frame then jump to _real_trace().
				2042	#
				2043	# UNSUPP FRAME TRACE FRAME
				2044	# *************** ***************
				2045	# * EA * * Current *
				2046	# * * * PC *
				2047	# *************** ***************
				2048	# * 0x2 * 0x0dc * * 0x2 * 0x024 *
				2049	# *************** ***************
				2050	# * Next * * Next *
				2051	# * PC * * PC *
				2052	# *************** ***************
				2053	# * SR * * SR *
				2054	# *************** ***************
				2055	fu_trace_p:
				2056	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
				2057	fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR; overwrites the EA slot
				2058
				2059	bra.l _real_trace
				2060
				2061	#########################################################
				2062	#########################################################
				2063	fu_out_pack:
					# packed move out: load and tag the source register, then call fout()
					# to do the conversion and store.
				2064
				2065
				2066	# I'm not sure at this point what FPSR bits are valid for this instruction.
				2067	# so, since the emulation routines re-create them anyways, zero exception field.
				2068	# fmove out doesn't affect ccodes.
				2069	and.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
				2070
				2071	fmov.l &0x0,%fpcr # zero current control regs
				2072	fmov.l &0x0,%fpsr
				2073
				2074	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # extract the fp register field
				2075	bsr.l load_fpn1 # presumably loads that register into FP_SRC - confirm in load_fpn1
				2076
				2077	# unlike other opclass 3, unimplemented data type exceptions, packed must be
				2078	# able to detect all operand types.
				2079	lea FP_SRC(%a6),%a0 # pass ptr to src op
				2080	bsr.l set_tag_x # tag the operand type
				2081	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				2082	bne.b fu_op2_p # no
				2083	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				2084
				2085	fu_op2_p:
				2086	mov.b %d0,STAG(%a6) # save src optype tag
				2087
				2088	clr.l %d0 # clear upper bytes before the byte load below
				2089	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode/prec
				2090
				2091	lea FP_SRC(%a6),%a0 # pass ptr to src operand
				2092
				2093	mov.l (%a6),EXC_A6(%a6) # in case a6 changes
				2094	bsr.l fout # call fmove out routine
				2095
				2096	# Exceptions in order of precedence:
				2097	# BSUN : no
				2098	# SNAN : yes
				2099	# OPERR : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
				2100	# OVFL : no
				2101	# UNFL : no
				2102	# DZ : no
				2103	# INEX2 : yes
				2104	# INEX1 : no
				2105
				2106	# determine the highest priority exception(if any) set by the
				2107	# emulation routine that has also been enabled by the user.
				2108	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
				2109	bne.w fu_out_ena_p # some are enabled
				2110
				2111	fu_out_exit_p:
					# packed move out finished with no enabled exception to report.
				2112	mov.l EXC_A6(%a6),(%a6) # restore a6
				2113
				2114	btst &0x5,EXC_SR(%a6) # user or supervisor?
				2115	bne.b fu_out_exit_s_p # supervisor
				2116
				2117	mov.l EXC_A7(%a6),%a0 # update user a7
				2118	mov.l %a0,%usp
				2119
				2120	fu_out_exit_cont_p:
				2121	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2122	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2123	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2124
				2125	unlk %a6 # unravel stack frame
				2126
				2127	btst &0x7,(%sp) # is trace on?
				2128	bne.w fu_trace_p # yes
				2129
				2130	bra.l _fpsp_done # exit to os
				2131
				2132	# the exception occurred in supervisor mode. check to see if the
				2133	# addressing mode was -(a7). if so, we'll need to shift the
				2134	# stack frame "down".
				2135	fu_out_exit_s_p:
				2136	btst &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
				2137	beq.b fu_out_exit_cont_p # no
				2138
				2139	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2140	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2141	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2142
				2143	mov.l (%a6),%a6 # restore frame pointer
					# (no unlk: sp still points at the bottom of the local frame)
				2144
				2145	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame "down" 12 bytes
				2146	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				2147
				2148	# now, copy the result to the proper place on the stack
					# (the packed result is taken from FP_DST; presumably it was put there
					# by _mem_write2 in supervisor mode - confirm against fout)
				2149	mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
				2150	mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
				2151	mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
				2152
				2153	add.l &LOCAL_SIZE-0x8,%sp # discard the locals; (sp) is read as the SR next
				2154
				2155	btst &0x7,(%sp) # is trace on?
				2156	bne.w fu_trace_p # yes
				2157
				2158	bra.l _fpsp_done # exit to os
				2159
				2160	fu_out_ena_p:
					# entered with d0 = FPCR exception enable byte (non-zero).
				2161	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled
				2162	bfffo %d0{&24:&8},%d0 # find highest priority exception
				2163	beq.w fu_out_exit_p # none enabled and set; normal exit
				2164
				2165	mov.l EXC_A6(%a6),(%a6) # restore a6
				2166
				2167	# an exception occurred and that exception was enabled.
				2168	# the only exception possible on packed move out are INEX, OPERR, and SNAN.
					# d0 still holds the raw bfffo result here (24 + exception index).
				2169	fu_out_exc_p:
				2170	cmpi.b %d0,&0x1a # 0x1a = 24 + 2, the OPERR position
				2171	bgt.w fu_inex_p2 # lower priority than OPERR: INEX
				2172	beq.w fu_operr_p # OPERR
					# otherwise fall through to the SNAN case
				2173
				2174	fu_snan_p:
					# enabled SNAN on a packed move out.
				2175	btst &0x5,EXC_SR(%a6) # user or supervisor?
				2176	bne.b fu_snan_s_p # supervisor
				2177
				2178	mov.l EXC_A7(%a6),%a0 # update user a7
				2179	mov.l %a0,%usp
				2180	bra.w fu_snan # finish in the common SNAN exit
				2181
				2182	fu_snan_s_p:
				2183	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
				2184	bne.w fu_snan # no; common SNAN exit
				2185
				2186	# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
				2187	# the strategy is to move the exception frame "down" 12 bytes. then, we
				2188	# can store the default result where the exception frame was.
				2189	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2190	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2191	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2192
				2193	mov.w &0x30d8,EXC_VOFF(%a6) # vector offset = 0xd8
				2194	mov.w &0xe006,2+FP_SRC(%a6) # set fsave status
				2195
				2196	frestore FP_SRC(%a6) # restore src operand
				2197
				2198	mov.l (%a6),%a6 # restore frame pointer
					# (no unlk: sp still points at the bottom of the local frame)
				2199
				2200	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame "down" 12 bytes
				2201	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				2202	mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
				2203
				2204	# now, we copy the default result to its proper location
				2205	mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
				2206	mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
				2207	mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
				2208
				2209	add.l &LOCAL_SIZE-0x8,%sp # discard the local frame
				2210
				2211
				2212	bra.l _real_snan
				2213
				2214	fu_operr_p:
					# enabled OPERR on a packed move out.
				2215	btst &0x5,EXC_SR(%a6) # user or supervisor?
				2216	bne.w fu_operr_p_s # supervisor
				2217
				2218	mov.l EXC_A7(%a6),%a0 # update user a7
				2219	mov.l %a0,%usp
				2220	bra.w fu_operr # finish in the common OPERR exit
				2221
				2222	fu_operr_p_s:
				2223	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
				2224	bne.w fu_operr # no; common OPERR exit
				2225
				2226	# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
				2227	# the strategy is to move the exception frame "down" 12 bytes. then, we
				2228	# can store the default result where the exception frame was.
				2229	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2230	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2231	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2232
				2233	mov.w &0x30d0,EXC_VOFF(%a6) # vector offset = 0xd0
				2234	mov.w &0xe004,2+FP_SRC(%a6) # set fsave status
				2235
				2236	frestore FP_SRC(%a6) # restore src operand
				2237
				2238	mov.l (%a6),%a6 # restore frame pointer
					# (no unlk: sp still points at the bottom of the local frame)
				2239
				2240	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame "down" 12 bytes
				2241	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				2242	mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
				2243
				2244	# now, we copy the default result to its proper location
				2245	mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
				2246	mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
				2247	mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
				2248
				2249	add.l &LOCAL_SIZE-0x8,%sp # discard the local frame
				2250
				2251
				2252	bra.l _real_operr
				2253
				2254	fu_inex_p2:
					# enabled INEX on a packed move out.
				2255	btst &0x5,EXC_SR(%a6) # user or supervisor?
				2256	bne.w fu_inex_s_p2 # supervisor
				2257
				2258	mov.l EXC_A7(%a6),%a0 # update user a7
				2259	mov.l %a0,%usp
				2260	bra.w fu_inex # finish in the common INEX exit
				2261
				2262	fu_inex_s_p2:
				2263	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
				2264	bne.w fu_inex # no; common INEX exit
				2265
				2266	# the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
				2267	# the strategy is to move the exception frame "down" 12 bytes. then, we
				2268	# can store the default result where the exception frame was.
				2269	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0/fp1
				2270	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2271	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2272
				2273	mov.w &0x30c4,EXC_VOFF(%a6) # vector offset = 0xc4
				2274	mov.w &0xe001,2+FP_SRC(%a6) # set fsave status
				2275
				2276	frestore FP_SRC(%a6) # restore src operand
				2277
				2278	mov.l (%a6),%a6 # restore frame pointer
					# (no unlk: sp still points at the bottom of the local frame)
				2279
				2280	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # shift frame "down" 12 bytes
				2281	mov.l LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp)
				2282	mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
				2283
				2284	# now, we copy the default result to its proper location
				2285	mov.l LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
				2286	mov.l LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
				2287	mov.l LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
				2288
				2289	add.l &LOCAL_SIZE-0x8,%sp # discard the local frame
				2290
				2291
				2292	bra.l _real_inex
				2293
				2294	#########################################################################
				2295
				2296	#
				2297	# if we're stuffing a source operand back into an fsave frame then we
				2298	# have to make sure that for single or double source operands that the
				2299	# format stuffed is as weird as the hardware usually makes it.
				2300	#
					# operates in place on FP_SRC; uses d0 (the dbl path also uses d1/a0 and
					# calls dnrm_lp). any other source format returns without changes.
				2301	global funimp_skew
				2302	funimp_skew:
				2303	bfextu EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
				2304	cmpi.b %d0,&0x1 # was src sgl?
				2305	beq.b funimp_skew_sgl # yes
				2306	cmpi.b %d0,&0x5 # was src dbl?
				2307	beq.b funimp_skew_dbl # yes
				2308	rts
				2309
				2310	funimp_skew_sgl:
					# single precision source: if the exponent is non-zero and <= 0x3f80,
					# shift the hi mantissa right and force the exponent to 0x3f80.
				2311	mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
				2312	andi.w &0x7fff,%d0 # strip sign
				2313	beq.b funimp_skew_sgl_not # exponent is zero; leave operand alone
				2314	cmpi.w %d0,&0x3f80
				2315	bgt.b funimp_skew_sgl_not # exponent > 0x3f80; leave operand alone
				2316	neg.w %d0 # make exponent negative
				2317	addi.w &0x3f81,%d0 # find amt to shift
				2318	mov.l FP_SRC_HI(%a6),%d1 # fetch DENORM hi(man)
				2319	lsr.l %d0,%d1 # shift it
				2320	bset &31,%d1 # set j-bit
				2321	mov.l %d1,FP_SRC_HI(%a6) # insert new hi(man)
				2322	andi.w &0x8000,FP_SRC_EX(%a6) # clear old exponent
				2323	ori.w &0x3f80,FP_SRC_EX(%a6) # insert new "skewed" exponent
				2324	funimp_skew_sgl_not:
				2325	rts
				2326
				2327	funimp_skew_dbl:
					# double precision source: if the exponent is non-zero and <= 0x3c00,
					# denormalize the operand with dnrm_lp and force the exponent to 0x3c00.
				2328	mov.w FP_SRC_EX(%a6),%d0 # fetch DENORM exponent
				2329	andi.w &0x7fff,%d0 # strip sign
				2330	beq.b funimp_skew_dbl_not # exponent is zero; leave operand alone
				2331	cmpi.w %d0,&0x3c00
				2332	bgt.b funimp_skew_dbl_not # exponent > 0x3c00; leave operand alone
				2333
				2334	tst.b FP_SRC_EX(%a6) # make "internal format"
				2335	smi.b 0x2+FP_SRC(%a6) # keep the sign as a byte flag at FP_SRC+2
				2336	mov.w %d0,FP_SRC_EX(%a6) # insert exponent with cleared sign
				2337	clr.l %d0 # clear g,r,s
				2338	lea FP_SRC(%a6),%a0 # pass ptr to src op
				2339	mov.w &0x3c01,%d1 # pass denorm threshold
				2340	bsr.l dnrm_lp # denorm it
				2341	mov.w &0x3c00,%d0 # new exponent
				2342	tst.b 0x2+FP_SRC(%a6) # is sign set?
				2343	beq.b fss_dbl_denorm_done # no
				2344	bset &15,%d0 # set sign
				2345	fss_dbl_denorm_done:
				2346	bset &0x7,FP_SRC_HI(%a6) # set j-bit
				2347	mov.w %d0,FP_SRC_EX(%a6) # insert new exponent
				2348	funimp_skew_dbl_not:
				2349	rts
				2350
				2351	#########################################################################
				2352	global _mem_write2
					# _mem_write2(): memory write used when the destination may be the
					# supervisor stack. from user mode the write is handed to _dmem_write;
					# from supervisor mode the 12 bytes at (a0) are only copied into FP_DST.
				2353	_mem_write2:
				2354	btst &0x5,EXC_SR(%a6) # user or supervisor?
				2355	beq.l _dmem_write # user; _dmem_write returns to our caller
				2356	mov.l 0x0(%a0),FP_DST_EX(%a6) # supervisor: stash the data in FP_DST
				2357	mov.l 0x4(%a0),FP_DST_HI(%a6)
				2358	mov.l 0x8(%a0),FP_DST_LO(%a6)
				2359	clr.l %d1 # presumably the "no error" status - confirm against _dmem_write
				2360	rts
				2361
				2362	#########################################################################
				2363	# XDEF **************************************************************** #
				2364	# _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented #
				2365	# effective address" exception. #
				2366	# #
				2367	# This handler should be the first code executed upon taking the #
				2368	# FP Unimplemented Effective Address exception in an operating #
				2369	# system. #
				2370	# #
				2371	# XREF **************************************************************** #
				2372	# _imem_read_long() - read instruction longword #
				2373	# fix_skewed_ops() - adjust src operand in fsave frame #
				2374	# set_tag_x() - determine optype of src/dst operands #
				2375	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				2376	# unnorm_fix() - change UNNORM operands to NORM or ZERO #
				2377	# load_fpn2() - load dst operand from FP regfile #
				2378	# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
				2379	# decbin() - convert packed data to FP binary data #
				2380	# _real_fpu_disabled() - "callout" for "FPU disabled" exception #
				2381	# _real_access() - "callout" for access error exception #
				2382	# _mem_read() - read extended immediate operand from memory #
				2383	# _fpsp_done() - "callout" for exit; work all done #
				2384	# _real_trace() - "callout" for Trace enabled exception #
				2385	# fmovm_dynamic() - emulate dynamic fmovm instruction #
				2386	# fmovm_ctrl() - emulate fmovm control instruction #
				2387	# #
				2388	# INPUT *************************************************************** #
				2389	# - The system stack contains the "Unimplemented <ea>" stk frame #
				2390	# #
				2391	# OUTPUT ************************************************************** #
				2392	# If access error: #
				2393	# - The system stack is changed to an access error stack frame #
				2394	# If FPU disabled: #
				2395	# - The system stack is changed to an FPU disabled stack frame #
				2396	# If Trace exception enabled: #
				2397	# - The system stack is changed to a Trace exception stack frame #
				2398	# Else: (normal case) #
				2399	# - None (correct result has been stored as appropriate) #
				2400	# #
				2401	# ALGORITHM *********************************************************** #
				2402	# This exception handles 3 types of operations: #
				2403	# (1) FP Instructions using extended precision or packed immediate #
				2404	# addressing mode. #
				2405	# (2) The "fmovm.x" instruction w/ dynamic register specification. #
				2406	# (3) The "fmovm.l" instruction w/ 2 or 3 control registers. #
				2407	# #
				2408	# For immediate data operations, the data is read in w/ a #
				2409	# _mem_read() "callout", converted to FP binary (if packed), and used #
				2410	# as the source operand to the instruction specified by the instruction #
				2411	# word. If no FP exception should be reported as a result of the #
				2412	# emulation, then the result is stored to the destination register and #
				2413	# the handler exits through _fpsp_done(). If an enabled exc has been #
				2414	# signalled as a result of emulation, then an fsave state frame #
				2415	# corresponding to the FP exception type must be entered into the 060 #
				2416	# FPU before exiting. In either the enabled or disabled cases, we #
				2417	# must also check if a Trace exception is pending, in which case, we #
				2418	# must create a Trace exception stack frame from the current exception #
				2419	# stack frame. If no Trace is pending, we simply exit through #
				2420	# _fpsp_done(). #
				2421	# For "fmovm.x", call the routine fmovm_dynamic() which will #
				2422	# decode and emulate the instruction. No FP exceptions can be pending #
				2423	# as a result of this operation emulation. A Trace exception can be #
				2424	# pending, though, which means the current stack frame must be changed #
				2425	# to a Trace stack frame and an exit made through _real_trace(). #
				2426	# For the case of "fmovm.x Dn,-(a7)", where the offending instruction #
				2427	# was executed from supervisor mode, this handler must store the FP #
				2428	# register file values to the system stack by itself since #
				2429	# fmovm_dynamic() can't handle this. A normal exit is made through #
				2430	# _fpsp_done(). #
				2431	# For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
				2432	# Again, a Trace exception may be pending and an exit made through #
				2433	# _real_trace(). Else, a normal exit is made through _fpsp_done(). #
				2434	# #
				2435	# Before any of the above is attempted, it must be checked to #
				2436	# see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
				2437	# before the "FPU disabled" exception, but the "FPU disabled" exception #
				2438	# has higher priority, we check the disabled bit in the PCR. If set, #
				2439	# then we must create an 8 word "FPU disabled" exception stack frame #
				2440	# from the current 4 word exception stack frame. This includes #
				2441	# reproducing the effective address of the instruction to put on the #
				2442	# new stack frame. #
				2443	# #
				2444	# In the process of all emulation work, if a _mem_read() #
				2445	# "callout" returns a failing result indicating an access error, then #
				2446	# we must create an access error stack frame from the current stack #
				2447	# frame. This information includes a faulting address and a fault- #
				2448	# status-longword. These are created within this handler. #
				2449	# #
				2450	#########################################################################
				2451
				2452	global _fpsp_effadd
				2453	_fpsp_effadd:
				2454
				2455	# This exception type takes priority over the "Line F Emulator"
				2456	# exception. Therefore, the FPU could be disabled when entering here.
				2457	# So, we must check to see if it's disabled and handle that case separately.
				2458	mov.l %d0,-(%sp) # save d0
				2459	movc %pcr,%d0 # load proc cr
				2460	btst &0x1,%d0 # is FPU disabled?
				2461	bne.w iea_disabled # yes; iea_disabled pops the saved d0
				2462	mov.l (%sp)+,%d0 # restore d0
				2463
				2464	link %a6,&-LOCAL_SIZE # init stack frame
				2465
				2466	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				2467	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				2468	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				2469
				2470	# PC of instruction that took the exception is the PC in the frame
				2471	mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) # ext word ptr = stacked PC
				2472
				2473	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				2474	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				2475	bsr.l _imem_read_long # fetch the instruction words
				2476	mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
				2477
				2478	#########################################################################
				2479
				2480	tst.w %d0 # is operation fmovem? (low word of d0 negative)
				2481	bmi.w iea_fmovm # yes
				2482
				2483	#
				2484	# here, we will have:
				2485	# fabs fdabs fsabs facos fmod
				2486	# fadd fdadd fsadd fasin frem
				2487	# fcmp fatan fscale
				2488	# fdiv fddiv fsdiv fatanh fsin
				2489	# fint fcos fsincos
				2490	# fintrz fcosh fsinh
				2491	# fmove fdmove fsmove fetox ftan
				2492	# fmul fdmul fsmul fetoxm1 ftanh
				2493	# fneg fdneg fsneg fgetexp ftentox
				2494	# fsgldiv fgetman ftwotox
				2495	# fsglmul flog10
				2496	# fsqrt flog2
				2497	# fsub fdsub fssub flogn
				2498	# ftst flognp1
				2499	# which can all use f<op>.{x,p}
				2500	# so, now it's immediate data extended precision AND PACKED FORMAT!
				2501	#
				2502	iea_op:
				2503	andi.l &0x00ff00ff,USER_FPSR(%a6) # zero bits 31-24 and 15-8 of the saved FPSR
				2504
				2505	btst &0xa,%d0 # is src fmt x or p?
				2506	bne.b iea_op_pack # packed
				2507
				2508
				2509	mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
				2510	lea FP_SRC(%a6),%a1 # pass: ptr to super dst
				2511	mov.l &0xc,%d0 # pass: 12 bytes
				2512	bsr.l _imem_read # read extended immediate
				2513
				2514	tst.l %d1 # did ifetch fail?
				2515	bne.w iea_iacc # yes
				2516
				2517	bra.b iea_op_setsrc # extended operand is used as read
				2518
				2519	iea_op_pack:
				2520
				2521	mov.l EXC_EXTWPTR(%a6),%a0 # pass: ptr to #<data>
				2522	lea FP_SRC(%a6),%a1 # pass: ptr to super dst
				2523	mov.l &0xc,%d0 # pass: 12 bytes
				2524	bsr.l _imem_read # read packed operand
				2525
				2526	tst.l %d1 # did ifetch fail?
				2527	bne.w iea_iacc # yes
				2528
				2529	# The packed operand is an INF or a NAN if the exponent field is all ones.
				2530	bfextu FP_SRC(%a6){&1:&15},%d0 # get exp
				2531	cmpi.w %d0,&0x7fff # INF or NAN?
				2532	beq.b iea_op_setsrc # operand is an INF or NAN
				2533
				2534	# The packed operand is a zero if the mantissa is all zero, else it's
				2535	# a normal packed op.
				2536	mov.b 3+FP_SRC(%a6),%d0 # get 4th byte (offset 3)
				2537	andi.b &0x0f,%d0 # clear all but last nybble
				2538	bne.b iea_op_gp_not_spec # not a zero
				2539	tst.l FP_SRC_HI(%a6) # is lw 2 zero?
				2540	bne.b iea_op_gp_not_spec # not a zero
				2541	tst.l FP_SRC_LO(%a6) # is lw 3 zero?
				2542	beq.b iea_op_setsrc # operand is a ZERO
				2543	iea_op_gp_not_spec:
				2544	lea FP_SRC(%a6),%a0 # pass: ptr to packed op
				2545	bsr.l decbin # convert to extended
				2546	fmovm.x &0x80,FP_SRC(%a6) # make this the srcop
				2547
				2548	iea_op_setsrc:
				2549	addi.l &0xc,EXC_EXTWPTR(%a6) # update extension word pointer (skip 12-byte immediate)
				2550
				2551	# FP_SRC now holds the src operand.
				2552	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				2553	bsr.l set_tag_x # tag the operand type
				2554	mov.b %d0,STAG(%a6) # could be ANYTHING!!!
				2555	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				2556	bne.b iea_op_getdst # no
				2557	bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
				2558	mov.b %d0,STAG(%a6) # set new optype tag
				2559	iea_op_getdst:
				2560	clr.b STORE_FLG(%a6) # clear "store result" boolean
				2561
				2562	btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
				2563	beq.b iea_op_extract # monadic
				2564	btst &0x4,1+EXC_CMDREG(%a6) # is operation fsincos,ftst,fcmp?
				2565	bne.b iea_op_spec # yes
				2566
				2567	iea_op_loaddst:
				2568	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
				2569	bsr.l load_fpn2 # load dst operand
				2570
				2571	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				2572	bsr.l set_tag_x # tag the operand type
				2573	mov.b %d0,DTAG(%a6) # could be ANYTHING!!!
				2574	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				2575	bne.b iea_op_extract # no
				2576	bsr.l unnorm_fix # yes; convert to NORM/DENORM/ZERO
				2577	mov.b %d0,DTAG(%a6) # set new optype tag
				2578	bra.b iea_op_extract
				2579
				2580	# the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
				2581	iea_op_spec:
				2582	btst &0x3,1+EXC_CMDREG(%a6) # is operation fsincos?
				2583	beq.b iea_op_extract # yes
				2584	# now, we're left with ftst and fcmp. so, first let's tag them so that they don't
				2585	# store a result. then, only fcmp will branch back and pick up a dst operand.
				2586	st STORE_FLG(%a6) # don't store a final result
				2587	btst &0x1,1+EXC_CMDREG(%a6) # is operation fcmp?
				2588	beq.b iea_op_loaddst # yes
				2589
				2590	iea_op_extract:
				2591	clr.l %d0 # zero d0 before the byte load below
				2592	mov.b FPCR_MODE(%a6),%d0 # pass: rnd mode,prec
				2593
				2594	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch low byte of cmd word
				2595	andi.w &0x007f,%d1 # extract extension
				2596
				2597	fmov.l &0x0,%fpcr # zero the fpcr
				2598	fmov.l &0x0,%fpsr # zero the fpsr
				2599
				2600	lea FP_SRC(%a6),%a0 # a0 = ptr to src op
				2601	lea FP_DST(%a6),%a1 # a1 = ptr to dst op
				2602
				2603	mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine offset (relative to tbl_unsupp)
				2604	jsr (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
				2605
				2606	#
				2607	# Exceptions in order of precedence:
				2608	# BSUN : none
				2609	# SNAN : all operations
				2610	# OPERR : all reg-reg or mem-reg operations that can normally operr
				2611	# OVFL : same as OPERR
				2612	# UNFL : same as OPERR
				2613	# DZ : same as OPERR
				2614	# INEX2 : same as OPERR
				2615	# INEX1 : all packed immediate operations
				2616	#
				2617
				2618	# we determine the highest priority exception(if any) set by the
				2619	# emulation routine that has also been enabled by the user.
				2620	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
				2621	bne.b iea_op_ena # some are enabled
				2622
				2623	# now, we save the result, unless, of course, the operation was ftst or fcmp.
				2624	# these don't save results.
				2625	iea_op_save:
				2626	tst.b STORE_FLG(%a6) # does this op store a result?
				2627	bne.b iea_op_exit1 # exit with no frestore
				2628
				2629	iea_op_store:
				2630	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
				2631	bsr.l store_fpreg # store the result
				2632
				2633	iea_op_exit1:
				2634	mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
				2635	mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
				2636
				2637	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				2638	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2639	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2640
				2641	unlk %a6 # unravel the frame
				2642
				2643	btst &0x7,(%sp) # is trace on?
				2644	bne.w iea_op_trace # yes
				2645
				2646	bra.l _fpsp_done # exit to os
				2647
				2648	iea_op_ena:
				2649	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
				2650	bfffo %d0{&24:&8},%d0 # find highest priority exception
				2651	bne.b iea_op_exc # at least one was set
				2652
				2653	# no exception occurred. now, did a disabled, exact overflow occur with inexact
				2654	# enabled? if so, then we have to stuff an overflow frame into the FPU.
				2655	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
				2656	beq.b iea_op_save # no
				2657
				2658	iea_op_ovfl:
				2659	btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
				2660	beq.b iea_op_store # no
				2661	bra.b iea_op_exc_ovfl # yes
				2662
				2663	# an enabled exception occurred. we have to insert the exception type back into
				2664	# the machine.
				2665	iea_op_exc:
				2666	subi.l &24,%d0 # fix offset to be 0-7 (index into tbl_iea_except)
				2667	cmpi.b %d0,&0x6 # is exception INEX?
				2668	bne.b iea_op_exc_force # no
				2669
				2670	# the enabled exception was inexact. so, if it occurs with an overflow
				2671	# or underflow that was disabled, then we have to force an overflow or
				2672	# underflow frame.
				2673	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
				2674	bne.b iea_op_exc_ovfl # yes
				2675	btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
				2676	bne.b iea_op_exc_unfl # yes
				2677
				2678	iea_op_exc_force:
				2679	mov.w (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # store table word for this exception at FP_SRC+2
				2680	bra.b iea_op_exit2 # exit with frestore
				2681
				2682	tbl_iea_except:
				2683	short 0xe002, 0xe006, 0xe004, 0xe005 # entries 0-3; entry 3 equals the ovfl word stored below
				2684	short 0xe003, 0xe002, 0xe001, 0xe001 # entries 4-7; entry 4 equals the unfl word stored below
				2685
				2686	iea_op_exc_ovfl:
				2687	mov.w &0xe005,2+FP_SRC(%a6) # force an overflow frame
				2688	bra.b iea_op_exit2
				2689
				2690	iea_op_exc_unfl:
				2691	mov.w &0xe003,2+FP_SRC(%a6) # force an underflow frame
				2692
				2693	iea_op_exit2:
				2694	mov.l EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
				2695	mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
				2696
				2697	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				2698	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2699	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2700
				2701	frestore FP_SRC(%a6) # restore exceptional state
				2702
				2703	unlk %a6 # unravel the frame
				2704
				2705	btst &0x7,(%sp) # is trace on?
				2706	bne.b iea_op_trace # yes
				2707
				2708	bra.l _fpsp_done # exit to os
				2709
				2710	#
				2711	# The opclass two instruction that took an "Unimplemented Effective Address"
				2712	# exception was being traced. Make the "current" PC the FPIAR and put it in
				2713	# the trace stack frame then jump to _real_trace().
				2714	#
				2715	# UNIMP EA FRAME TRACE FRAME
				2716	# *************** ***************
				2717	# * 0x0 * 0x0f0 * * Current *
				2718	# ***************** * PC *
				2719	# * Current * *****************
				2720	# * PC * * 0x2 * 0x024 *
				2721	# *************** ***************
				2722	# * SR * * Next *
				2723	# ***************** * PC *
				2724	# *****************
				2725	# * SR *
				2726	# *****************
				2727	iea_op_trace:
				2728	mov.l (%sp),-(%sp) # shift stack frame "down"
				2729	mov.w 0x8(%sp),0x4(%sp) # move lo word of Next PC down next to its hi word
				2730	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x024
				2731	fmov.l %fpiar,0x8(%sp) # "Current PC" is in FPIAR
				2732
				2733	bra.l _real_trace
				2734
				2735	#########################################################################
				2736	iea_fmovm:
				2737	btst &14,%d0 # ctrl or data reg
				2738	beq.w iea_fmovm_ctrl # ctrl
				2739
				2740	iea_fmovm_data:
				2741
				2742	btst &0x5,EXC_SR(%a6) # user or supervisor mode
				2743	bne.b iea_fmovm_data_s # supervisor
				2744
				2745	iea_fmovm_data_u:
				2746	mov.l %usp,%a0 # fetch user stack pointer
				2747	mov.l %a0,EXC_A7(%a6) # store current a7
				2748	bsr.l fmovm_dynamic # do dynamic fmovm
				2749	mov.l EXC_A7(%a6),%a0 # load possibly new a7
				2750	mov.l %a0,%usp # update usp
				2751	bra.w iea_fmovm_exit
				2752
				2753	iea_fmovm_data_s:
				2754	clr.b SPCOND_FLG(%a6) # clear the special-condition flag
				2755	lea 0x2+EXC_VOFF(%a6),%a0 # a0 = address just past the 4-word exception frame
				2756	mov.l %a0,EXC_A7(%a6) # store it as the current a7
				2757	bsr.l fmovm_dynamic # do dynamic fmovm
				2758
				2759	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the predecrement case flagged?
				2760	beq.w iea_fmovm_data_predec # yes
				2761	cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the postincrement case flagged?
				2762	bne.w iea_fmovm_exit # no; take the normal exit
				2763
				2764	# right now, d0 = the size.
				2765	# the data has been fetched from the supervisor stack, but we have not
				2766	# incremented the stack pointer by the appropriate number of bytes.
				2767	# do it here.
				2768	iea_fmovm_data_postinc:
				2769	btst &0x7,EXC_SR(%a6) # is trace enabled?
				2770	bne.b iea_fmovm_data_pi_trace # yes
				2771
				2772	mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR d0 bytes higher
				2773	mov.l EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC into the shifted frame
				2774	mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
				2775
				2776	lea (EXC_SR,%a6,%d0),%a0 # a0 = address of the shifted frame
				2777	mov.l %a0,EXC_SR(%a6) # stash it; loaded into sp after the unlk below
				2778
				2779	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				2780	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2781	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2782
				2783	unlk %a6
				2784	mov.l (%sp)+,%sp # switch sp to the shifted frame
				2785	bra.l _fpsp_done
				2786
				2787	iea_fmovm_data_pi_trace:
				2788	mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR into the shifted trace frame
				2789	mov.l EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # Next PC
				2790	mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
				2791	mov.l EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # Current PC
				2792
				2793	lea (EXC_SR-0x4,%a6,%d0),%a0 # a0 = address of the shifted trace frame
				2794	mov.l %a0,EXC_SR(%a6) # stash it; loaded into sp after the unlk below
				2795
				2796	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				2797	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2798	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2799
				2800	unlk %a6
				2801	mov.l (%sp)+,%sp # switch sp to the shifted trace frame
				2802	bra.l _real_trace
				2803
				2804	# right now, d1 = the strg and d0 = the size.
				2805	iea_fmovm_data_predec:
				2806	mov.b %d1,EXC_VOFF(%a6) # store strg
				2807	mov.b %d0,0x1+EXC_VOFF(%a6) # store size
				2808
				2809	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				2810	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2811	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2812
				2813	mov.l (%a6),-(%sp) # make a copy of a6
				2814	mov.l %d0,-(%sp) # save d0
				2815	mov.l %d1,-(%sp) # save d1
				2816	mov.l EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
				2817
				2818	clr.l %d0
				2819	mov.b 0x1+EXC_VOFF(%a6),%d0 # fetch size
				2820	neg.l %d0 # get negative of size
				2821
				2822	btst &0x7,EXC_SR(%a6) # is trace enabled?
				2823	beq.b iea_fmovm_data_p2 # no
				2824
				2825	mov.w EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR into the new trace frame
				2826	mov.l EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # Current PC
				2827	mov.l (%sp)+,(EXC_PC-0x4,%a6,%d0) # Next PC (pop the copy made above)
				2828	mov.w &0x2024,(EXC_VOFF-0x4,%a6,%d0) # stk fmt = 0x2; voff = 0x024
				2829
				2830	pea (%a6,%d0) # create final sp
				2831	bra.b iea_fmovm_data_p3
				2832
				2833	iea_fmovm_data_p2:
				2834	mov.w EXC_SR(%a6),(EXC_SR,%a6,%d0) # SR into the new frame
				2835	mov.l (%sp)+,(EXC_PC,%a6,%d0) # Next PC (pop the copy made above)
				2836	mov.w &0x00f0,(EXC_VOFF,%a6,%d0) # stk fmt = 0x0; voff = 0x0f0
				2837
				2838	pea (0x4,%a6,%d0) # create final sp
				2839
				2840	iea_fmovm_data_p3:
				2841	clr.l %d1
				2842	mov.b EXC_VOFF(%a6),%d1 # fetch strg
				2843
				2844	tst.b %d1 # is strg bit 7 set?
				2845	bpl.b fm_1 # no
				2846	fmovm.x &0x80,(0x4+0x8,%a6,%d0) # yes; store that register at the current offset
				2847	addi.l &0xc,%d0 # advance 12 bytes
				2848	fm_1:
				2849	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2850	bpl.b fm_2 # not selected
				2851	fmovm.x &0x40,(0x4+0x8,%a6,%d0) # selected; store it
				2852	addi.l &0xc,%d0 # advance 12 bytes
				2853	fm_2:
				2854	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2855	bpl.b fm_3 # not selected
				2856	fmovm.x &0x20,(0x4+0x8,%a6,%d0) # selected; store it
				2857	addi.l &0xc,%d0 # advance 12 bytes
				2858	fm_3:
				2859	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2860	bpl.b fm_4 # not selected
				2861	fmovm.x &0x10,(0x4+0x8,%a6,%d0) # selected; store it
				2862	addi.l &0xc,%d0 # advance 12 bytes
				2863	fm_4:
				2864	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2865	bpl.b fm_5 # not selected
				2866	fmovm.x &0x08,(0x4+0x8,%a6,%d0) # selected; store it
				2867	addi.l &0xc,%d0 # advance 12 bytes
				2868	fm_5:
				2869	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2870	bpl.b fm_6 # not selected
				2871	fmovm.x &0x04,(0x4+0x8,%a6,%d0) # selected; store it
				2872	addi.l &0xc,%d0 # advance 12 bytes
				2873	fm_6:
				2874	lsl.b &0x1,%d1 # shift next strg bit into the sign position
				2875	bpl.b fm_7 # not selected
				2876	fmovm.x &0x02,(0x4+0x8,%a6,%d0) # selected; store it
				2877	addi.l &0xc,%d0 # advance 12 bytes
				2878	fm_7:
				2879	lsl.b &0x1,%d1 # shift last strg bit into the sign position
				2880	bpl.b fm_end # not selected
				2881	fmovm.x &0x01,(0x4+0x8,%a6,%d0) # selected; store it
				2882	fm_end:
				2883	mov.l 0x4(%sp),%d1 # restore d1 (saved above)
				2884	mov.l 0x8(%sp),%d0 # restore d0 (saved above)
				2885	mov.l 0xc(%sp),%a6 # restore a6 from the copy made above
				2886	mov.l (%sp)+,%sp # load the final sp created by the pea
				2887
				2888	btst &0x7,(%sp) # is trace enabled?
				2889	beq.l _fpsp_done # no
				2890	bra.l _real_trace # yes
				2891
				2892	#########################################################################
				2893	iea_fmovm_ctrl:
				2894
				2895	bsr.l fmovm_ctrl # load ctrl regs
				2896
				2897	iea_fmovm_exit:
				2898	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				2899	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				2900	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2901
				2902	btst &0x7,EXC_SR(%a6) # is trace on?
				2903	bne.b iea_fmovm_trace # yes
				2904
				2905	mov.l EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
				2906
				2907	unlk %a6 # unravel the frame
				2908
				2909	bra.l _fpsp_done # exit to os
				2910
				2911	#
				2912	# The control reg instruction that took an "Unimplemented Effective Address"
				2913	# exception was being traced. The "Current PC" for the trace frame is the
				2914	# PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
				2915	# After fixing the stack frame, jump to _real_trace().
				2916	#
				2917	# UNIMP EA FRAME TRACE FRAME
				2918	# *************** ***************
				2919	# * 0x0 * 0x0f0 * * Current *
				2920	# ***************** * PC *
				2921	# * Current * *****************
				2922	# * PC * * 0x2 * 0x024 *
				2923	# *************** ***************
				2924	# * SR * * Next *
				2925	# ***************** * PC *
				2926	# *****************
				2927	# * SR *
				2928	# *****************
				2929	# this ain't a pretty solution, but it works:
				2930	# -restore a6 (not with unlk)
				2931	# -shift stack frame down over where old a6 used to be
				2932	# -add LOCAL_SIZE to stack pointer
				2933	iea_fmovm_trace:
				2934	mov.l (%a6),%a6 # restore frame pointer
				2935	mov.w EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # move SR down
				2936	mov.l EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # stacked PC becomes "Current PC"
				2937	mov.l EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # EXC_EXTWPTR becomes "Next PC"
				2938	mov.w &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
				2939	add.l &LOCAL_SIZE,%sp # clear stack frame
				2940
				2941	bra.l _real_trace
				2942
				2943	#########################################################################
				2944	# The FPU is disabled and so we should really have taken the "Line
				2945	# F Emulator" exception. So, here we create an 8-word stack frame
				2946	# from our 4-word stack frame. This means we must calculate the length
				2947	# of the faulting instruction to get the "next PC". This is trivial for
				2948	# immediate operands but requires some extra work for fmovm dynamic
				2949	# which can use most addressing modes.
				2950	iea_disabled:
				2951	mov.l (%sp)+,%d0 # restore d0
				2952
				2953	link %a6,&-LOCAL_SIZE # init stack frame
				2954
				2955	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				2956
				2957	# PC of instruction that took the exception is the PC in the frame
				2958	mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6)
				2959	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				2960	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				2961	bsr.l _imem_read_long # fetch the instruction words
				2962	mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
				2963
				2964	tst.w %d0 # is instr fmovm?
				2965	bmi.b iea_dis_fmovm # yes
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	2966	# instruction is using an extended precision immediate operand. Therefore,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2967	# the total instruction length is 16 bytes.
				2968	iea_dis_immed:
				2969	mov.l &0x10,%d0 # 16 bytes of instruction
				2970	bra.b iea_dis_cont
				2971	iea_dis_fmovm:
				2972	btst &0xe,%d0 # is instr fmovm ctrl
				2973	bne.b iea_dis_fmovm_data # no
				2974	# the instruction is a fmovm.l with 2 or 3 registers.
				2975	bfextu %d0{&19:&3},%d1 # extract the 3-bit ctrl reg select field
				2976	mov.l &0xc,%d0 # assume 12 bytes of instruction (2 regs)
				2977	cmpi.b %d1,&0x7 # move all regs?
				2978	bne.b iea_dis_cont # no
				2979	addq.l &0x4,%d0 # yes; 16 bytes of instruction
				2980	bra.b iea_dis_cont
				2981	# the instruction is an fmovm.x dynamic which can use many addressing
				2982	# modes and thus can have several different total instruction lengths.
				2983	# call fmovm_calc_ea which will go through the ea calc process and,
				2984	# as a by-product, will tell us how long the instruction is.
				2985	iea_dis_fmovm_data:
				2986	clr.l %d0
				2987	bsr.l fmovm_calc_ea
				2988	mov.l EXC_EXTWPTR(%a6),%d0 # d0 = extension word ptr after ea calc
				2989	sub.l EXC_PC(%a6),%d0 # minus instruction start = instruction length
				2990	iea_dis_cont:
				2991	mov.w %d0,EXC_VOFF(%a6) # store stack shift value
				2992
				2993	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				2994
				2995	unlk %a6
				2996
				2997	# here, we actually create the 8-word frame from the 4-word frame,
				2998	# with the "next PC" as additional info.
				2999	# the <ea> field is left as undefined.
				3000	subq.l &0x8,%sp # make room for new stack
				3001	mov.l %d0,-(%sp) # save d0
				3002	mov.w 0xc(%sp),0x4(%sp) # move SR
				3003	mov.l 0xe(%sp),0x6(%sp) # move Current PC
				3004	clr.l %d0
				3005	mov.w 0x12(%sp),%d0 # fetch stack shift value stored above
				3006	mov.l 0x6(%sp),0x10(%sp) # move Current PC
				3007	add.l %d0,0x6(%sp) # make Next PC
				3008	mov.w &0x402c,0xa(%sp) # insert offset,frame format
				3009	mov.l (%sp)+,%d0 # restore d0
				3010
				3011	bra.l _real_fpu_disabled
				3012
				3013	##########
				3014
				3015	iea_iacc:
				3016	movc %pcr,%d0 # load proc cr
				3017	btst &0x1,%d0 # is FPU disabled?
				3018	bne.b iea_iacc_cont # yes; skip the FP register restore
				3019	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3020	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 from stack
				3021	iea_iacc_cont:
				3022	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3023
				3024	unlk %a6
				3025
				3026	subq.w &0x8,%sp # make stack frame bigger
				3027	mov.l 0x8(%sp),(%sp) # store SR,hi(PC)
				3028	mov.w 0xc(%sp),0x4(%sp) # store lo(PC)
				3029	mov.w &0x4008,0x6(%sp) # store voff
				3030	mov.l 0x2(%sp),0x8(%sp) # store ea
				3031	mov.l &0x09428001,0xc(%sp) # store fslw
				3032
				3033	iea_acc_done:
				3034	btst &0x5,(%sp) # user or supervisor mode?
				3035	beq.b iea_acc_done2 # user
				3036	bset &0x2,0xd(%sp) # set supervisor TM bit
				3037
				3038	iea_acc_done2:
				3039	bra.l _real_access
				3040
				3041	iea_dacc:
				3042	lea -LOCAL_SIZE(%a6),%sp # reset sp to the bottom of the local frame
				3043
				3044	movc %pcr,%d1 # load proc cr
				3045	btst &0x1,%d1 # is FPU disabled?
				3046	bne.b iea_dacc_cont # yes; skip the FP register restore
				3047	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1 from stack
				3048	fmovm.l LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3049	iea_dacc_cont:
				3050	mov.l (%a6),%a6 # restore frame pointer
				3051
				3052	mov.l 0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # store SR,hi(PC)
				3053	mov.w 0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # store lo(PC)
				3054	mov.w &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff
				3055	mov.l %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store ea (taken from a0)
				3056	mov.w %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store hi word of fslw (taken from d0)
				3057	mov.w &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store lo word of fslw
				3058
				3059	movm.l LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
				3060	add.w &LOCAL_SIZE-0x4,%sp # point sp at the new access error frame
				3061
				3062	bra.b iea_acc_done
				3063
				3064	#########################################################################
				3065	# XDEF **************************************************************** #
				3066	# _fpsp_operr(): 060FPSP entry point for FP Operr exception. #
				3067	# #
				3068	# This handler should be the first code executed upon taking the #
				3069	# FP Operand Error exception in an operating system. #
				3070	# #
				3071	# XREF **************************************************************** #
				3072	# _imem_read_long() - read instruction longword #
				3073	# fix_skewed_ops() - adjust src operand in fsave frame #
				3074	# _real_operr() - "callout" to operating system operr handler #
				3075	# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
				3076	# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
				3077	# facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
				3078	# #
				3079	# INPUT *************************************************************** #
				3080	# - The system stack contains the FP Operr exception frame #
				3081	# - The fsave frame contains the source operand #
				3082	# #
				3083	# OUTPUT ************************************************************** #
				3084	# No access error: #
				3085	# - The system stack is unchanged #
				3086	# - The fsave frame contains the adjusted src op for opclass 0,2 #
				3087	# #
				3088	# ALGORITHM *********************************************************** #
				3089	# In a system where the FP Operr exception is enabled, the goal #
				3090	# is to get to the handler specified at _real_operr(). But, on the 060, #
				3091	# for opclass zero and two instructions taking this exception, the #
				3092	# input operand in the fsave frame may be incorrect for some cases #
				3093	# and needs to be corrected. This handler calls fix_skewed_ops() to #
				3094	# do just this and then exits through _real_operr(). #
				3095	# For opclass 3 instructions, the 060 doesn't store the default #
				3096	# operr result out to memory or data register file as it should. #
				3097	# This code must emulate the move out before finally exiting through #
				3098	# _real_operr(). The move out, if to memory, is performed using #
				3099	# _mem_write() "callout" routines that may return a failing result. #
				3100	# In this special case, the handler must exit through facc_out() #
				3101	# which creates an access error stack frame from the current operr #
				3102	# stack frame. #
				3103	# #
				3104	#########################################################################
				3105
				3106	global _fpsp_operr
				3107	_fpsp_operr:
				3108
				3109	link.w %a6,&-LOCAL_SIZE # init stack frame
				3110
				3111	fsave FP_SRC(%a6) # grab the "busy" frame
				3112
				3113	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				3114	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				3115	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				3116
				3117	# the FPIAR holds the "current PC" of the faulting instruction
				3118	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6)
				3119
				3120	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				3121	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				3122	bsr.l _imem_read_long # fetch the instruction words
				3123	mov.l %d0,EXC_OPWORD(%a6) # store OPWORD and EXTWORD
				3124
				3125	##############################################################################
				3126
				3127	btst &13,%d0 # is instr an fmove out?
				3128	bne.b foperr_out # fmove out
				3129
				3130
				3131	# here, we simply see if the operand in the fsave frame needs to be "unskewed".
				3132	# this would be the case for opclass two operations with a source infinity or
				3133	# denorm operand in the sgl or dbl format. NANs also become skewed, but can't
				3134	# cause an operr so we don't need to check for them here.
				3135	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3136	bsr.l fix_skewed_ops # fix src op
				3137
				3138	foperr_exit:
				3139	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				3140	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3141	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3142
				3143	frestore FP_SRC(%a6) # put the fsave frame back into the FPU
				3144
				3145	unlk %a6
				3146	bra.l _real_operr # exit to the os operr handler
				3147
				3148	########################################################################
				3149
				3150	#
				3151	# the hardware does not save the default result to memory on enabled
				3152	# operand error exceptions. we do this here before passing control to
				3153	# the user operand error handler.
				3154	#
				3155	# byte, word, and long destination format operations can pass
				3156	# through here. we simply need to test the sign of the src
				3157	# operand and save the appropriate minimum or maximum integer value
				3158	# to the effective address as pointed to by the stacked effective address.
				3159	#
				3160	# although packed opclass three operations can take operand error
				3161	# exceptions, they won't pass through here since they are caught
				3162	# first by the unsupported data format exception handler. that handler
				3163	# sends them directly to _real_operr() if necessary.
				3164	#
				3165	foperr_out:
				3166
				3167	mov.w FP_SRC_EX(%a6),%d1 # fetch exponent
				3168	andi.w &0x7fff,%d1 # strip the sign bit
				3169	cmpi.w %d1,&0x7fff # is exponent all ones?
				3170	bne.b foperr_out_not_qnan # no
				3171	# the operand is either an infinity or a QNAN.
				3172	tst.l FP_SRC_LO(%a6) # is lo mantissa nonzero?
				3173	bne.b foperr_out_qnan # yes; not an infinity
				3174	mov.l FP_SRC_HI(%a6),%d1
				3175	andi.l &0x7fffffff,%d1 # is hi mantissa (ignoring bit 31) zero?
				3176	beq.b foperr_out_not_qnan # yes; operand is an infinity
				3177	foperr_out_qnan:
				3178	mov.l FP_SRC_HI(%a6),L_SCR1(%a6) # default result = hi mantissa of the src
				3179	bra.b foperr_out_jmp
				3180
				3181	foperr_out_not_qnan:
				3182	mov.l &0x7fffffff,%d1 # start with the maximum positive long
				3183	tst.b FP_SRC_EX(%a6) # is src operand negative?
				3184	bpl.b foperr_out_not_qnan2 # no
				3185	addq.l &0x1,%d1 # yes; make it 0x80000000
				3186	foperr_out_not_qnan2:
				3187	mov.l %d1,L_SCR1(%a6) # save the default result
				3188
				3189	foperr_out_jmp:
				3190	bfextu %d0{&19:&3},%d0 # extract dst format field
				3191	mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
				3192	mov.w (tbl_operr.b,%pc,%d0.w*2),%a0 # fetch handler offset for this format
				3193	jmp (tbl_operr.b,%pc,%a0) # jump to tbl_operr + offset
				3194
				3195	tbl_operr:
				3196	short foperr_out_l - tbl_operr # long word integer
				3197	short tbl_operr - tbl_operr # sgl prec shouldn't happen
				3198	short tbl_operr - tbl_operr # ext prec shouldn't happen
				3199	short foperr_exit - tbl_operr # packed won't enter here
				3200	short foperr_out_w - tbl_operr # word integer
				3201	short tbl_operr - tbl_operr # dbl prec shouldn't happen
				3202	short foperr_out_b - tbl_operr # byte integer
				3203	short tbl_operr - tbl_operr # packed won't enter here
				3204
				3205	foperr_out_b:
				3206	mov.b L_SCR1(%a6),%d0 # load default result (first byte of L_SCR1)
				3207	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3208	ble.b foperr_out_b_save_dn # yes
				3209	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3210	bsr.l _dmem_write_byte # write the default result
				3211
				3212	tst.l %d1 # did dstore fail?
				3213	bne.l facc_out_b # yes
				3214
				3215	bra.w foperr_exit
				3216	foperr_out_b_save_dn:
				3217	andi.w &0x0007,%d1 # keep only the register number
				3218	bsr.l store_dreg_b # store result to regfile
				3219	bra.w foperr_exit
				3220
				3221	foperr_out_w:
				3222	mov.w L_SCR1(%a6),%d0 # load default result (first word of L_SCR1)
				3223	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3224	ble.b foperr_out_w_save_dn # yes
				3225	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3226	bsr.l _dmem_write_word # write the default result
				3227
				3228	tst.l %d1 # did dstore fail?
				3229	bne.l facc_out_w # yes
				3230
				3231	bra.w foperr_exit
				3232	foperr_out_w_save_dn:
				3233	andi.w &0x0007,%d1 # keep only the register number
				3234	bsr.l store_dreg_w # store result to regfile
				3235	bra.w foperr_exit
				3236
				3237	foperr_out_l:
				3238	mov.l L_SCR1(%a6),%d0 # load default result (all of L_SCR1)
				3239	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3240	ble.b foperr_out_l_save_dn # yes
				3241	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3242	bsr.l _dmem_write_long # write the default result
				3243
				3244	tst.l %d1 # did dstore fail?
				3245	bne.l facc_out_l # yes
				3246
				3247	bra.w foperr_exit
				3248	foperr_out_l_save_dn:
				3249	andi.w &0x0007,%d1 # keep only the register number
				3250	bsr.l store_dreg_l # store result to regfile
				3251	bra.w foperr_exit
				3252
				3253	#########################################################################
				3254	# XDEF **************************************************************** #
				3255	# _fpsp_snan(): 060FPSP entry point for FP SNAN exception. #
				3256	# #
				3257	# This handler should be the first code executed upon taking the #
				3258	# FP Signalling NAN exception in an operating system. #
				3259	# #
				3260	# XREF **************************************************************** #
				3261	# _imem_read_long() - read instruction longword #
				3262	# fix_skewed_ops() - adjust src operand in fsave frame #
				3263	# _real_snan() - "callout" to operating system SNAN handler #
				3264	# _dmem_write_{byte,word,long}() - store data to mem (opclass 3) #
				3265	# store_dreg_{b,w,l}() - store data to data regfile (opclass 3) #
				3266	# facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3) #
				3267	# _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea> #
				3268	# #
				3269	# INPUT *************************************************************** #
				3270	# - The system stack contains the FP SNAN exception frame #
				3271	# - The fsave frame contains the source operand #
				3272	# #
				3273	# OUTPUT ************************************************************** #
				3274	# No access error: #
				3275	# - The system stack is unchanged #
				3276	# - The fsave frame contains the adjusted src op for opclass 0,2 #
				3277	# #
				3278	# ALGORITHM *********************************************************** #
				3279	# In a system where the FP SNAN exception is enabled, the goal #
				3280	# is to get to the handler specified at _real_snan(). But, on the 060, #
				3281	# for opclass zero and two instructions taking this exception, the #
				3282	# input operand in the fsave frame may be incorrect for some cases #
				3283	# and needs to be corrected. This handler calls fix_skewed_ops() to #
				3284	# do just this and then exits through _real_snan(). #
				3285	# For opclass 3 instructions, the 060 doesn't store the default #
				3286	# SNAN result out to memory or data register file as it should. #
				3287	# This code must emulate the move out before finally exiting through #
				3288	# _real_snan(). The move out, if to memory, is performed using #
				3289	# _mem_write() "callout" routines that may return a failing result. #
				3290	# In this special case, the handler must exit through facc_out() #
				3291	# which creates an access error stack frame from the current SNAN #
				3292	# stack frame. #
				3293	# For the case of an extended precision opclass 3 instruction, #
				3294	# if the effective addressing mode was -() or ()+, then the address #
				3295	# register must get updated by calling _calc_ea_fout(). If the <ea> #
				3296	# was -(a7) from supervisor mode, then the exception frame currently #
				3297	# on the system stack must be carefully moved "down" to make room #
				3298	# for the operand being moved. #
				3299	# #
				3300	#########################################################################
				3301
				3302	global _fpsp_snan
				3303	_fpsp_snan:
				3304
				3305	link.w %a6,&-LOCAL_SIZE # init stack frame
				3306
				3307	fsave FP_SRC(%a6) # grab the "busy" frame
				3308
				3309	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				3310	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				3311	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				3312
				3313	# the FPIAR holds the "current PC" of the faulting instruction
				3314	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at faulting instr
				3315
				3316	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				3317	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				3318	bsr.l _imem_read_long # fetch the instruction words
				3319	mov.l %d0,EXC_OPWORD(%a6) # save the fetched longword for later decode
				3320
				3321	##############################################################################
				3322
				3323	btst &13,%d0 # is instr an fmove out?
				3324	bne.w fsnan_out # fmove out
				3325
				3326
				3327	# here, we simply see if the operand in the fsave frame needs to be "unskewed".
				3328	# this would be the case for opclass two operations with a source infinity or
				3329	# denorm operand in the sgl or dbl format. NANs also become skewed and must be
				3330	# fixed here.
				3331	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3332	bsr.l fix_skewed_ops # fix src op
				3333
				3334	# common exit: every path that did not build a new stack frame ends up here.
				3335	fsnan_exit:
				3335	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				3336	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3337	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3338
				3339	frestore FP_SRC(%a6) # reload the (possibly adjusted) fsave frame
				3340
				3341	unlk %a6 # release the local stack frame
				3342	bra.l _real_snan # exit through the OS SNAN handler
				3343
				3344	########################################################################
				3345
				3346	#
				3347	# the hardware does not save the default result to memory on enabled
				3348	# snan exceptions. we do this here before passing control to
				3349	# the user snan handler.
				3350	#
				3351	# the dst format field of the fmove out selects one of the routines below
				3352	# through tbl_snan. for byte, word, and long, the upper bits of the SNAN
				3353	# mantissa are stored with the SNAN bit set. for sgl, dbl, and ext, a NAN
				3354	# of that format is built from the src operand and stored.
				3355	# packed format operations already were handled by fpsp_unsupp(), so
				3356	# nothing is left to do for them; their table slots go straight to fsnan_exit.
				3357	#
				3358	fsnan_out:
				3359
				3360	bfextu %d0{&19:&3},%d0 # extract dst format field
				3361	mov.b 1+EXC_OPWORD(%a6),%d1 # extract <ea> mode,reg
				3362	mov.w (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch routine offset for dst format
				3363	jmp (tbl_snan.b,%pc,%a0) # jump to tbl_snan + offset
				3364
				3365	tbl_snan:
				3366	short fsnan_out_l - tbl_snan # long word integer
				3367	short fsnan_out_s - tbl_snan # sgl precision
				3368	short fsnan_out_x - tbl_snan # ext precision
				3369	short fsnan_exit - tbl_snan # packed needs no help; just exit
				3370	short fsnan_out_w - tbl_snan # word integer
				3371	short fsnan_out_d - tbl_snan # dbl precision
				3372	short fsnan_out_b - tbl_snan # byte integer
				3373	short fsnan_exit - tbl_snan # packed needs no help; just exit
				3374
				3375	fsnan_out_b:
				3376	mov.b FP_SRC_HI(%a6),%d0 # load upper byte of SNAN
				3377	bset &6,%d0 # set SNAN bit
				3378	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3379	ble.b fsnan_out_b_dn # yes
				3380	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3381	bsr.l _dmem_write_byte # write the default result
				3382
				3383	tst.l %d1 # did dstore fail?
				3384	bne.l facc_out_b # yes
				3385
				3386	bra.w fsnan_exit # no; restore state and exit
				3387	fsnan_out_b_dn:
				3388	andi.w &0x0007,%d1 # keep only the register number
				3389	bsr.l store_dreg_b # store result to regfile
				3390	bra.w fsnan_exit
				3391
				3392	fsnan_out_w:
				3393	mov.w FP_SRC_HI(%a6),%d0 # load upper word of SNAN
				3394	bset &14,%d0 # set SNAN bit
				3395	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3396	ble.b fsnan_out_w_dn # yes
				3397	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3398	bsr.l _dmem_write_word # write the default result
				3399
				3400	tst.l %d1 # did dstore fail?
				3401	bne.l facc_out_w # yes
				3402
				3403	bra.w fsnan_exit # no; restore state and exit
				3404	fsnan_out_w_dn:
				3405	andi.w &0x0007,%d1 # keep only the register number
				3406	bsr.l store_dreg_w # store result to regfile
				3407	bra.w fsnan_exit
				3408
				3409	fsnan_out_l:
				3410	mov.l FP_SRC_HI(%a6),%d0 # load upper longword of SNAN
				3411	bset &30,%d0 # set SNAN bit
				3412	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3413	ble.b fsnan_out_l_dn # yes
				3414	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3415	bsr.l _dmem_write_long # write the default result
				3416
				3417	tst.l %d1 # did dstore fail?
				3418	bne.l facc_out_l # yes
				3419
				3420	bra.w fsnan_exit # no; restore state and exit
				3421	fsnan_out_l_dn:
				3422	andi.w &0x0007,%d1 # keep only the register number
				3423	bsr.l store_dreg_l # store result to regfile
				3424	bra.w fsnan_exit
				3425
				3426	fsnan_out_s:
				3427	cmpi.b %d1,&0x7 # is <ea> mode a data reg?
				3428	ble.b fsnan_out_d_dn # yes (despite its name, the label is the sgl Dn case)
				3429	mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
				3430	andi.l &0x80000000,%d0 # keep sign
				3431	ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
				3432	mov.l FP_SRC_HI(%a6),%d1 # load mantissa
				3433	lsr.l &0x8,%d1 # shift mantissa for sgl
				3434	or.l %d1,%d0 # create sgl SNAN
				3435	mov.l EXC_EA(%a6),%a0 # pass: <ea> of default result
				3436	bsr.l _dmem_write_long # write the default result
				3437
				3438	tst.l %d1 # did dstore fail?
				3439	bne.l facc_out_l # yes
				3440
				3441	bra.w fsnan_exit # no; restore state and exit
				3442	fsnan_out_d_dn:
				3443	mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
				3444	andi.l &0x80000000,%d0 # keep sign
				3445	ori.l &0x7fc00000,%d0 # insert new exponent,SNAN bit
				3446	mov.l %d1,-(%sp) # save <ea> mode,reg; d1 is reused below
				3447	mov.l FP_SRC_HI(%a6),%d1 # load mantissa
				3448	lsr.l &0x8,%d1 # shift mantissa for sgl
				3449	or.l %d1,%d0 # create sgl SNAN
				3450	mov.l (%sp)+,%d1 # recover <ea> mode,reg
				3451	andi.w &0x0007,%d1 # keep only the register number
				3452	bsr.l store_dreg_l # store result to regfile
				3453	bra.w fsnan_exit
				3454
				3455	fsnan_out_d:
				3456	mov.l FP_SRC_EX(%a6),%d0 # fetch SNAN sign
				3457	andi.l &0x80000000,%d0 # keep sign
				3458	ori.l &0x7ff80000,%d0 # insert new exponent,SNAN bit
				3459	mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
				3460	mov.l %d0,FP_SCR0_EX(%a6) # store to temp space
				3461	mov.l &11,%d0 # load shift amt
				3462	lsr.l %d0,%d1 # hi mantissa >> 11
				3463	or.l %d1,FP_SCR0_EX(%a6) # create dbl hi
				3464	mov.l FP_SRC_HI(%a6),%d1 # load hi mantissa
				3465	andi.l &0x000007ff,%d1 # keep the 11 bits shifted out above
				3466	ror.l %d0,%d1 # rotate them into the top 11 bits
				3467	mov.l %d1,FP_SCR0_HI(%a6) # store to temp space
				3468	mov.l FP_SRC_LO(%a6),%d1 # load lo mantissa
				3469	lsr.l %d0,%d1 # lo mantissa >> 11
				3470	or.l %d1,FP_SCR0_HI(%a6) # create dbl lo
				3471	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				3472	mov.l EXC_EA(%a6),%a1 # pass: dst addr
				3473	movq.l &0x8,%d0 # pass: size of 8 bytes
				3474	bsr.l _dmem_write # write the default result
				3475
				3476	tst.l %d1 # did dstore fail?
				3477	bne.l facc_out_d # yes
				3478
				3479	bra.w fsnan_exit # no; restore state and exit
				3480
				3481	# for extended precision, if the addressing mode is pre-decrement or
				3482	# post-increment, then the address register did not get updated.
				3483	# in addition, for pre-decrement, the stacked <ea> is incorrect.
				3484	fsnan_out_x:
				3485	clr.b SPCOND_FLG(%a6) # clear special case flag
				3486
				3487	mov.w FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign,exponent
				3488	clr.w 2+FP_SCR0(%a6) # zero the word after the exponent
				3489	mov.l FP_SRC_HI(%a6),%d0 # load hi mantissa
				3490	bset &30,%d0 # set SNAN bit
				3491	mov.l %d0,FP_SCR0_HI(%a6)
				3492	mov.l FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa unchanged
				3493
				3494	btst &0x5,EXC_SR(%a6) # supervisor mode exception?
				3495	bne.b fsnan_out_x_s # yes
				3496
				3497	mov.l %usp,%a0 # fetch user stack pointer
				3498	mov.l %a0,EXC_A7(%a6) # save on stack for calc_ea()
				3499	mov.l (%a6),EXC_A6(%a6) # copy the a6 saved by link for calc_ea()
				3500
				3501	bsr.l _calc_ea_fout # find the correct ea,update An
				3502	mov.l %a0,%a1 # pass: dst addr
				3503	mov.l %a0,EXC_EA(%a6) # stack correct <ea>
				3504
				3505	mov.l EXC_A7(%a6),%a0
				3506	mov.l %a0,%usp # restore user stack pointer
				3507	mov.l EXC_A6(%a6),(%a6) # write EXC_A6 back to the slot unlk pops
				3508
				3509	fsnan_out_x_save:
				3510	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				3511	movq.l &0xc,%d0 # pass: size of extended
				3512	bsr.l _dmem_write # write the default result
				3513
				3514	tst.l %d1 # did dstore fail?
				3515	bne.l facc_out_x # yes
				3516
				3517	bra.w fsnan_exit # no; restore state and exit
				3518
				3519	fsnan_out_x_s:
				3520	mov.l (%a6),EXC_A6(%a6) # copy the a6 saved by link for calc_ea()
				3521
				3522	bsr.l _calc_ea_fout # find the correct ea,update An
				3523	mov.l %a0,%a1 # pass: dst addr
				3524	mov.l %a0,EXC_EA(%a6) # stack correct <ea>
				3525
				3526	mov.l EXC_A6(%a6),(%a6) # write EXC_A6 back to the slot unlk pops
				3527
				3528	cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
				3529	bne.b fsnan_out_x_save # no
				3530
				3531	# the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
				3532	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				3533	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3534	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3535
				3536	frestore FP_SRC(%a6)
				3537
				3538	mov.l EXC_A6(%a6),%a6 # restore frame pointer
				3539
				3540	mov.l LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy the three frame longwords
				3541	mov.l LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) # 0xc bytes lower on the stack
				3542	mov.l LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp)
				3543
				3544	mov.l LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) # then put the 12-byte ext result
				3545	mov.l LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) # where those longwords used to be
				3546	mov.l LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp)
				3547
				3548	add.l &LOCAL_SIZE-0x8,%sp # drop locals; sp ends 0xc below where unlk would leave it
				3549
				3550	bra.l _real_snan # exit through the OS SNAN handler
				3551
				3552	#########################################################################
				3553	# XDEF **************************************************************** #
				3554	# _fpsp_inex(): 060FPSP entry point for FP Inexact exception. #
				3555	# #
				3556	# This handler should be the first code executed upon taking the #
				3557	# FP Inexact exception in an operating system. #
				3558	# #
				3559	# XREF **************************************************************** #
				3560	# _imem_read_long() - read instruction longword #
				3561	# fix_skewed_ops() - adjust src operand in fsave frame #
				3562	# set_tag_x() - determine optype of src/dst operands #
				3563	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				3564	# unnorm_fix() - change UNNORM operands to NORM or ZERO #
				3565	# load_fpn2() - load dst operand from FP regfile #
				3566	# smovcr() - emulate an "fmovcr" instruction #
				3567	# fout() - emulate an opclass 3 instruction #
				3568	# tbl_unsupp - addr of table of emulation routines for opclass 0,2 #
				3569	# _real_inex() - "callout" to operating system inexact handler #
				3570	# #
				3571	# INPUT *************************************************************** #
				3572	# - The system stack contains the FP Inexact exception frame #
				3573	# - The fsave frame contains the source operand #
				3574	# #
				3575	# OUTPUT ************************************************************** #
				3576	# - The system stack is unchanged #
				3577	# - The fsave frame contains the adjusted src op for opclass 0,2 #
				3578	# #
				3579	# ALGORITHM *********************************************************** #
				3580	# In a system where the FP Inexact exception is enabled, the goal #
				3581	# is to get to the handler specified at _real_inex(). But, on the 060, #
				3582	# for opclass zero and two instructions taking this exception, the #
				3583	# hardware doesn't store the correct result to the destination FP #
				3584	# register as did the '040 and '881/2. This handler must emulate the #
				3585	# instruction in order to get this value and then store it to the #
				3586	# correct register before calling _real_inex(). #
				3587	# For opclass 3 instructions, the 060 doesn't store the default #
				3588	# inexact result out to memory or data register file as it should. #
				3589	# This code must emulate the move out by calling fout() before finally #
				3590	# exiting through _real_inex(). #
				3591	# #
				3592	#########################################################################
				3593
				3594	global _fpsp_inex
				3595	_fpsp_inex:
				3596
				3597	link.w %a6,&-LOCAL_SIZE # init stack frame
				3598
				3599	fsave FP_SRC(%a6) # grab the "busy" frame
				3600
				3601	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				3602	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				3603	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				3604
				3605	# the FPIAR holds the "current PC" of the faulting instruction
				3606	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at faulting instr
				3607
				3608	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				3609	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				3610	bsr.l _imem_read_long # fetch the instruction words
				3611	mov.l %d0,EXC_OPWORD(%a6) # save the fetched longword for later decode
				3612
				3613	##############################################################################
				3614
				3615	btst &13,%d0 # is instr an fmove out?
				3616	bne.w finex_out # fmove out
				3617
				3618
				3619	# the hardware, for "fabs" and "fneg" w/ a long source format, puts the
				3620	# longword integer directly into the upper longword of the mantissa along
				3621	# w/ an exponent value of 0x401e. we convert this to extended precision here.
				3622	bfextu %d0{&19:&3},%d0 # fetch instr size
				3623	bne.b finex_cont # instr size is not long
				3624	cmpi.w FP_SRC_EX(%a6),&0x401e # is exponent 0x401e?
				3625	bne.b finex_cont # no
				3626	fmov.l &0x0,%fpcr # zero control reg before converting
				3627	fmov.l FP_SRC_HI(%a6),%fp0 # load integer src
				3628	fmov.x %fp0,FP_SRC(%a6) # store integer as extended precision
				3629	mov.w &0xe001,0x2+FP_SRC(%a6) # NOTE(review): meaning of 0xe001 in this word is not evident here
				3630
				3631	finex_cont:
				3632	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3633	bsr.l fix_skewed_ops # fix src op
				3634
				3635	# Here, we zero the ccode and exception byte field since we're going to
				3636	# emulate the whole instruction. Notice, though, that we don't kill the
				3637	# INEX1 bit. This is because a packed op has long since been converted
				3638	# to extended before arriving here. Therefore, we need to retain the
				3639	# INEX1 bit from when the operand was first converted.
				3640	andi.l &0x00ff01ff,USER_FPSR(%a6) # zero ccode,exc bytes; keep INEX1,accrued
				3641
				3642	fmov.l &0x0,%fpcr # zero current control regs
				3643	fmov.l &0x0,%fpsr
				3644
				3645	bfextu EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
				3646	cmpi.b %d1,&0x17 # is op an fmovecr?
				3647	beq.w finex_fmovcr # yes
				3648
				3649	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3650	bsr.l set_tag_x # tag the operand type
				3651	mov.b %d0,STAG(%a6) # maybe NORM,DENORM
				3652
				3653	# bits four and five of the fp extension word separate the monadic and dyadic
				3654	# operations that can pass through fpsp_inex(). remember that fcmp and ftst
				3655	# will never take this exception, but fsincos will.
				3656	btst &0x5,1+EXC_CMDREG(%a6) # is operation monadic or dyadic?
				3657	beq.b finex_extract # monadic
				3658
				3659	btst &0x4,1+EXC_CMDREG(%a6) # is operation an fsincos?
				3660	bne.b finex_extract # yes
				3661
				3662	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
				3663	bsr.l load_fpn2 # load dst into FP_DST
				3664
				3665	lea FP_DST(%a6),%a0 # pass: ptr to dst op
				3666	bsr.l set_tag_x # tag the operand type
				3667	cmpi.b %d0,&UNNORM # is operand an UNNORM?
				3668	bne.b finex_op2_done # no
				3669	bsr.l unnorm_fix # yes; convert to NORM,DENORM,or ZERO
				3670	finex_op2_done:
				3671	mov.b %d0,DTAG(%a6) # save dst optype tag
				3672
				3673	finex_extract:
				3674	clr.l %d0
				3675	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec/mode
				3676
				3677	mov.b 1+EXC_CMDREG(%a6),%d1
				3678	andi.w &0x007f,%d1 # extract extension
				3679
				3680	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3681	lea FP_DST(%a6),%a1 # pass: ptr to dst op
				3682
				3683	mov.l (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
				3684	jsr (tbl_unsupp.l,%pc,%d1.l*1) # call the emulation routine
				3685
				3686	# the operation has been emulated. the result is in fp0.
				3687	finex_save:
				3688	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg number
				3689	bsr.l store_fpreg # store result to FP regfile
				3690
				3691	finex_exit:
				3692	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				3693	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3694	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3695
				3696	frestore FP_SRC(%a6) # reload the fsave frame
				3697
				3698	unlk %a6 # release the local stack frame
				3699	bra.l _real_inex # exit through the OS inexact handler
				3700
				3701	finex_fmovcr:
				3702	clr.l %d0
				3703	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
				3704	mov.b 1+EXC_CMDREG(%a6),%d1
				3705	andi.l &0x0000007f,%d1 # pass rom offset
				3706	bsr.l smovcr # emulate the fmovecr
				3707	bra.b finex_save # store the result like any other op
				3708
				3709	########################################################################
				3710
				3711	#
				3712	# the hardware does not save the default result to memory on enabled
				3713	# inexact exceptions. we do this here before passing control to
				3714	# the user inexact handler.
				3715	#
				3716	# byte, word, and long destination format operations can pass
				3717	# through here. so can double and single precision.
				3718	# although packed opclass three operations can take inexact
				3719	# exceptions, they won't pass through here since they are caught
				3720	# first by the unsupported data format exception handler. that handler
				3721	# sends them directly to _real_inex() if necessary.
				3722	#
				3723	finex_out:
				3724
				3725	mov.b &NORM,STAG(%a6) # src is a NORM
				3726
				3727	clr.l %d0
				3728	mov.b FPCR_MODE(%a6),%d0 # pass rnd prec,mode
				3729
				3730	andi.l &0xffff00ff,USER_FPSR(%a6) # zero exception field
				3731
				3732	lea FP_SRC(%a6),%a0 # pass ptr to src operand
				3733
				3734	bsr.l fout # store the default result
				3735
				3736	bra.b finex_exit # restore state and exit through _real_inex
				3737
				3738	#########################################################################
				3739	# XDEF **************************************************************** #
				3740	# _fpsp_dz(): 060FPSP entry point for FP DZ exception. #
				3741	# #
				3742	# This handler should be the first code executed upon taking #
				3743	# the FP DZ exception in an operating system. #
				3744	# #
				3745	# XREF **************************************************************** #
				3746	# _imem_read_long() - read instruction longword from memory #
				3747	# fix_skewed_ops() - adjust fsave operand #
				3748	# _real_dz() - "callout" exit point from FP DZ handler #
				3749	# #
				3750	# INPUT *************************************************************** #
				3751	# - The system stack contains the FP DZ exception stack. #
				3752	# - The fsave frame contains the source operand. #
				3753	# #
				3754	# OUTPUT ************************************************************** #
				3755	# - The system stack contains the FP DZ exception stack. #
				3756	# - The fsave frame contains the adjusted source operand. #
				3757	# #
				3758	# ALGORITHM *********************************************************** #
				3759	# In a system where the DZ exception is enabled, the goal is to #
				3760	# get to the handler specified at _real_dz(). But, on the 060, when the #
				3761	# exception is taken, the input operand in the fsave state frame may #
				3762	# be incorrect for some cases and need to be adjusted. So, this package #
				3763	# adjusts the operand using fix_skewed_ops() and then branches to #
				3764	# _real_dz(). #
				3765	# #
				3766	#########################################################################
				3767
				3768	global _fpsp_dz
				3769	_fpsp_dz:
				3770
				3771	link.w %a6,&-LOCAL_SIZE # init stack frame
				3772
				3773	fsave FP_SRC(%a6) # grab the "busy" frame
				3774
				3775	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				3776	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				3777	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1 on stack
				3778
				3779	# the FPIAR holds the "current PC" of the faulting instruction
				3780	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at faulting instr
				3781
				3782	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				3783	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				3784	bsr.l _imem_read_long # fetch the instruction words
				3785	mov.l %d0,EXC_OPWORD(%a6) # save the fetched longword
				3786
				3787	##############################################################################
				3788
				3789
				3790	# here, we simply see if the operand in the fsave frame needs to be "unskewed".
				3791	# this would be the case for opclass two operations with a source zero
				3792	# in the sgl or dbl format.
				3793	lea FP_SRC(%a6),%a0 # pass: ptr to src op
				3794	bsr.l fix_skewed_ops # fix src op
				3795
				3796	fdz_exit:
				3797	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				3798	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				3799	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3800
				3801	frestore FP_SRC(%a6) # reload the adjusted fsave frame
				3802
				3803	unlk %a6 # release the local stack frame
				3804	bra.l _real_dz # exit through the OS DZ handler
				3805
				3806	#########################################################################
				3807	# XDEF **************************************************************** #
				3808	# _fpsp_fline(): 060FPSP entry point for "Line F emulator" exc. #
				3809	# #
				3810	# This handler should be the first code executed upon taking the #
				3811	# "Line F Emulator" exception in an operating system. #
				3812	# #
				3813	# XREF **************************************************************** #
				3814	# _fpsp_unimp() - handle "FP Unimplemented" exceptions #
				3815	# _real_fpu_disabled() - handle "FPU disabled" exceptions #
				3816	# _real_fline() - handle "FLINE" exceptions #
				3817	# _imem_read_long() - read instruction longword #
				3818	# #
				3819	# INPUT *************************************************************** #
				3820	# - The system stack contains a "Line F Emulator" exception #
				3821	# stack frame. #
				3822	# #
				3823	# OUTPUT ************************************************************** #
				3824	# - The system stack is unchanged #
				3825	# #
				3826	# ALGORITHM *********************************************************** #
				3827	# When a "Line F Emulator" exception occurs, there are 3 possible #
				3828	# exception types, denoted by the exception stack frame format number: #
				3829	# (1) FPU unimplemented instruction (6 word stack frame) #
				3830	# (2) FPU disabled (8 word stack frame) #
				3831	# (3) Line F (4 word stack frame) #
				3832	# #
				3833	# This module determines which and forks the flow off to the #
				3834	# appropriate "callout" (for "disabled" and "Line F") or to the #
				3835	# correct emulation code (for "FPU unimplemented"). #
				3836	# This code also must check for "fmovecr" instructions w/ a #
				3837	# non-zero <ea> field. These may get flagged as "Line F" but should #
				3838	# really be flagged as "FPU Unimplemented". (This is a "feature" on #
				3839	# the '060.) #
				3840	# #
				3841	#########################################################################
				3842
				3843	global _fpsp_fline
				3844	_fpsp_fline:
				3845
				3846	# check to see if this exception is a "FP Unimplemented Instruction"
				3847	# exception. if so, branch directly to that handler's entry point.
				3848	cmpi.w 0x6(%sp),&0x202c # is stacked format/vector word 0x202c?
				3849	beq.l _fpsp_unimp # yes
				3850
				3851	# check to see if the FPU is disabled. if so, jump to the OS entry
				3852	# point for that condition.
				3853	cmpi.w 0x6(%sp),&0x402c # is stacked format/vector word 0x402c?
				3854	beq.l _real_fpu_disabled # yes
				3855
				3856	# the exception was an "F-Line Illegal" exception. we check to see
				3857	# if the F-Line instruction is an "fmovecr" w/ a non-zero <ea>. if
				3858	# so, convert the F-Line exception stack frame to an FP Unimplemented
				3859	# Instruction exception stack frame else branch to the OS entry
				3860	# point for the F-Line exception handler.
				3861	link.w %a6,&-LOCAL_SIZE # init stack frame
				3862
				3863	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				3864
				3865	mov.l EXC_PC(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at the stacked PC
				3866	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				3867	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				3868	bsr.l _imem_read_long # fetch instruction words
				3869
				3870	bfextu %d0{&0:&10},%d1 # is it an fmovecr?
				3871	cmpi.w %d1,&0x03c8 # check upper 10 bits of the opword
				3872	bne.b fline_fline # no
				3873
				3874	bfextu %d0{&16:&6},%d1 # is it an fmovecr?
				3875	cmpi.b %d1,&0x17 # check upper 6 bits of the extension word
				3876	bne.b fline_fline # no
				3877
				3878	# it's an fmovecr w/ a non-zero <ea> that has entered through
				3879	# the F-Line Illegal exception.
				3880	# so, we need to convert the F-Line exception stack frame into an
				3881	# FP Unimplemented Instruction stack frame and jump to that entry
				3882	# point.
				3883	#
Justin P. Mattock	f3449bf	2010-12-30 15:07:50 -0800	[diff] [blame]	3884	# but, if the FPU is disabled, then we need to jump to the FPU disabled
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	3885	# entry point.
				3886	movc %pcr,%d0 # fetch processor configuration reg
				3887	btst &0x1,%d0 # is the FPU disabled?
				3888	beq.b fline_fmovcr # no
				3889
				3890	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3891
				3892	unlk %a6
				3893
				3894	sub.l &0x8,%sp # make room for "Next PC", <ea>
				3895	mov.w 0x8(%sp),(%sp) # move the first frame word down
				3896	mov.l 0xa(%sp),0x2(%sp) # move "Current PC"
				3897	mov.w &0x402c,0x6(%sp) # set format/vector word to 0x402c
				3898	mov.l 0x2(%sp),0xc(%sp) # keep a copy of "Current PC" at 0xc
				3899	addq.l &0x4,0x2(%sp) # set "Next PC"
				3900
				3901	bra.l _real_fpu_disabled
				3902
				3903	fline_fmovcr:
				3904	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3905
				3906	unlk %a6
				3907
				3908	fmov.l 0x2(%sp),%fpiar # set current PC
				3909	addq.l &0x4,0x2(%sp) # set Next PC
				3910
				3911	mov.l (%sp),-(%sp) # grow frame by 4 bytes; copy first longword down
				3912	mov.l 0x8(%sp),0x4(%sp) # copy second longword down
				3913	mov.b &0x20,0x6(%sp) # set upper byte of format/vector word to 0x20
				3914
				3915	bra.l _fpsp_unimp
				3916
				3917	fline_fline:
				3918	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				3919
				3920	unlk %a6
				3921
				3922	bra.l _real_fline # exit through the OS F-Line handler
				3923
				3924	#########################################################################
				3925	# XDEF **************************************************************** #
				3926	# _fpsp_unimp(): 060FPSP entry point for FP "Unimplemented #
				3927	# Instruction" exception. #
				3928	# #
				3929	# This handler should be the first code executed upon taking the #
				3930	# FP Unimplemented Instruction exception in an operating system. #
				3931	# #
				3932	# XREF **************************************************************** #
				3933	# _imem_read_{word,long}() - read instruction word/longword #
				3934	# load_fop() - load src/dst ops from memory and/or FP regfile #
				3935	# store_fpreg() - store opclass 0 or 2 result to FP regfile #
				3936	# tbl_trans - addr of table of emulation routines for trnscndls #
				3937	# _real_access() - "callout" for access error exception #
				3938	# _fpsp_done() - "callout" for exit; work all done #
				3939	# _real_trace() - "callout" for Trace enabled exception #
				3940	# smovcr() - emulate "fmovecr" instruction #
				3941	# funimp_skew() - adjust fsave src ops to "incorrect" value #
				3942	# _ftrapcc() - emulate an "ftrapcc" instruction #
				3943	# _fdbcc() - emulate an "fdbcc" instruction #
				3944	# _fscc() - emulate an "fscc" instruction #
				3945	# _real_trap() - "callout" for Trap exception #
				3946	# _real_bsun() - "callout" for enabled Bsun exception #
				3947	# #
				3948	# INPUT *************************************************************** #
				3949	# - The system stack contains the "Unimplemented Instr" stk frame #
				3950	# #
				3951	# OUTPUT ************************************************************** #
				3952	# If access error: #
				3953	# - The system stack is changed to an access error stack frame #
				3954	# If Trace exception enabled: #
				3955	# - The system stack is changed to a Trace exception stack frame #
				3956	# Else: (normal case) #
				3957	# - Correct result has been stored as appropriate #
				3958	# #
				3959	# ALGORITHM *********************************************************** #
				3960	# There are two main cases of instructions that may enter here to #
				3961	# be emulated: (1) the FPgen instructions, most of which were also #
				3962	# unimplemented on the 040, and (2) "ftrapcc", "fscc", and "fdbcc". #
				3963	# For the first set, this handler calls the routine load_fop() #
				3964	# to load the source and destination (for dyadic) operands to be used #
				3965	# for instruction emulation. The correct emulation routine is then #
				3966	# chosen by decoding the instruction type and indexing into an #
				3967	# emulation subroutine index table. After emulation returns, this #
				3968	# handler checks to see if an exception should occur as a result of the #
				3969	# FP instruction emulation. If so, then an FP exception of the correct #
				3970	# type is inserted into the FPU state frame using the "frestore" #
				3971	# instruction before exiting through _fpsp_done(). In either the #
				3972	# exceptional or non-exceptional cases, we must check to see if the #
				3973	# Trace exception is enabled. If so, then we must create a Trace #
				3974	# exception frame from the current exception frame and exit through #
				3975	# _real_trace(). #
				3976	# For "fdbcc", "ftrapcc", and "fscc", the emulation subroutines #
				3977	# _fdbcc(), _ftrapcc(), and _fscc() respectively are used. All three #
				3978	# may flag that a BSUN exception should be taken. If so, then the #
				3979	# current exception stack frame is converted into a BSUN exception #
				3980	# stack frame and an exit is made through _real_bsun(). If the #
				3981	# instruction was "ftrapcc" and a Trap exception should result, a Trap #
				3982	# exception stack frame is created from the current frame and an exit #
				3983	# is made through _real_trap(). If a Trace exception is pending, then #
				3984	# a Trace exception frame is created from the current frame and a jump #
				3985	# is made to _real_trace(). Finally, if none of these conditions exist, #
				3986	# then the handler exits through the callout _fpsp_done(). #
				3987	# #
				3988	# In any of the above scenarios, if a _mem_read() or _mem_write() #
				3989	# "callout" returns a failing value, then an access error stack frame #
				3990	# is created from the current stack frame and an exit is made through #
				3991	# _real_access(). #
				3992	# #
				3993	#########################################################################
				3994
				3995	#
				3996	# FP UNIMPLEMENTED INSTRUCTION STACK FRAME:
				3997	#
				3998	# *****************
				3999	# * * => <ea> of fp unimp instr.
				4000	# - EA -
				4001	# * *
				4002	# *****************
				4003	# * 0x2 * 0x02c * => frame format and vector offset(vector #11)
				4004	# *****************
				4005	# * *
				4006	# - Next PC - => PC of instr to execute after exc handling
				4007	# * *
				4008	# *****************
				4009	# * SR * => SR at the time the exception was taken
				4010	# *****************
				4011	#
				4012	# Note: the !NULL bit does not get set in the fsave frame when the
				4013	# machine encounters an fp unimp exception. Therefore, it must be set
				4014	# before leaving this handler.
				4015	#
				4016	global _fpsp_unimp
				4017	_fpsp_unimp: # save state, fetch the opword, then dispatch on TYPE
				4018
				4019	link.w %a6,&-LOCAL_SIZE # init stack frame
				4020
				4021	movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1
				4022	fmovm.l %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
				4023	fmovm.x &0xc0,EXC_FPREGS(%a6) # save fp0-fp1
				4024
				4025	btst &0x5,EXC_SR(%a6) # user mode exception?
				4026	bne.b funimp_s # no; supervisor mode
				4027
				4028	# save the value of the user stack pointer onto the stack frame
				4029	funimp_u:
				4030	mov.l %usp,%a0 # fetch user stack pointer
				4031	mov.l %a0,EXC_A7(%a6) # store in stack frame
				4032	bra.b funimp_cont # skip the supervisor-mode path
				4033
				4034	# store the value of the supervisor stack pointer BEFORE the exc occurred.
				4035	# old_sp is address just above stacked effective address.
				4036	funimp_s:
				4037	lea 4+EXC_EA(%a6),%a0 # load old a7'
				4038	mov.l %a0,EXC_A7(%a6) # store a7'
				4039	mov.l %a0,OLD_A7(%a6) # make a copy
				4040
				4041	funimp_cont:
				4042
				4043	# the FPIAR holds the "current PC" of the faulting instruction.
				4044	mov.l USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # ext word ptr = saved FPIAR
				4045
				4046	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				4047	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				4048	bsr.l _imem_read_long # fetch the instruction words
				4049	mov.l %d0,EXC_OPWORD(%a6) # save fetched longword; NOTE(review): d1 is not tested here, unlike after _imem_read_word in funimp_fdbcc
				4050
				4051	############################################################################
				4052
				4053	fmov.l &0x0,%fpcr # clear FPCR
				4054	fmov.l &0x0,%fpsr # clear FPSR
				4055
				4056	clr.b SPCOND_FLG(%a6) # clear "special case" flag
				4057
				4058	# Divide the fp instructions into 8 types based on the TYPE field in
				4059	# bits 6-8 of the opword(classes 6,7 are undefined).
				4060	# (for the '060, only two types can take this exception)
				4061	# bftst %d0{&7:&3} # test TYPE
				4062	btst &22,%d0 # type 0 or 1 ? (bit 22 of d0 = bit 6 of the opword)
				4063	bne.w funimp_misc # type 1
				4064
				4065	#########################################
				4066	# TYPE == 0: General instructions #
				4067	#########################################
				4068	funimp_gen: # TYPE 0 path: emulate a general FP instruction
				4069
				4070	clr.b STORE_FLG(%a6) # clear "store result" flag
				4071
				4072	# clear the ccode byte and exception status byte
				4073	andi.l &0x00ff00ff,USER_FPSR(%a6)
				4074
				4075	bfextu %d0{&16:&6},%d1 # extract upper 6 of cmdreg
				4076	cmpi.b %d1,&0x17 # is op an fmovecr?
				4077	beq.w funimp_fmovcr # yes
				4078
				4079	funimp_gen_op:
				4080	bsr.l _load_fop # load src/dst operands
				4081
				4082	clr.l %d0 # zero d0 before inserting the mode byte
				4083	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
				4084
				4085	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch second byte of cmdreg
				4086	andi.w &0x003f,%d1 # extract extension bits
				4087	lsl.w &0x3,%d1 # shift left 3 bits (8 table entries per extension)
				4088	or.b STAG(%a6),%d1 # insert src optag bits
				4089
				4090	lea FP_DST(%a6),%a1 # pass dst ptr in a1
				4091	lea FP_SRC(%a6),%a0 # pass src ptr in a0
				4092
				4093	mov.w (tbl_trans.w,%pc,%d1.w*2),%d1 # fetch table entry (offset from tbl_trans)
				4094	jsr (tbl_trans.w,%pc,%d1.w*1) # emulate
				4095
				4096	funimp_fsave: # after emulation: check whether any exceptions are enabled
				4097	mov.b FPCR_ENABLE(%a6),%d0 # fetch exceptions enabled
				4098	bne.w funimp_ena # some are enabled
				4099
				4100	funimp_store:
				4101	bfextu EXC_CMDREG(%a6){&6:&3},%d0 # fetch Dn
				4102	bsr.l store_fpreg # store result to fp regfile
				4103
				4104	funimp_gen_exit:
				4105	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4106	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4107	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4108
				4109	funimp_gen_exit_cmp:
				4110	cmpi.b SPCOND_FLG(%a6),&mia7_flg # was the ea mode (sp)+ ?
				4111	beq.b funimp_gen_exit_a7 # yes
				4112
				4113	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was the ea mode -(sp) ?
				4114	beq.b funimp_gen_exit_a7 # yes
				4115
				4116	funimp_gen_exit_cont:
				4117	unlk %a6 # release the local stack frame
				4118
				4119	funimp_gen_exit_cont2:
				4120	btst &0x7,(%sp) # is trace on?
				4121	beq.l _fpsp_done # no
				4122
				4123	# this catches a problem with the case where an exception will be re-inserted
				4124	# into the machine. the frestore has already been executed...so, the fmov.l
				4125	# alone of the control register would trigger an unwanted exception.
				4126	# until I feel like fixing this, we'll sidestep the exception.
				4127	fsave -(%sp) # push the FPU state frame
				4128	fmov.l %fpiar,0x14(%sp) # "Current PC" is in FPIAR
				4129	frestore (%sp)+ # pop the FPU state frame back
				4130	mov.w &0x2024,0x6(%sp) # stk fmt = 0x2; voff = 0x24
				4131	bra.l _real_trace # exit through the Trace callout
				4132
				4133	funimp_gen_exit_a7: # the ea mode was (sp)+ or -(sp): propagate the new a7
				4134	btst &0x5,EXC_SR(%a6) # supervisor or user mode?
				4135	bne.b funimp_gen_exit_a7_s # supervisor
				4136
				4137	mov.l %a0,-(%sp) # save a0
				4138	mov.l EXC_A7(%a6),%a0 # fetch a7 value from the stack frame
				4139	mov.l %a0,%usp # write it to the user stack pointer
				4140	mov.l (%sp)+,%a0 # restore a0
				4141	bra.b funimp_gen_exit_cont # rejoin the common exit
				4142
				4143	# if the instruction was executed from supervisor mode and the addressing
				4144	# mode was (a7)+, then the stack frame for the rte must be shifted "up"
				4145	# "n" bytes where "n" is the size of the src operand type.
				4146	# f<op>.{b,w,l,s,d,x,p}
				4147	funimp_gen_exit_a7_s:
				4148	mov.l %d0,-(%sp) # save d0
				4149	mov.l EXC_A7(%a6),%d0 # load new a7'
				4150	sub.l OLD_A7(%a6),%d0 # subtract old a7'
				4151	mov.l 0x2+EXC_PC(%a6),(0x2+EXC_PC,%a6,%d0) # shift stack frame
				4152	mov.l EXC_SR(%a6),(EXC_SR,%a6,%d0) # shift stack frame
				4153	mov.w %d0,EXC_SR(%a6) # store incr number
				4154	mov.l (%sp)+,%d0 # restore d0
				4155
				4156	unlk %a6 # release the local stack frame
				4157
				4158	add.w (%sp),%sp # stack frame shifted
				4159	bra.b funimp_gen_exit_cont2 # rejoin the trace check
				4160
				4161	######################
				4162	# fmovecr.x #ccc,fpn #
				4163	######################
				4164	funimp_fmovcr: # fmovecr: no operand load needed, go straight to smovcr
				4165	clr.l %d0 # zero d0 before inserting the mode byte
				4166	mov.b FPCR_MODE(%a6),%d0 # fetch rnd mode
				4167	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch second byte of cmdreg
				4168	andi.l &0x0000007f,%d1 # pass rom offset in d1
				4169	bsr.l smovcr # emulate the fmovecr
				4170	bra.w funimp_fsave # rejoin the enabled-exception check
				4171
				4172	#########################################################################
				4173
				4174	#
				4175	# the user has enabled some exceptions. we figure not to see this too
				4176	# often so that's why it gets lower priority.
				4177	#
				4178	funimp_ena: # entered with d0 = FPCR exception enable byte (nonzero)
				4179
				4180	# was an exception set that was also enabled?
				4181	and.b FPSR_EXCEPT(%a6),%d0 # keep only ones enabled and set
				4182	bfffo %d0{&24:&8},%d0 # find highest priority exception
				4183	bne.b funimp_exc # at least one was set
				4184
				4185	# no exception that was enabled was set BUT if we got an exact overflow
				4186	# and overflow wasn't enabled but inexact was (yech!) then this is
				4187	# an inexact exception; otherwise, return to normal non-exception flow.
				4188	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
				4189	beq.w funimp_store # no; return to normal flow
				4190
				4191	# the overflow w/ exact result happened but was inexact set in the FPCR?
				4192	funimp_ovfl:
				4193	btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
				4194	beq.w funimp_store # no; return to normal flow
				4195	bra.b funimp_exc_ovfl # yes
				4196
				4197	# some exception happened that was actually enabled.
				4198	# we'll insert this new exception into the FPU and then return.
				4199	funimp_exc: # entered with d0 = bfffo result over the 8-bit field at offset 24
				4200	subi.l &24,%d0 # fix offset to be 0-7
				4201	cmpi.b %d0,&0x6 # is exception INEX?
				4202	bne.b funimp_exc_force # no
				4203
				4204	# the enabled exception was inexact. so, if it occurs with an overflow
				4205	# or underflow that was disabled, then we have to force an overflow or
				4206	# underflow frame. the eventual overflow or underflow handler will see that
				4207	# it's actually an inexact and act appropriately. this is the only easy
				4208	# way to have the EXOP available for the enabled inexact handler when
				4209	# a disabled overflow or underflow has also happened.
				4210	btst &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
				4211	bne.b funimp_exc_ovfl # yes
				4212	btst &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
				4213	bne.b funimp_exc_unfl # yes
				4214
				4215	# force the fsave exception status bits to signal an exception of the
				4216	# appropriate type. don't forget to "skew" the source operand in case we
				4217	# "unskewed" the one the hardware initially gave us.
				4218	funimp_exc_force:
				4219	mov.l %d0,-(%sp) # save d0
				4220	bsr.l funimp_skew # check for special case
				4221	mov.l (%sp)+,%d0 # restore d0
				4222	mov.w (tbl_funimp_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # status word for exception index d0
				4223	bra.b funimp_gen_exit2 # exit with frestore
				4224
				4225	tbl_funimp_except: # one status word per exception index 0-7
				4226	short 0xe002, 0xe006, 0xe004, 0xe005
				4227	short 0xe003, 0xe002, 0xe001, 0xe001
				4228
				4229	# insert an overflow frame
				4230	funimp_exc_ovfl:
				4231	bsr.l funimp_skew # check for special case
				4232	mov.w &0xe005,2+FP_SRC(%a6) # same status word as table index 3
				4233	bra.b funimp_gen_exit2 # exit with frestore
				4234
				4235	# insert an underflow frame
				4236	funimp_exc_unfl:
				4237	bsr.l funimp_skew # check for special case
				4238	mov.w &0xe003,2+FP_SRC(%a6) # same status word as table index 4; falls through
				4239
				4240	# this is the general exit point for an enabled exception that will be
				4241	# restored into the machine for the instruction just emulated.
				4242	funimp_gen_exit2:
				4243	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4244	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4245	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4246
				4247	frestore FP_SRC(%a6) # insert exceptional status
				4248
				4249	bra.w funimp_gen_exit_cmp # rejoin the a7 special-case check
				4250
				4251	############################################################################
				4252
				4253	#
				4254	# TYPE == 1: FDB<cc>, FS<cc>, FTRAP<cc>
				4255	#
				4256	# These instructions were implemented on the '881/2 and '040 in hardware but
				4257	# are emulated in software on the '060.
				4258	#
				4259	funimp_misc: # TYPE 1 path: decode fdb<cc> / fs<cc> / ftrap<cc> from d0
				4260	bfextu %d0{&10:&3},%d1 # extract mode field
				4261	cmpi.b %d1,&0x1 # is it an fdb<cc>?
				4262	beq.w funimp_fdbcc # yes
				4263	cmpi.b %d1,&0x7 # is it an fs<cc>?
				4264	bne.w funimp_fscc # yes
				4265	bfextu %d0{&13:&3},%d1 # extract the 3 bits that follow the mode field
				4266	cmpi.b %d1,&0x2 # is it an fs<cc>?
				4267	blt.w funimp_fscc # yes; otherwise fall through to ftrap<cc>
				4268
				4269	#########################
				4270	# ftrap<cc> #
				4271	# ftrap<cc>.w #<data> #
				4272	# ftrap<cc>.l #<data> #
				4273	#########################
				4274	funimp_ftrapcc: # emulate ftrap<cc>, then exit via bsun, trap or funimp_done
				4275
				4276	bsr.l _ftrapcc # FTRAP<cc>()
				4277
				4278	cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
				4279	beq.w funimp_bsun # yes
				4280
				4281	cmpi.b SPCOND_FLG(%a6),&ftrapcc_flg # should a trap occur?
				4282	bne.w funimp_done # no
				4283
				4284	# FP UNIMP FRAME TRAP FRAME
				4285	# *************** ***************
				4286	# <EA> Current PC
				4287	# *************** ***************
				4288	# * 0x2 * 0x02c * * 0x2 * 0x01c *
				4289	# *************** ***************
				4290	# Next PC Next PC
				4291	# *************** ***************
				4292	# * SR * * SR *
				4293	# *************** ***************
				4294	# (6 words) (6 words)
				4295	#
				4296	# the ftrapcc instruction should take a trap. so, here we must create a
				4297	# trap stack frame from an unimplemented fp instruction stack frame and
				4298	# jump to the user supplied entry point for the trap exception
				4299	funimp_ftrapcc_tp:
				4300	mov.l USER_FPIAR(%a6),EXC_EA(%a6) # Address = Current PC
				4301	mov.w &0x201c,EXC_VOFF(%a6) # Vector Offset = 0x01c
				4302
				4303	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4304	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4305	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4306
				4307	unlk %a6 # release the local stack frame
				4308	bra.l _real_trap # exit through the Trap callout
				4309
				4310	#########################
				4311	# fdb<cc> Dn,<label> #
				4312	#########################
				4313	funimp_fdbcc: # read the displacement word, then emulate fdb<cc>
				4314
				4315	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				4316	addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
				4317	bsr.l _imem_read_word # read displacement
				4318
				4319	tst.l %d1 # did ifetch fail?
				4320	bne.w funimp_iacc # yes
				4321
				4322	ext.l %d0 # sign extend displacement
				4323
				4324	bsr.l _fdbcc # FDB<cc>()
				4325
				4326	cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
				4327	beq.w funimp_bsun # yes
				4328
				4329	bra.w funimp_done # branch to finish
				4330
				4331	#################
				4332	# fs<cc>.b <ea> #
				4333	#################
				4334	funimp_fscc: # emulate fs<cc>, then fix up a7 or the stack frame if needed
				4335
				4336	bsr.l _fscc # FS<cc>()
				4337
				4338	# I am assuming here that an "fs<cc>.b -(An)" or "fs<cc>.b (An)+" instruction
				4339	# does not need to update "An" before taking a bsun exception.
				4340	cmpi.b SPCOND_FLG(%a6),&fbsun_flg # is enabled bsun occurring?
				4341	beq.w funimp_bsun # yes
				4342
				4343	btst &0x5,EXC_SR(%a6) # no bsun; is it a user mode exception?
				4344	bne.b funimp_fscc_s # no
				4345
				4346	funimp_fscc_u:
				4347	mov.l EXC_A7(%a6),%a0 # yes; set new USP
				4348	mov.l %a0,%usp # write it to the user stack pointer
				4349	bra.w funimp_done # branch to finish
				4350
				4351	# remember, I'm assuming that post-increment is bogus...(it IS!!!)
				4352	# so, the least significant WORD of the stacked effective address got
				4353	# overwritten by the "fs<cc> -(An)". We must shift the stack frame "down"
				4354	# so that the rte will work correctly without destroying the result.
				4355	# even though the operation size is byte, the stack ptr is decr by 2.
				4356	#
				4357	# remember, also, this instruction may be traced.
				4358	funimp_fscc_s:
				4359	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 modified?
				4360	bne.w funimp_done # no
				4361
				4362	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4363	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4364	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4365
				4366	unlk %a6 # release the local stack frame
				4367
				4368	btst &0x7,(%sp) # is trace enabled?
				4369	bne.b funimp_fscc_s_trace # yes
				4370
				4371	subq.l &0x2,%sp # move the frame base down 2 bytes
				4372	mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
				4373	mov.l 0x6(%sp),0x4(%sp) # shift lo(PC),voff "down"
				4374	bra.l _fpsp_done # exit; work all done
				4375
				4376	funimp_fscc_s_trace:
				4377	subq.l &0x2,%sp # move the frame base down 2 bytes
				4378	mov.l 0x2(%sp),(%sp) # shift SR,hi(PC) "down"
				4379	mov.w 0x6(%sp),0x4(%sp) # shift lo(PC)
				4380	mov.w &0x2024,0x6(%sp) # fmt/voff = $2024
				4381	fmov.l %fpiar,0x8(%sp) # insert "current PC"
				4382
				4383	bra.l _real_trace # exit through the Trace callout
				4384
				4385	#
				4386	# The ftrap<cc>, fs<cc>, or fdb<cc> is to take an enabled bsun. we must convert
				4387	# the fp unimplemented instruction exception stack frame into a bsun stack frame,
				4388	# restore a bsun exception into the machine, and branch to the user
				4389	# supplied bsun hook.
				4390	#
				4391	# FP UNIMP FRAME BSUN FRAME
				4392	# *************** ***************
				4393	# <EA> * 0x0 * 0x0c0 *
				4394	# *************** ***************
				4395	# * 0x2 * 0x02c * Current PC
				4396	# *************** ***************
				4397	# Next PC * SR *
				4398	# *************** ***************
				4399	# * SR * (4 words)
				4400	# *****************
				4401	# (6 words)
				4402	#
				4403	funimp_bsun: # rewrite the frame as a 4-word bsun frame and exit via _real_bsun
				4404	mov.w &0x00c0,2+EXC_EA(%a6) # Fmt = 0x0; Vector Offset = 0x0c0
				4405	mov.l USER_FPIAR(%a6),EXC_VOFF(%a6) # PC = Current PC
				4406	mov.w EXC_SR(%a6),2+EXC_PC(%a6) # shift SR "up"
				4407
				4408	mov.w &0xe000,2+FP_SRC(%a6) # bsun exception enabled
				4409
				4410	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4411	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4412	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4413
				4414	frestore FP_SRC(%a6) # restore bsun exception
				4415
				4416	unlk %a6 # release the local stack frame
				4417
				4418	addq.l &0x4,%sp # erase sludge
				4419
				4420	bra.l _real_bsun # branch to user bsun hook
				4421
				4422	#
				4423	# all ftrapcc/fscc/fdbcc processing has been completed. unwind the stack frame
				4424	# and return.
				4425	#
				4426	# as usual, we have to check for trace mode being on here. since instructions
				4427	# modifying the supervisor stack frame don't pass through here, this is a
				4428	# relatively easy task.
				4429	#
				4430	funimp_done: # common exit for ftrap<cc>/fs<cc>/fdb<cc>: restore state, check trace
				4431	fmovm.x EXC_FP0(%a6),&0xc0 # restore fp0-fp1
				4432	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4433	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4434
				4435	unlk %a6 # release the local stack frame
				4436
				4437	btst &0x7,(%sp) # is trace enabled?
				4438	bne.b funimp_trace # yes
				4439
				4440	bra.l _fpsp_done # no; exit, work all done
				4441
				4442	# FP UNIMP FRAME TRACE FRAME
				4443	# *************** ***************
				4444	# <EA> Current PC
				4445	# *************** ***************
				4446	# * 0x2 * 0x02c * * 0x2 * 0x024 *
				4447	# *************** ***************
				4448	# Next PC Next PC
				4449	# *************** ***************
				4450	# * SR * * SR *
				4451	# *************** ***************
				4452	# (6 words) (6 words)
				4453	#
				4454	# the fscc instruction should take a trace trap. so, here we must create a
				4455	# trace stack frame from an unimplemented fp instruction stack frame and
				4456	# jump to the user supplied entry point for the trace exception
				4457	funimp_trace:
				4458	fmov.l %fpiar,0x8(%sp) # current PC is in fpiar
				4459	mov.b &0x24,0x7(%sp) # vector offset = 0x024
				4460
				4461	bra.l _real_trace # exit through the Trace callout
				4462
				4463	################################################################
				4464
				4465	global tbl_trans
				4466	swbeg &0x1c0
				4467	tbl_trans:
				4468	short tbl_trans - tbl_trans # $00-0 fmovecr all
				4469	short tbl_trans - tbl_trans # $00-1 fmovecr all
				4470	short tbl_trans - tbl_trans # $00-2 fmovecr all
				4471	short tbl_trans - tbl_trans # $00-3 fmovecr all
				4472	short tbl_trans - tbl_trans # $00-4 fmovecr all
				4473	short tbl_trans - tbl_trans # $00-5 fmovecr all
				4474	short tbl_trans - tbl_trans # $00-6 fmovecr all
				4475	short tbl_trans - tbl_trans # $00-7 fmovecr all
				4476
				4477	short tbl_trans - tbl_trans # $01-0 fint norm
				4478	short tbl_trans - tbl_trans # $01-1 fint zero
				4479	short tbl_trans - tbl_trans # $01-2 fint inf
				4480	short tbl_trans - tbl_trans # $01-3 fint qnan
				4481	short tbl_trans - tbl_trans # $01-5 fint denorm
				4482	short tbl_trans - tbl_trans # $01-4 fint snan
				4483	short tbl_trans - tbl_trans # $01-6 fint unnorm
				4484	short tbl_trans - tbl_trans # $01-7 ERROR
				4485
				4486	short ssinh - tbl_trans # $02-0 fsinh norm
				4487	short src_zero - tbl_trans # $02-1 fsinh zero
				4488	short src_inf - tbl_trans # $02-2 fsinh inf
				4489	short src_qnan - tbl_trans # $02-3 fsinh qnan
				4490	short ssinhd - tbl_trans # $02-5 fsinh denorm
				4491	short src_snan - tbl_trans # $02-4 fsinh snan
				4492	short tbl_trans - tbl_trans # $02-6 fsinh unnorm
				4493	short tbl_trans - tbl_trans # $02-7 ERROR
				4494
				4495	short tbl_trans - tbl_trans # $03-0 fintrz norm
				4496	short tbl_trans - tbl_trans # $03-1 fintrz zero
				4497	short tbl_trans - tbl_trans # $03-2 fintrz inf
				4498	short tbl_trans - tbl_trans # $03-3 fintrz qnan
				4499	short tbl_trans - tbl_trans # $03-5 fintrz denorm
				4500	short tbl_trans - tbl_trans # $03-4 fintrz snan
				4501	short tbl_trans - tbl_trans # $03-6 fintrz unnorm
				4502	short tbl_trans - tbl_trans # $03-7 ERROR
				4503
				4504	short tbl_trans - tbl_trans # $04-0 fsqrt norm
				4505	short tbl_trans - tbl_trans # $04-1 fsqrt zero
				4506	short tbl_trans - tbl_trans # $04-2 fsqrt inf
				4507	short tbl_trans - tbl_trans # $04-3 fsqrt qnan
				4508	short tbl_trans - tbl_trans # $04-5 fsqrt denorm
				4509	short tbl_trans - tbl_trans # $04-4 fsqrt snan
				4510	short tbl_trans - tbl_trans # $04-6 fsqrt unnorm
				4511	short tbl_trans - tbl_trans # $04-7 ERROR
				4512
				4513	short tbl_trans - tbl_trans # $05-0 ERROR
				4514	short tbl_trans - tbl_trans # $05-1 ERROR
				4515	short tbl_trans - tbl_trans # $05-2 ERROR
				4516	short tbl_trans - tbl_trans # $05-3 ERROR
				4517	short tbl_trans - tbl_trans # $05-4 ERROR
				4518	short tbl_trans - tbl_trans # $05-5 ERROR
				4519	short tbl_trans - tbl_trans # $05-6 ERROR
				4520	short tbl_trans - tbl_trans # $05-7 ERROR
				4521
				4522	short slognp1 - tbl_trans # $06-0 flognp1 norm
				4523	short src_zero - tbl_trans # $06-1 flognp1 zero
				4524	short sopr_inf - tbl_trans # $06-2 flognp1 inf
				4525	short src_qnan - tbl_trans # $06-3 flognp1 qnan
				4526	short slognp1d - tbl_trans # $06-5 flognp1 denorm
				4527	short src_snan - tbl_trans # $06-4 flognp1 snan
				4528	short tbl_trans - tbl_trans # $06-6 flognp1 unnorm
				4529	short tbl_trans - tbl_trans # $06-7 ERROR
				4530
				4531	short tbl_trans - tbl_trans # $07-0 ERROR
				4532	short tbl_trans - tbl_trans # $07-1 ERROR
				4533	short tbl_trans - tbl_trans # $07-2 ERROR
				4534	short tbl_trans - tbl_trans # $07-3 ERROR
				4535	short tbl_trans - tbl_trans # $07-4 ERROR
				4536	short tbl_trans - tbl_trans # $07-5 ERROR
				4537	short tbl_trans - tbl_trans # $07-6 ERROR
				4538	short tbl_trans - tbl_trans # $07-7 ERROR
				4539
				4540	short setoxm1 - tbl_trans # $08-0 fetoxm1 norm
				4541	short src_zero - tbl_trans # $08-1 fetoxm1 zero
				4542	short setoxm1i - tbl_trans # $08-2 fetoxm1 inf
				4543	short src_qnan - tbl_trans # $08-3 fetoxm1 qnan
				4544	short setoxm1d - tbl_trans # $08-5 fetoxm1 denorm
				4545	short src_snan - tbl_trans # $08-4 fetoxm1 snan
				4546	short tbl_trans - tbl_trans # $08-6 fetoxm1 unnorm
				4547	short tbl_trans - tbl_trans # $08-7 ERROR
				4548
				4549	short stanh - tbl_trans # $09-0 ftanh norm
				4550	short src_zero - tbl_trans # $09-1 ftanh zero
				4551	short src_one - tbl_trans # $09-2 ftanh inf
				4552	short src_qnan - tbl_trans # $09-3 ftanh qnan
				4553	short stanhd - tbl_trans # $09-5 ftanh denorm
				4554	short src_snan - tbl_trans # $09-4 ftanh snan
				4555	short tbl_trans - tbl_trans # $09-6 ftanh unnorm
				4556	short tbl_trans - tbl_trans # $09-7 ERROR
				4557
				4558	short satan - tbl_trans # $0a-0 fatan norm
				4559	short src_zero - tbl_trans # $0a-1 fatan zero
				4560	short spi_2 - tbl_trans # $0a-2 fatan inf
				4561	short src_qnan - tbl_trans # $0a-3 fatan qnan
				4562	short satand - tbl_trans # $0a-5 fatan denorm
				4563	short src_snan - tbl_trans # $0a-4 fatan snan
				4564	short tbl_trans - tbl_trans # $0a-6 fatan unnorm
				4565	short tbl_trans - tbl_trans # $0a-7 ERROR
				4566
				4567	short tbl_trans - tbl_trans # $0b-0 ERROR
				4568	short tbl_trans - tbl_trans # $0b-1 ERROR
				4569	short tbl_trans - tbl_trans # $0b-2 ERROR
				4570	short tbl_trans - tbl_trans # $0b-3 ERROR
				4571	short tbl_trans - tbl_trans # $0b-4 ERROR
				4572	short tbl_trans - tbl_trans # $0b-5 ERROR
				4573	short tbl_trans - tbl_trans # $0b-6 ERROR
				4574	short tbl_trans - tbl_trans # $0b-7 ERROR
				4575
				4576	short sasin - tbl_trans # $0c-0 fasin norm
				4577	short src_zero - tbl_trans # $0c-1 fasin zero
				4578	short t_operr - tbl_trans # $0c-2 fasin inf
				4579	short src_qnan - tbl_trans # $0c-3 fasin qnan
				4580	short sasind - tbl_trans # $0c-5 fasin denorm
				4581	short src_snan - tbl_trans # $0c-4 fasin snan
				4582	short tbl_trans - tbl_trans # $0c-6 fasin unnorm
				4583	short tbl_trans - tbl_trans # $0c-7 ERROR
				4584
				4585	short satanh - tbl_trans # $0d-0 fatanh norm
				4586	short src_zero - tbl_trans # $0d-1 fatanh zero
				4587	short t_operr - tbl_trans # $0d-2 fatanh inf
				4588	short src_qnan - tbl_trans # $0d-3 fatanh qnan
				4589	short satanhd - tbl_trans # $0d-5 fatanh denorm
				4590	short src_snan - tbl_trans # $0d-4 fatanh snan
				4591	short tbl_trans - tbl_trans # $0d-6 fatanh unnorm
				4592	short tbl_trans - tbl_trans # $0d-7 ERROR
				4593
				4594	short ssin - tbl_trans # $0e-0 fsin norm
				4595	short src_zero - tbl_trans # $0e-1 fsin zero
				4596	short t_operr - tbl_trans # $0e-2 fsin inf
				4597	short src_qnan - tbl_trans # $0e-3 fsin qnan
				4598	short ssind - tbl_trans # $0e-5 fsin denorm
				4599	short src_snan - tbl_trans # $0e-4 fsin snan
				4600	short tbl_trans - tbl_trans # $0e-6 fsin unnorm
				4601	short tbl_trans - tbl_trans # $0e-7 ERROR
				4602
				4603	short stan - tbl_trans # $0f-0 ftan norm
				4604	short src_zero - tbl_trans # $0f-1 ftan zero
				4605	short t_operr - tbl_trans # $0f-2 ftan inf
				4606	short src_qnan - tbl_trans # $0f-3 ftan qnan
				4607	short stand - tbl_trans # $0f-5 ftan denorm
				4608	short src_snan - tbl_trans # $0f-4 ftan snan
				4609	short tbl_trans - tbl_trans # $0f-6 ftan unnorm
				4610	short tbl_trans - tbl_trans # $0f-7 ERROR
				4611
				4612	short setox - tbl_trans # $10-0 fetox norm
				4613	short ld_pone - tbl_trans # $10-1 fetox zero
				4614	short szr_inf - tbl_trans # $10-2 fetox inf
				4615	short src_qnan - tbl_trans # $10-3 fetox qnan
				4616	short setoxd - tbl_trans # $10-5 fetox denorm
				4617	short src_snan - tbl_trans # $10-4 fetox snan
				4618	short tbl_trans - tbl_trans # $10-6 fetox unnorm
				4619	short tbl_trans - tbl_trans # $10-7 ERROR
				4620
				4621	short stwotox - tbl_trans # $11-0 ftwotox norm
				4622	short ld_pone - tbl_trans # $11-1 ftwotox zero
				4623	short szr_inf - tbl_trans # $11-2 ftwotox inf
				4624	short src_qnan - tbl_trans # $11-3 ftwotox qnan
				4625	short stwotoxd - tbl_trans # $11-5 ftwotox denorm
				4626	short src_snan - tbl_trans # $11-4 ftwotox snan
				4627	short tbl_trans - tbl_trans # $11-6 ftwotox unnorm
				4628	short tbl_trans - tbl_trans # $11-7 ERROR
				4629
				4630	short stentox - tbl_trans # $12-0 ftentox norm
				4631	short ld_pone - tbl_trans # $12-1 ftentox zero
				4632	short szr_inf - tbl_trans # $12-2 ftentox inf
				4633	short src_qnan - tbl_trans # $12-3 ftentox qnan
				4634	short stentoxd - tbl_trans # $12-5 ftentox denorm
				4635	short src_snan - tbl_trans # $12-4 ftentox snan
				4636	short tbl_trans - tbl_trans # $12-6 ftentox unnorm
				4637	short tbl_trans - tbl_trans # $12-7 ERROR
				4638
				4639	short tbl_trans - tbl_trans # $13-0 ERROR
				4640	short tbl_trans - tbl_trans # $13-1 ERROR
				4641	short tbl_trans - tbl_trans # $13-2 ERROR
				4642	short tbl_trans - tbl_trans # $13-3 ERROR
				4643	short tbl_trans - tbl_trans # $13-4 ERROR
				4644	short tbl_trans - tbl_trans # $13-5 ERROR
				4645	short tbl_trans - tbl_trans # $13-6 ERROR
				4646	short tbl_trans - tbl_trans # $13-7 ERROR
				4647
				4648	short slogn - tbl_trans # $14-0 flogn norm
				4649	short t_dz2 - tbl_trans # $14-1 flogn zero
				4650	short sopr_inf - tbl_trans # $14-2 flogn inf
				4651	short src_qnan - tbl_trans # $14-3 flogn qnan
				4652	short slognd - tbl_trans # $14-5 flogn denorm
				4653	short src_snan - tbl_trans # $14-4 flogn snan
				4654	short tbl_trans - tbl_trans # $14-6 flogn unnorm
				4655	short tbl_trans - tbl_trans # $14-7 ERROR
				4656
				4657	short slog10 - tbl_trans # $15-0 flog10 norm
				4658	short t_dz2 - tbl_trans # $15-1 flog10 zero
				4659	short sopr_inf - tbl_trans # $15-2 flog10 inf
				4660	short src_qnan - tbl_trans # $15-3 flog10 qnan
				4661	short slog10d - tbl_trans # $15-5 flog10 denorm
				4662	short src_snan - tbl_trans # $15-4 flog10 snan
				4663	short tbl_trans - tbl_trans # $15-6 flog10 unnorm
				4664	short tbl_trans - tbl_trans # $15-7 ERROR
				4665
				4666	short slog2 - tbl_trans # $16-0 flog2 norm
				4667	short t_dz2 - tbl_trans # $16-1 flog2 zero
				4668	short sopr_inf - tbl_trans # $16-2 flog2 inf
				4669	short src_qnan - tbl_trans # $16-3 flog2 qnan
				4670	short slog2d - tbl_trans # $16-5 flog2 denorm
				4671	short src_snan - tbl_trans # $16-4 flog2 snan
				4672	short tbl_trans - tbl_trans # $16-6 flog2 unnorm
				4673	short tbl_trans - tbl_trans # $16-7 ERROR
				4674
				4675	short tbl_trans - tbl_trans # $17-0 ERROR
				4676	short tbl_trans - tbl_trans # $17-1 ERROR
				4677	short tbl_trans - tbl_trans # $17-2 ERROR
				4678	short tbl_trans - tbl_trans # $17-3 ERROR
				4679	short tbl_trans - tbl_trans # $17-4 ERROR
				4680	short tbl_trans - tbl_trans # $17-5 ERROR
				4681	short tbl_trans - tbl_trans # $17-6 ERROR
				4682	short tbl_trans - tbl_trans # $17-7 ERROR
				4683
				4684	short tbl_trans - tbl_trans # $18-0 fabs norm
				4685	short tbl_trans - tbl_trans # $18-1 fabs zero
				4686	short tbl_trans - tbl_trans # $18-2 fabs inf
				4687	short tbl_trans - tbl_trans # $18-3 fabs qnan
				4688	short tbl_trans - tbl_trans # $18-5 fabs denorm
				4689	short tbl_trans - tbl_trans # $18-4 fabs snan
				4690	short tbl_trans - tbl_trans # $18-6 fabs unnorm
				4691	short tbl_trans - tbl_trans # $18-7 ERROR
				4692
				4693	short scosh - tbl_trans # $19-0 fcosh norm
				4694	short ld_pone - tbl_trans # $19-1 fcosh zero
				4695	short ld_pinf - tbl_trans # $19-2 fcosh inf
				4696	short src_qnan - tbl_trans # $19-3 fcosh qnan
				4697	short scoshd - tbl_trans # $19-5 fcosh denorm
				4698	short src_snan - tbl_trans # $19-4 fcosh snan
				4699	short tbl_trans - tbl_trans # $19-6 fcosh unnorm
				4700	short tbl_trans - tbl_trans # $19-7 ERROR
				4701
				4702	short tbl_trans - tbl_trans # $1a-0 fneg norm
				4703	short tbl_trans - tbl_trans # $1a-1 fneg zero
				4704	short tbl_trans - tbl_trans # $1a-2 fneg inf
				4705	short tbl_trans - tbl_trans # $1a-3 fneg qnan
				4706	short tbl_trans - tbl_trans # $1a-5 fneg denorm
				4707	short tbl_trans - tbl_trans # $1a-4 fneg snan
				4708	short tbl_trans - tbl_trans # $1a-6 fneg unnorm
				4709	short tbl_trans - tbl_trans # $1a-7 ERROR
				4710
				4711	short tbl_trans - tbl_trans # $1b-0 ERROR
				4712	short tbl_trans - tbl_trans # $1b-1 ERROR
				4713	short tbl_trans - tbl_trans # $1b-2 ERROR
				4714	short tbl_trans - tbl_trans # $1b-3 ERROR
				4715	short tbl_trans - tbl_trans # $1b-4 ERROR
				4716	short tbl_trans - tbl_trans # $1b-5 ERROR
				4717	short tbl_trans - tbl_trans # $1b-6 ERROR
				4718	short tbl_trans - tbl_trans # $1b-7 ERROR
				4719
				4720	short sacos - tbl_trans # $1c-0 facos norm
				4721	short ld_ppi2 - tbl_trans # $1c-1 facos zero
				4722	short t_operr - tbl_trans # $1c-2 facos inf
				4723	short src_qnan - tbl_trans # $1c-3 facos qnan
				4724	short sacosd - tbl_trans # $1c-5 facos denorm
				4725	short src_snan - tbl_trans # $1c-4 facos snan
				4726	short tbl_trans - tbl_trans # $1c-6 facos unnorm
				4727	short tbl_trans - tbl_trans # $1c-7 ERROR
				4728
				4729	short scos - tbl_trans # $1d-0 fcos norm
				4730	short ld_pone - tbl_trans # $1d-1 fcos zero
				4731	short t_operr - tbl_trans # $1d-2 fcos inf
				4732	short src_qnan - tbl_trans # $1d-3 fcos qnan
				4733	short scosd - tbl_trans # $1d-5 fcos denorm
				4734	short src_snan - tbl_trans # $1d-4 fcos snan
				4735	short tbl_trans - tbl_trans # $1d-6 fcos unnorm
				4736	short tbl_trans - tbl_trans # $1d-7 ERROR
				4737
				4738	short sgetexp - tbl_trans # $1e-0 fgetexp norm
				4739	short src_zero - tbl_trans # $1e-1 fgetexp zero
				4740	short t_operr - tbl_trans # $1e-2 fgetexp inf
				4741	short src_qnan - tbl_trans # $1e-3 fgetexp qnan
				4742	short sgetexpd - tbl_trans # $1e-5 fgetexp denorm
				4743	short src_snan - tbl_trans # $1e-4 fgetexp snan
				4744	short tbl_trans - tbl_trans # $1e-6 fgetexp unnorm
				4745	short tbl_trans - tbl_trans # $1e-7 ERROR
				4746
				4747	short sgetman - tbl_trans # $1f-0 fgetman norm
				4748	short src_zero - tbl_trans # $1f-1 fgetman zero
				4749	short t_operr - tbl_trans # $1f-2 fgetman inf
				4750	short src_qnan - tbl_trans # $1f-3 fgetman qnan
				4751	short sgetmand - tbl_trans # $1f-5 fgetman denorm
				4752	short src_snan - tbl_trans # $1f-4 fgetman snan
				4753	short tbl_trans - tbl_trans # $1f-6 fgetman unnorm
				4754	short tbl_trans - tbl_trans # $1f-7 ERROR
				4755
				4756	short tbl_trans - tbl_trans # $20-0 fdiv norm
				4757	short tbl_trans - tbl_trans # $20-1 fdiv zero
				4758	short tbl_trans - tbl_trans # $20-2 fdiv inf
				4759	short tbl_trans - tbl_trans # $20-3 fdiv qnan
				4760	short tbl_trans - tbl_trans # $20-5 fdiv denorm
				4761	short tbl_trans - tbl_trans # $20-4 fdiv snan
				4762	short tbl_trans - tbl_trans # $20-6 fdiv unnorm
				4763	short tbl_trans - tbl_trans # $20-7 ERROR
				4764
				4765	short smod_snorm - tbl_trans # $21-0 fmod norm
				4766	short smod_szero - tbl_trans # $21-1 fmod zero
				4767	short smod_sinf - tbl_trans # $21-2 fmod inf
				4768	short sop_sqnan - tbl_trans # $21-3 fmod qnan
				4769	short smod_sdnrm - tbl_trans # $21-5 fmod denorm
				4770	short sop_ssnan - tbl_trans # $21-4 fmod snan
				4771	short tbl_trans - tbl_trans # $21-6 fmod unnorm
				4772	short tbl_trans - tbl_trans # $21-7 ERROR
				4773
				4774	short tbl_trans - tbl_trans # $22-0 fadd norm
				4775	short tbl_trans - tbl_trans # $22-1 fadd zero
				4776	short tbl_trans - tbl_trans # $22-2 fadd inf
				4777	short tbl_trans - tbl_trans # $22-3 fadd qnan
				4778	short tbl_trans - tbl_trans # $22-5 fadd denorm
				4779	short tbl_trans - tbl_trans # $22-4 fadd snan
				4780	short tbl_trans - tbl_trans # $22-6 fadd unnorm
				4781	short tbl_trans - tbl_trans # $22-7 ERROR
				4782
				4783	short tbl_trans - tbl_trans # $23-0 fmul norm
				4784	short tbl_trans - tbl_trans # $23-1 fmul zero
				4785	short tbl_trans - tbl_trans # $23-2 fmul inf
				4786	short tbl_trans - tbl_trans # $23-3 fmul qnan
				4787	short tbl_trans - tbl_trans # $23-5 fmul denorm
				4788	short tbl_trans - tbl_trans # $23-4 fmul snan
				4789	short tbl_trans - tbl_trans # $23-6 fmul unnorm
				4790	short tbl_trans - tbl_trans # $23-7 ERROR
				4791
				4792	short tbl_trans - tbl_trans # $24-0 fsgldiv norm
				4793	short tbl_trans - tbl_trans # $24-1 fsgldiv zero
				4794	short tbl_trans - tbl_trans # $24-2 fsgldiv inf
				4795	short tbl_trans - tbl_trans # $24-3 fsgldiv qnan
				4796	short tbl_trans - tbl_trans # $24-5 fsgldiv denorm
				4797	short tbl_trans - tbl_trans # $24-4 fsgldiv snan
				4798	short tbl_trans - tbl_trans # $24-6 fsgldiv unnorm
				4799	short tbl_trans - tbl_trans # $24-7 ERROR
				4800
				4801	short srem_snorm - tbl_trans # $25-0 frem norm
				4802	short srem_szero - tbl_trans # $25-1 frem zero
				4803	short srem_sinf - tbl_trans # $25-2 frem inf
				4804	short sop_sqnan - tbl_trans # $25-3 frem qnan
				4805	short srem_sdnrm - tbl_trans # $25-5 frem denorm
				4806	short sop_ssnan - tbl_trans # $25-4 frem snan
				4807	short tbl_trans - tbl_trans # $25-6 frem unnorm
				4808	short tbl_trans - tbl_trans # $25-7 ERROR
				4809
				4810	short sscale_snorm - tbl_trans # $26-0 fscale norm
				4811	short sscale_szero - tbl_trans # $26-1 fscale zero
				4812	short sscale_sinf - tbl_trans # $26-2 fscale inf
				4813	short sop_sqnan - tbl_trans # $26-3 fscale qnan
				4814	short sscale_sdnrm - tbl_trans # $26-5 fscale denorm
				4815	short sop_ssnan - tbl_trans # $26-4 fscale snan
				4816	short tbl_trans - tbl_trans # $26-6 fscale unnorm
				4817	short tbl_trans - tbl_trans # $26-7 ERROR
				4818
				4819	short tbl_trans - tbl_trans # $27-0 fsglmul norm
				4820	short tbl_trans - tbl_trans # $27-1 fsglmul zero
				4821	short tbl_trans - tbl_trans # $27-2 fsglmul inf
				4822	short tbl_trans - tbl_trans # $27-3 fsglmul qnan
				4823	short tbl_trans - tbl_trans # $27-5 fsglmul denorm
				4824	short tbl_trans - tbl_trans # $27-4 fsglmul snan
				4825	short tbl_trans - tbl_trans # $27-6 fsglmul unnorm
				4826	short tbl_trans - tbl_trans # $27-7 ERROR
				4827
				4828	short tbl_trans - tbl_trans # $28-0 fsub norm
				4829	short tbl_trans - tbl_trans # $28-1 fsub zero
				4830	short tbl_trans - tbl_trans # $28-2 fsub inf
				4831	short tbl_trans - tbl_trans # $28-3 fsub qnan
				4832	short tbl_trans - tbl_trans # $28-5 fsub denorm
				4833	short tbl_trans - tbl_trans # $28-4 fsub snan
				4834	short tbl_trans - tbl_trans # $28-6 fsub unnorm
				4835	short tbl_trans - tbl_trans # $28-7 ERROR
				4836
				4837	short tbl_trans - tbl_trans # $29-0 ERROR
				4838	short tbl_trans - tbl_trans # $29-1 ERROR
				4839	short tbl_trans - tbl_trans # $29-2 ERROR
				4840	short tbl_trans - tbl_trans # $29-3 ERROR
				4841	short tbl_trans - tbl_trans # $29-4 ERROR
				4842	short tbl_trans - tbl_trans # $29-5 ERROR
				4843	short tbl_trans - tbl_trans # $29-6 ERROR
				4844	short tbl_trans - tbl_trans # $29-7 ERROR
				4845
				4846	short tbl_trans - tbl_trans # $2a-0 ERROR
				4847	short tbl_trans - tbl_trans # $2a-1 ERROR
				4848	short tbl_trans - tbl_trans # $2a-2 ERROR
				4849	short tbl_trans - tbl_trans # $2a-3 ERROR
				4850	short tbl_trans - tbl_trans # $2a-4 ERROR
				4851	short tbl_trans - tbl_trans # $2a-5 ERROR
				4852	short tbl_trans - tbl_trans # $2a-6 ERROR
				4853	short tbl_trans - tbl_trans # $2a-7 ERROR
				4854
				4855	short tbl_trans - tbl_trans # $2b-0 ERROR
				4856	short tbl_trans - tbl_trans # $2b-1 ERROR
				4857	short tbl_trans - tbl_trans # $2b-2 ERROR
				4858	short tbl_trans - tbl_trans # $2b-3 ERROR
				4859	short tbl_trans - tbl_trans # $2b-4 ERROR
				4860	short tbl_trans - tbl_trans # $2b-5 ERROR
				4861	short tbl_trans - tbl_trans # $2b-6 ERROR
				4862	short tbl_trans - tbl_trans # $2b-7 ERROR
				4863
				4864	short tbl_trans - tbl_trans # $2c-0 ERROR
				4865	short tbl_trans - tbl_trans # $2c-1 ERROR
				4866	short tbl_trans - tbl_trans # $2c-2 ERROR
				4867	short tbl_trans - tbl_trans # $2c-3 ERROR
				4868	short tbl_trans - tbl_trans # $2c-4 ERROR
				4869	short tbl_trans - tbl_trans # $2c-5 ERROR
				4870	short tbl_trans - tbl_trans # $2c-6 ERROR
				4871	short tbl_trans - tbl_trans # $2c-7 ERROR
				4872
				4873	short tbl_trans - tbl_trans # $2d-0 ERROR
				4874	short tbl_trans - tbl_trans # $2d-1 ERROR
				4875	short tbl_trans - tbl_trans # $2d-2 ERROR
				4876	short tbl_trans - tbl_trans # $2d-3 ERROR
				4877	short tbl_trans - tbl_trans # $2d-4 ERROR
				4878	short tbl_trans - tbl_trans # $2d-5 ERROR
				4879	short tbl_trans - tbl_trans # $2d-6 ERROR
				4880	short tbl_trans - tbl_trans # $2d-7 ERROR
				4881
				4882	short tbl_trans - tbl_trans # $2e-0 ERROR
				4883	short tbl_trans - tbl_trans # $2e-1 ERROR
				4884	short tbl_trans - tbl_trans # $2e-2 ERROR
				4885	short tbl_trans - tbl_trans # $2e-3 ERROR
				4886	short tbl_trans - tbl_trans # $2e-4 ERROR
				4887	short tbl_trans - tbl_trans # $2e-5 ERROR
				4888	short tbl_trans - tbl_trans # $2e-6 ERROR
				4889	short tbl_trans - tbl_trans # $2e-7 ERROR
				4890
				4891	short tbl_trans - tbl_trans # $2f-0 ERROR
				4892	short tbl_trans - tbl_trans # $2f-1 ERROR
				4893	short tbl_trans - tbl_trans # $2f-2 ERROR
				4894	short tbl_trans - tbl_trans # $2f-3 ERROR
				4895	short tbl_trans - tbl_trans # $2f-4 ERROR
				4896	short tbl_trans - tbl_trans # $2f-5 ERROR
				4897	short tbl_trans - tbl_trans # $2f-6 ERROR
				4898	short tbl_trans - tbl_trans # $2f-7 ERROR
				4899
				4900	short ssincos - tbl_trans # $30-0 fsincos norm
				4901	short ssincosz - tbl_trans # $30-1 fsincos zero
				4902	short ssincosi - tbl_trans # $30-2 fsincos inf
				4903	short ssincosqnan - tbl_trans # $30-3 fsincos qnan
				4904	short ssincosd - tbl_trans # $30-5 fsincos denorm
				4905	short ssincossnan - tbl_trans # $30-4 fsincos snan
				4906	short tbl_trans - tbl_trans # $30-6 fsincos unnorm
				4907	short tbl_trans - tbl_trans # $30-7 ERROR
				4908
				4909	short ssincos - tbl_trans # $31-0 fsincos norm
				4910	short ssincosz - tbl_trans # $31-1 fsincos zero
				4911	short ssincosi - tbl_trans # $31-2 fsincos inf
				4912	short ssincosqnan - tbl_trans # $31-3 fsincos qnan
				4913	short ssincosd - tbl_trans # $31-5 fsincos denorm
				4914	short ssincossnan - tbl_trans # $31-4 fsincos snan
				4915	short tbl_trans - tbl_trans # $31-6 fsincos unnorm
				4916	short tbl_trans - tbl_trans # $31-7 ERROR
				4917
				4918	short ssincos - tbl_trans # $32-0 fsincos norm
				4919	short ssincosz - tbl_trans # $32-1 fsincos zero
				4920	short ssincosi - tbl_trans # $32-2 fsincos inf
				4921	short ssincosqnan - tbl_trans # $32-3 fsincos qnan
				4922	short ssincosd - tbl_trans # $32-5 fsincos denorm
				4923	short ssincossnan - tbl_trans # $32-4 fsincos snan
				4924	short tbl_trans - tbl_trans # $32-6 fsincos unnorm
				4925	short tbl_trans - tbl_trans # $32-7 ERROR
				4926
				4927	short ssincos - tbl_trans # $33-0 fsincos norm
				4928	short ssincosz - tbl_trans # $33-1 fsincos zero
				4929	short ssincosi - tbl_trans # $33-2 fsincos inf
				4930	short ssincosqnan - tbl_trans # $33-3 fsincos qnan
				4931	short ssincosd - tbl_trans # $33-5 fsincos denorm
				4932	short ssincossnan - tbl_trans # $33-4 fsincos snan
				4933	short tbl_trans - tbl_trans # $33-6 fsincos unnorm
				4934	short tbl_trans - tbl_trans # $33-7 ERROR
				4935
				4936	short ssincos - tbl_trans # $34-0 fsincos norm
				4937	short ssincosz - tbl_trans # $34-1 fsincos zero
				4938	short ssincosi - tbl_trans # $34-2 fsincos inf
				4939	short ssincosqnan - tbl_trans # $34-3 fsincos qnan
				4940	short ssincosd - tbl_trans # $34-5 fsincos denorm
				4941	short ssincossnan - tbl_trans # $34-4 fsincos snan
				4942	short tbl_trans - tbl_trans # $34-6 fsincos unnorm
				4943	short tbl_trans - tbl_trans # $34-7 ERROR
				4944
				4945	short ssincos - tbl_trans # $35-0 fsincos norm
				4946	short ssincosz - tbl_trans # $35-1 fsincos zero
				4947	short ssincosi - tbl_trans # $35-2 fsincos inf
				4948	short ssincosqnan - tbl_trans # $35-3 fsincos qnan
				4949	short ssincosd - tbl_trans # $35-5 fsincos denorm
				4950	short ssincossnan - tbl_trans # $35-4 fsincos snan
				4951	short tbl_trans - tbl_trans # $35-6 fsincos unnorm
				4952	short tbl_trans - tbl_trans # $35-7 ERROR
				4953
				4954	short ssincos - tbl_trans # $36-0 fsincos norm
				4955	short ssincosz - tbl_trans # $36-1 fsincos zero
				4956	short ssincosi - tbl_trans # $36-2 fsincos inf
				4957	short ssincosqnan - tbl_trans # $36-3 fsincos qnan
				4958	short ssincosd - tbl_trans # $36-5 fsincos denorm
				4959	short ssincossnan - tbl_trans # $36-4 fsincos snan
				4960	short tbl_trans - tbl_trans # $36-6 fsincos unnorm
				4961	short tbl_trans - tbl_trans # $36-7 ERROR
				4962
				4963	short ssincos - tbl_trans # $37-0 fsincos norm
				4964	short ssincosz - tbl_trans # $37-1 fsincos zero
				4965	short ssincosi - tbl_trans # $37-2 fsincos inf
				4966	short ssincosqnan - tbl_trans # $37-3 fsincos qnan
				4967	short ssincosd - tbl_trans # $37-5 fsincos denorm
				4968	short ssincossnan - tbl_trans # $37-4 fsincos snan
				4969	short tbl_trans - tbl_trans # $37-6 fsincos unnorm
				4970	short tbl_trans - tbl_trans # $37-7 ERROR
				4971
				4972	##########
				4973
				4974	# funimp_iacc: the instruction fetch access for the displacement word
				4975	# for the fdbcc emulation failed. here, we create an access error frame
				4976	# from the current frame and branch to _real_access().
				4977	funimp_iacc:
				4978	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1
				4979	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				4980	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				4981
				4982	mov.l USER_FPIAR(%a6),EXC_PC(%a6) # frame PC := saved FPIAR value
				4983
				4984	unlk %a6 # drop the a6 frame before editing the exception frame
				4985
				4986	mov.l (%sp),-(%sp) # open 4 more bytes: copy SR,hi(PC) down
				4987	mov.w 0x8(%sp),0x4(%sp) # store lo(PC)
				4988	mov.w &0x4008,0x6(%sp) # store voff
				4989	mov.l 0x2(%sp),0x8(%sp) # store EA (copy of the PC now at 0x2)
				4990	mov.l &0x09428001,0xc(%sp) # store FSLW
				4991
				4992	btst &0x5,(%sp) # user or supervisor mode?
				4993	beq.b funimp_iacc_end # user: leave FSLW as stored
				4994	bset &0x2,0xd(%sp) # supervisor: set TM bit in FSLW
				4995
				4996	funimp_iacc_end:
				4997	bra.l _real_access # hand the rebuilt frame to _real_access
				4998
				4999	#########################################################################
				5000	# ssin(): computes the sine of a normalized input #
				5001	# ssind(): computes the sine of a denormalized input #
				5002	# scos(): computes the cosine of a normalized input #
				5003	# scosd(): computes the cosine of a denormalized input #
				5004	# ssincos(): computes the sine and cosine of a normalized input #
				5005	# ssincosd(): computes the sine and cosine of a denormalized input #
				5006	# #
				5007	# INPUT *************************************************************** #
				5008	# a0 = pointer to extended precision input #
				5009	# d0 = round precision,mode #
				5010	# #
				5011	# OUTPUT ************************************************************** #
				5012	# fp0 = sin(X) or cos(X) #
				5013	# #
				5014	# For ssincos(X): #
				5015	# fp0 = sin(X) #
				5016	# fp1 = cos(X) #
				5017	# #
				5018	# ACCURACY and MONOTONICITY ******************************************* #
				5019	# The returned result is within 1 ulp in 64 significant bits, i.e. #
				5020	# within 0.5001 ulp to 53 bits if the result is subsequently #
				5021	# rounded to double precision. The result is provably monotonic #
				5022	# in double precision. #
				5023	# #
				5024	# ALGORITHM *********************************************************** #
				5025	# #
				5026	# SIN and COS: #
				5027	# 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. #
				5028	# #
				5029	# 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. #
				5030	# #
				5031	# 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
				5032	# k = N mod 4, so in particular, k = 0,1,2,or 3. #
				5033	# Overwrite k by k := k + AdjN. #
				5034	# #
				5035	# 4. If k is even, go to 6. #
				5036	# #
				5037	# 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. #
				5038	# Return sgn*cos(r) where cos(r) is approximated by an #
				5039	# even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), #
				5040	# s = r*r. #
				5041	# Exit. #
				5042	# #
				5043	# 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) #
				5044	# where sin(r) is approximated by an odd polynomial in r #
				5045	# r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. #
				5046	# Exit. #
				5047	# #
				5048	# 7. If |X| > 1, go to 9. #
				5049	# #
				5050	# 8. (|X|<2**(-40)) If SIN is invoked, return X; #
				5051	# otherwise return 1. #
				5052	# #
				5053	# 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
				5054	# go back to 3. #
				5055	# #
				5056	# SINCOS: #
				5057	# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
				5058	# #
				5059	# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
				5060	# k = N mod 4, so in particular, k = 0,1,2,or 3. #
				5061	# #
				5062	# 3. If k is even, go to 5. #
				5063	# #
				5064	# 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. #
				5065	# j1 exclusive or with the l.s.b. of k. #
				5066	# sgn1 := (-1)**j1, sgn2 := (-1)**j2. #
				5067	# SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where #
				5068	# sin(r) and cos(r) are computed as odd and even #
				5069	# polynomials in r, respectively. Exit #
				5070	# #
				5071	# 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. #
				5072	# SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where #
				5073	# sin(r) and cos(r) are computed as odd and even #
				5074	# polynomials in r, respectively. Exit #
				5075	# #
				5076	# 6. If |X| > 1, go to 8. #
				5077	# #
				5078	# 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. #
				5079	# #
				5080	# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, #
				5081	# go back to 2. #
				5082	# #
				5083	#########################################################################
				5084
				5085	SINA7: long 0xBD6AAA77,0xCCC994F5 # sine coefficient A7; read with fmov.d (double)
				5086	SINA6: long 0x3DE61209,0x7AAE8DA1 # A6; read as double
				5087	SINA5: long 0xBE5AE645,0x2A118AE4 # A5; read as double
				5088	SINA4: long 0x3EC71DE3,0xA5341531 # A4; read as double
				5089	SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 # A3; read with fadd.d (first two longs)
				5090	SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 # A2; read with fadd.x (extended)
				5091	SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 # A1; read with fadd.x (extended)
				5092
				5093	COSB8: long 0x3D2AC4D0,0xD6011EE3 # cosine coefficient B8; read with fmov.d (double)
				5094	COSB7: long 0xBDA9396F,0x9F45AC19 # B7; read as double
				5095	COSB6: long 0x3E21EED9,0x0612C972 # B6; read as double
				5096	COSB5: long 0xBE927E4F,0xB79D9FCF # B5; read as double
				5097	COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 # B4; read with fadd.d (first two longs)
				5098	COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 # B3; read with fadd.x (extended)
				5099	COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E # B2; read with fadd.x (extended)
				5100	COSB1: long 0xBF000000 # B1 = -1/2; read with fadd.s (single)
				5101
				5102	set INARG,FP_SCR0 # same slot as X and RPRIME
				5103
				5104	set X,FP_SCR0 # saved input X; later reused for R' (SINPOLY) or S' (COSPOLY)
				5105	# set XDCARE,X+2
				5106	set XFRAC,X+4 # offset 4 within X
				5107
				5108	set RPRIME,FP_SCR0 # R' = SGN*R in ssincos (shares FP_SCR0 with X)
				5109	set SPRIME,FP_SCR1 # S' = SGN*S in ssincos
				5110
				5111	set POSNEG1,L_SCR1 # +1.0 or -1.0 as a single-precision bit pattern
				5112	set TWOTO63,L_SCR1 # shares L_SCR1 with POSNEG1
				5113
				5114	set ENDFLAG,L_SCR2 # shares L_SCR2 with INT
				5115	set INT,L_SCR2 # N, the integer stored by "fmov.l %fp1,INT(%a6)"
				5116
				5117	set ADJN,L_SCR3 # 0 for ssin, 1 for scos, 4 for ssincos
				5118
				5119	############################################
				5120	global ssin # sine of a normalized input
				5121	ssin:
				5122	mov.l &0,ADJN(%a6) # SIN: SET ADJN TO 0
				5123	bra.b SINBGN # join the code shared with scos
				5124
				5125	############################################
				5126	global scos # cosine of a normalized input
				5127	scos:
				5128	mov.l &1,ADJN(%a6) # COS: SET ADJN TO 1, then fall into SINBGN
				5129
				5130	############################################
				5131	SINBGN:
				5132	#--LOAD X, KEEP A COPY, AND CHECK IF |X| IS TOO SMALL OR LARGE
				5133
				5134	fmov.x (%a0),%fp0 # LOAD INPUT
				5135	fmov.x %fp0,X(%a6) # save input at X
				5136
				5137	# "COMPACTIFY" X: d1 = exponent word in hi half, top 16 mantissa bits in lo half
				5138	mov.l (%a0),%d1 # put exp in hi word
				5139	mov.w 4(%a0),%d1 # fetch hi(man)
				5140	and.l &0x7FFFFFFF,%d1 # strip sign
				5141
				5142	cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)?
				5143	bge.b SOK1 # yes; go check the upper bound
				5144	bra.w SINSM # no; input is very small
				5145
				5146	SOK1:
				5147	cmp.l %d1,&0x4004BC7E # is |X| < 15 PI?
				5148	blt.b SINMAIN # yes; usual case
				5149	bra.w SREDUCEX # no; input is very large
				5150
				5151	#--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
				5152	#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
				5153	SINMAIN:
				5154	fmov.x %fp0,%fp1 # fp1 = X
				5155	fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
				5156
				5157	lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32; 0x200 = 32*16 selects N = 0
				5158
				5159	fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
				5160
				5161	mov.l INT(%a6),%d1 # make a copy of N
				5162	asl.l &4,%d1 # N *= 16
				5163	add.l %d1,%a1 # tbl_addr = a1 + (N*16)
				5164
				5165	# A1 IS THE ADDRESS OF N*PIBY2
				5166	# ...WHICH IS IN TWO PIECES Y1 & Y2
				5167	fsub.x (%a1)+,%fp0 # X-Y1 (Y1 read as extended)
				5168	fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 (Y2 read as single)
				5169
				5170	SINCONT:
				5171	#--continuation from REDUCEX
				5172
				5173	#--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
				5174	mov.l INT(%a6),%d1 # d1 = N
				5175	add.l ADJN(%a6),%d1 # d1 = N+ADJN; SEE IF IT IS ODD OR EVEN
				5176	ror.l &1,%d1 # l.s.b. moves to the sign bit: N+ADJN WAS ODD IFF d1 IS NOW NEGATIVE
				5177	cmp.l %d1,&0
				5178	blt.w COSPOLY # odd: use the cosine polynomial; even: fall into SINPOLY
				5179
				5180	#--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
				5181	#--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
				5182	#--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
				5183	#--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
				5184	#--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
				5185	#--WHERE T=S*S.
				5186	#--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
				5187	#--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
				5188	SINPOLY:
				5189	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				5190
				5191	fmov.x %fp0,X(%a6) # X IS R
				5192	fmul.x %fp0,%fp0 # FP0 IS S
				5193
				5194	fmov.d SINA7(%pc),%fp3 # fp3 = A7 (odd-index chain)
				5195	fmov.d SINA6(%pc),%fp2 # fp2 = A6 (even-index chain)
				5196
				5197	fmov.x %fp0,%fp1
				5198	fmul.x %fp1,%fp1 # FP1 IS T
				5199
				5200	ror.l &1,%d1 # second rotate: bit 1 of N+ADJN reaches the sign bit
				5201	and.l &0x80000000,%d1 # keep only that sign bit
				5202	# ...LEAST SIG. BIT OF J (ONCE-ROTATED D1) NOW IN SIGN POSITION
				5203	eor.l %d1,X(%a6) # X IS NOW R'= SGN*R
				5204
				5205	fmul.x %fp1,%fp3 # TA7
				5206	fmul.x %fp1,%fp2 # TA6
				5207
				5208	fadd.d SINA5(%pc),%fp3 # A5+TA7
				5209	fadd.d SINA4(%pc),%fp2 # A4+TA6
				5210
				5211	fmul.x %fp1,%fp3 # T(A5+TA7)
				5212	fmul.x %fp1,%fp2 # T(A4+TA6)
				5213
				5214	fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7)
				5215	fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6)
				5216
				5217	fmul.x %fp3,%fp1 # T(A3+T(A5+TA7))
				5218
				5219	fmul.x %fp0,%fp2 # S(A2+T(A4+TA6))
				5220	fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7))
				5221	fmul.x X(%a6),%fp0 # R'*S
				5222
				5223	fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
				5224
				5225	fmul.x %fp1,%fp0 # SIN(R')-R'
				5226
				5227	fmovm.x (%sp)+,&0x30 # restore fp2/fp3
				5228
				5229	fmov.l %d0,%fpcr # restore users round mode,prec
				5230	fadd.x X(%a6),%fp0 # last inst - possible exception set; fp0 = R' + (SIN(R')-R')
				5231	bra t_inx2
				5232
				5233	#--LET J BE THE LEAST SIG. BIT OF D1 (AS ROTATED IN SINCONT), LET SGN := (-1)**J.
				5234	#--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY
				5235	#--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
				5236	#--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
				5237	#--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
				5238	#--WHERE T=S*S.
				5239	#--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
				5240	#--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
				5241	#--AND IS THEREFORE STORED AS SINGLE PRECISION.
				5242	COSPOLY:
				5243	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				5244
				5245	fmul.x %fp0,%fp0 # FP0 IS S
				5246
				5247	fmov.d COSB8(%pc),%fp2 # fp2 = B8 (even-index chain)
				5248	fmov.d COSB7(%pc),%fp3 # fp3 = B7 (odd-index chain)
				5249
				5250	fmov.x %fp0,%fp1
				5251	fmul.x %fp1,%fp1 # FP1 IS T
				5252
				5253	fmov.x %fp0,X(%a6) # X IS S
				5254	ror.l &1,%d1 # second rotate: bit 1 of N+ADJN reaches the sign bit
				5255	and.l &0x80000000,%d1 # keep only that sign bit
				5256	# ...LEAST SIG. BIT OF J (ONCE-ROTATED D1) NOW IN SIGN POSITION
				5257
				5258	fmul.x %fp1,%fp2 # TB8
				5259
				5260	eor.l %d1,X(%a6) # X IS NOW S'= SGN*S
				5261	and.l &0x80000000,%d1 # d1 was already masked above and is unchanged by the eor to memory
				5262
				5263	fmul.x %fp1,%fp3 # TB7
				5264
				5265	or.l &0x3F800000,%d1 # D1 IS SGN (+1.0 OR -1.0) IN SINGLE
				5266	mov.l %d1,POSNEG1(%a6) # POSNEG1 = SGN
				5267
				5268	fadd.d COSB6(%pc),%fp2 # B6+TB8
				5269	fadd.d COSB5(%pc),%fp3 # B5+TB7
				5270
				5271	fmul.x %fp1,%fp2 # T(B6+TB8)
				5272	fmul.x %fp1,%fp3 # T(B5+TB7)
				5273
				5274	fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8)
				5275	fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7)
				5276
				5277	fmul.x %fp1,%fp2 # T(B4+T(B6+TB8))
				5278	fmul.x %fp3,%fp1 # T(B3+T(B5+TB7))
				5279
				5280	fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8))
				5281	fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7))
				5282
				5283	fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8)))
				5284
				5285	fadd.x %fp1,%fp0 # [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))]
				5286
				5287	fmul.x X(%a6),%fp0 # S'*(...) = SGN*COS(R) - SGN
				5288
				5289	fmovm.x (%sp)+,&0x30 # restore fp2/fp3
				5290
				5291	fmov.l %d0,%fpcr # restore users round mode,prec
				5292	fadd.s POSNEG1(%a6),%fp0 # last inst - possible exception set; adds SGN
				5293	bra t_inx2
				5294
				5295	##############################################
				5296
				5297	# SINe: Big OR Small?
				5298	#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
				5299	#--IF |X| < 2**(-40), RETURN X OR 1.
				5300	SINBORS:
				5301	cmp.l %d1,&0x3FFF8000 # compare compact |X| with 1.0
				5302	bgt.l SREDUCEX # |X| > 1: general argument reduction
				5303
				5304	SINSM:
				5305	mov.l ADJN(%a6),%d1 # ADJN is 0 for ssin, 1 for scos
				5306	cmp.l %d1,&0
				5307	bgt.b COSTINY # cosine requested: result is 1
				5308
				5309	# here, the operation may underflow iff the precision is sgl or dbl.
				5310	# extended denorms are handled through another entry point.
				5311	SINTINY:
				5312	# mov.w &0x0000,XDCARE(%a6) # JUST IN CASE
				5313
				5314	fmov.l %d0,%fpcr # restore users round mode,prec
				5315	mov.b &FMOV_OP,%d1 # last inst is MOVE
				5316	fmov.x X(%a6),%fp0 # last inst - possible exception set; SIN(X) = X
				5317	bra t_catch
				5318
				5319	COSTINY:
				5320	fmov.s &0x3F800000,%fp0 # fp0 = 1.0
				5321	fmov.l %d0,%fpcr # restore users round mode,prec
				5322	fadd.s &0x80800000,%fp0 # last inst - possible exception set; adds -2**(-126), presumably to make the result inexact
				5323	bra t_pinx2
				5324
				5325	################################################
				5326	global ssind # sine of a denormalized input
				5327	#--SIN(X) = X FOR DENORMALIZED X
				5328	ssind:
				5329	bra t_extdnrm # result handling is left entirely to t_extdnrm
				5330
				5331	############################################
				5332	global scosd # cosine of a denormalized input
				5333	#--COS(X) = 1 FOR DENORMALIZED X
				5334	scosd:
				5335	fmov.s &0x3F800000,%fp0 # fp0 = 1.0
				5336	bra t_pinx2
				5337
				5338	##################################################
				5339
				5340	global ssincos # sine and cosine of a normalized input
				5341	ssincos:
				5342	#--SET ADJN TO 4
				5343	mov.l &4,ADJN(%a6) # distinct from ssin (0) and scos (1); NOTE(review): presumably tested in SREDUCEX - confirm
				5344
				5345	fmov.x (%a0),%fp0 # LOAD INPUT
				5346	fmov.x %fp0,X(%a6) # save input at X
				5347
				5348	mov.l (%a0),%d1 # put exp in hi word
				5349	mov.w 4(%a0),%d1 # fetch hi(man)
				5350	and.l &0x7FFFFFFF,%d1 # COMPACTIFY X (strip sign)
				5351
				5352	cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
				5353	bge.b SCOK1 # yes; go check the upper bound
				5354	bra.w SCSM # no; input is very small
				5355
				5356	SCOK1:
				5357	cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
				5358	blt.b SCMAIN # yes; usual case
				5359	bra.w SREDUCEX # no; input is very large
				5360
				5361
				5362	#--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI.
				5363	#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
				5364	SCMAIN:
				5365	fmov.x %fp0,%fp1 # fp1 = X
				5366
				5367	fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
				5368
				5369	lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32; 0x200 = 32*16 selects N = 0
				5370
				5371	fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER
				5372
				5373	mov.l INT(%a6),%d1 # make a copy of N
				5374	asl.l &4,%d1 # N *= 16
				5375	add.l %d1,%a1 # ADDRESS OF N*PIBY2, IN Y1, Y2
				5376
				5377	fsub.x (%a1)+,%fp0 # X-Y1
				5378	fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
				5379
				5380	SCCONT:
				5381	#--continuation point from REDUCEX
				5382
				5383	mov.l INT(%a6),%d1 # d1 = N
				5384	ror.l &1,%d1 # l.s.b. of N moves to the sign bit
				5385	cmp.l %d1,&0 # D1 < 0 IFF N IS ODD
				5386	bge.w NEVEN # even N; odd N falls into SNODD
				5387
				5388	SNODD:
				5389	#--REGISTERS SAVED SO FAR: D0, A0, FP2.
				5390	fmovm.x &0x04,-(%sp) # save fp2
				5391
				5392	fmov.x %fp0,RPRIME(%a6) # RPRIME = R
				5393	fmul.x %fp0,%fp0 # FP0 IS S = R*R
				5394	fmov.d SINA7(%pc),%fp1 # A7
				5395	fmov.d COSB8(%pc),%fp2 # B8
				5396	fmul.x %fp0,%fp1 # SA7
				5397	fmul.x %fp0,%fp2 # SB8
				5398
				5399	mov.l %d2,-(%sp) # save d2; used as scratch for the second sign
				5400	mov.l %d1,%d2 # d1 = N rotated right once (sign bit = bit 0 of N)
				5401	ror.l &1,%d2 # sign bit of d2 = bit 1 of N
				5402	and.l &0x80000000,%d2
				5403	eor.l %d1,%d2 # sign bit of d2 = (bit 1 of N) EOR (bit 0 of N)
				5404	and.l &0x80000000,%d2 # d2 = sign mask for sgn2
				5405
				5406	fadd.d SINA6(%pc),%fp1 # A6+SA7
				5407	fadd.d COSB7(%pc),%fp2 # B7+SB8
				5408
				5409	fmul.x %fp0,%fp1 # S(A6+SA7)
				5410	eor.l %d2,RPRIME(%a6) # RPRIME IS NOW R' = SGN2*R
				5411	mov.l (%sp)+,%d2 # restore d2
				5412	fmul.x %fp0,%fp2 # S(B7+SB8)
				5413	ror.l &1,%d1 # sign bit of d1 = bit 1 of N
				5414	and.l &0x80000000,%d1 # d1 = sign mask for sgn1
				5415	mov.l &0x3F800000,POSNEG1(%a6) # POSNEG1 = 1.0 in single
				5416	eor.l %d1,POSNEG1(%a6) # POSNEG1 = SGN1 (+1.0 or -1.0)
				5417
				5418	fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7)
				5419	fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8)
				5420
				5421	fmul.x %fp0,%fp1 # S(A5+S(A6+SA7))
				5422	fmul.x %fp0,%fp2 # S(B6+S(B7+SB8))
				5423	fmov.x %fp0,SPRIME(%a6) # SPRIME = S
				5424
				5425	fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7))
				5426	eor.l %d1,SPRIME(%a6) # SPRIME IS NOW S' = SGN1*S
				5427	fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8))
				5428
				5429	fmul.x %fp0,%fp1 # S(A4+...)
				5430	fmul.x %fp0,%fp2 # S(B5+...)
				5431
				5432	fadd.d SINA3(%pc),%fp1 # A3+S(A4+...)
				5433	fadd.d COSB4(%pc),%fp2 # B4+S(B5+...)
				5434
				5435	fmul.x %fp0,%fp1 # S(A3+...)
				5436	fmul.x %fp0,%fp2 # S(B4+...)
				5437
				5438	fadd.x SINA2(%pc),%fp1 # A2+S(A3+...)
				5439	fadd.x COSB3(%pc),%fp2 # B3+S(B4+...)
				5440
				5441	fmul.x %fp0,%fp1 # S(A2+...)
				5442	fmul.x %fp0,%fp2 # S(B3+...)
				5443
				5444	fadd.x SINA1(%pc),%fp1 # A1+S(A2+...)
				5445	fadd.x COSB2(%pc),%fp2 # B2+S(B3+...)
				5446
				5447	fmul.x %fp0,%fp1 # S(A1+...)
				5448	fmul.x %fp2,%fp0 # S(B2+...)
				5449
				5450	fmul.x RPRIME(%a6),%fp1 # R'S(A1+...)
				5451	fadd.s COSB1(%pc),%fp0 # B1+S(B2...)
				5452	fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...))
				5453
				5454	fmovm.x (%sp)+,&0x20 # restore fp2
				5455
				5456	fmov.l %d0,%fpcr # restore users round mode,prec
				5457	fadd.x RPRIME(%a6),%fp1 # COS(X) = R' + R'S(A1+...) = SGN2*SIN(R)
				5458	bsr sto_cos # store cosine result
				5459	fadd.s POSNEG1(%a6),%fp0 # SIN(X) = SGN1 + S'(B1+...) = SGN1*COS(R)
				5460	bra t_inx2
				5461
				5462	NEVEN:
				5463	#--REGISTERS SAVED SO FAR: FP2.
				5464	fmovm.x &0x04,-(%sp) # save fp2
				5465
				5466	fmov.x %fp0,RPRIME(%a6) # RPRIME = R
				5467	fmul.x %fp0,%fp0 # FP0 IS S = R*R
				5468
				5469	fmov.d COSB8(%pc),%fp1 # B8
				5470	fmov.d SINA7(%pc),%fp2 # A7
				5471
				5472	fmul.x %fp0,%fp1 # SB8
				5473	fmov.x %fp0,SPRIME(%a6) # SPRIME = S
				5474	fmul.x %fp0,%fp2 # SA7
				5475
				5476	ror.l &1,%d1 # d1 was N rotated once; sign bit is now bit 1 of N
				5477	and.l &0x80000000,%d1 # d1 = sign mask for sgn1
				5478
				5479	fadd.d COSB7(%pc),%fp1 # B7+SB8
				5480	fadd.d SINA6(%pc),%fp2 # A6+SA7
				5481
				5482	eor.l %d1,RPRIME(%a6) # RPRIME IS NOW R' = SGN1*R
				5483	eor.l %d1,SPRIME(%a6) # SPRIME IS NOW S' = SGN1*S
				5484
				5485	fmul.x %fp0,%fp1 # S(B7+SB8)
				5486
				5487	or.l &0x3F800000,%d1 # d1 = SGN1 (+1.0 or -1.0) in single
				5488	mov.l %d1,POSNEG1(%a6) # POSNEG1 = SGN1
				5489
				5490	fmul.x %fp0,%fp2 # S(A6+SA7)
				5491
				5492	fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8)
				5493	fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7)
				5494
				5495	fmul.x %fp0,%fp1 # S(B6+S(B7+SB8))
				5496	fmul.x %fp0,%fp2 # S(A5+S(A6+SA7))
				5497
				5498	fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8))
				5499	fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7))
				5500
				5501	fmul.x %fp0,%fp1 # S(B5+...)
				5502	fmul.x %fp0,%fp2 # S(A4+...)
				5503
				5504	fadd.d COSB4(%pc),%fp1 # B4+S(B5+...)
				5505	fadd.d SINA3(%pc),%fp2 # A3+S(A4+...)
				5506
				5507	fmul.x %fp0,%fp1 # S(B4+...)
				5508	fmul.x %fp0,%fp2 # S(A3+...)
				5509
				5510	fadd.x COSB3(%pc),%fp1 # B3+S(B4+...)
				5511	fadd.x SINA2(%pc),%fp2 # A2+S(A3+...)
				5512
				5513	fmul.x %fp0,%fp1 # S(B3+...)
				5514	fmul.x %fp0,%fp2 # S(A2+...)
				5515
				5516	fadd.x COSB2(%pc),%fp1 # B2+S(B3+...)
				5517	fadd.x SINA1(%pc),%fp2 # A1+S(A2+...)
				5518
				5519	fmul.x %fp0,%fp1 # S(B2+...)
				5520	fmul.x %fp2,%fp0 # S(A1+...)
				5521
				5522
				5523	fadd.s COSB1(%pc),%fp1 # B1+S(B2...)
				5524	fmul.x RPRIME(%a6),%fp0 # R'S(A1+...)
				5525	fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...))
				5526
				5527	fmovm.x (%sp)+,&0x20 # restore fp2
				5528
				5529	fmov.l %d0,%fpcr # restore users round mode,prec
				5530	fadd.s POSNEG1(%a6),%fp1 # COS(X) = SGN1 + S'(B1+...) = SGN1*COS(R)
				5531	bsr sto_cos # store cosine result
				5532	fadd.x RPRIME(%a6),%fp0 # SIN(X) = R' + R'S(A1+...) = SGN1*SIN(R)
				5533	bra t_inx2
				5534
				5535	################################################
				5536
				5537	SCBORS: # NOTE(review): no branch to this label is visible in this chunk
				5538	cmp.l %d1,&0x3FFF8000 # presumably compact |X| vs 1.0 -- confirm how d1 is built
				5539	bgt.w SREDUCEX # greater: use the general argument reduction
				5540
				5541	################################################
				5542
				5543	SCSM: # small-argument exit: cosine result is 1 - 2**(-126), fp0 := X
				5544	# mov.w &0x0000,XDCARE(%a6)
				5545	fmov.s &0x3F800000,%fp1 # fp1 = 1.0
				5546
				5547	fmov.l %d0,%fpcr # load fpcr from d0
				5548	fsub.s &0x00800000,%fp1 # fp1 = 1.0 - 2**(-126)
				5549	bsr sto_cos # store cosine result
				5550	fmov.l %fpcr,%d0 # d0 must have fpcr,too
				5551	mov.b &FMOV_OP,%d1 # last inst is MOVE
				5552	fmov.x X(%a6),%fp0 # fp0 = X
				5553	bra t_catch
				5554
				5555	##############################################
				5556
				5557	global ssincosd
				5558	#--SIN AND COS OF X FOR DENORMALIZED X
				5559	ssincosd:
				5560	mov.l %d0,-(%sp) # save d0
				5561	fmov.s &0x3F800000,%fp1 # cosine result = 1.0
				5562	bsr sto_cos # store cosine result
				5563	mov.l (%sp)+,%d0 # restore d0
				5564	bra t_extdnrm # sine result is produced by t_extdnrm (defined outside this chunk)
				5565
				5566	############################################
				5567
				5568	#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
				5569	#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
				5570	#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
				5571	SREDUCEX: # general argument reduction for the sin/cos path; exits via SRESTORE
				5572	fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
				5573	mov.l %d2,-(%sp) # save d2
				5574	fmov.s &0x00000000,%fp1 # fp1 = 0
				5575
				5576	#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
				5577	#--there is a danger of unwanted overflow in first LOOP iteration. In this
				5578	#--case, reduce argument by one remainder step to make subsequent reduction
				5579	#--safe.
				5580	cmp.l %d1,&0x7ffeffff # is arg dangerously large?
				5581	bne.b SLOOP # no
				5582
				5583	# yes; create 2**16383*PI/2
				5584	mov.w &0x7ffe,FP_SCR0_EX(%a6)
				5585	mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
				5586	clr.l FP_SCR0_LO(%a6)
				5587
				5588	# create low half of 2**16383*PI/2 at FP_SCR1
				5589	mov.w &0x7fdc,FP_SCR1_EX(%a6)
				5590	mov.l &0x85a308d3,FP_SCR1_HI(%a6)
				5591	clr.l FP_SCR1_LO(%a6)
				5592
				5593	ftest.x %fp0 # test sign of argument
				5594	fblt.w sred_neg # negative arg: add the positive constants as built
				5595
				5596	or.b &0x80,FP_SCR0_EX(%a6) # positive arg: set sign bit so the add subtracts
				5597	or.b &0x80,FP_SCR1_EX(%a6) # same for the low half
				5598	sred_neg:
				5599	fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
				5600	fmov.x %fp0,%fp1 # save high result in fp1
				5601	fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
				5602	fsub.x %fp0,%fp1 # determine low component of result
				5603	fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
				5604
				5605	#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
				5606	#--integer quotient will be stored in N
				5607	#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
				5608	SLOOP:
				5609	fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
				5610	mov.w INARG(%a6),%d1 # d1.w = sign/exponent word of R
				5611	mov.l %d1,%a1 # save a copy of d1 (sign is extracted from it in SWORK)
				5612	and.l &0x00007FFF,%d1 # strip sign, keep biased exponent
				5613	sub.l &0x00003FFF,%d1 # d1 = K
				5614	cmp.l %d1,&28
				5615	ble.b SLASTLOOP # K <= 28: this is the final pass
				5616	SCONTLOOP:
				5617	sub.l &27,%d1 # d1 = L := K-27
				5618	mov.b &0,ENDFLAG(%a6) # more passes to come
				5619	bra.b SWORK
				5620	SLASTLOOP:
				5621	clr.l %d1 # d1 = L := 0
				5622	mov.b &1,ENDFLAG(%a6) # mark last pass
				5623
				5624	SWORK:
				5625	#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
				5626	#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
				5627
				5628	#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
				5629	#--2**L * (PIby2_1), 2**L * (PIby2_2)
				5630
				5631	mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
				5632	sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
				5633
				5634	mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
				5635	mov.l &0x4E44152A,FP_SCR0_LO(%a6)
				5636	mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
				5637
				5638	fmov.x %fp0,%fp2
				5639	fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
				5640
				5641	#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
				5642	#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
				5643	#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
				5644	#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
				5645	#--US THE DESIRED VALUE IN FLOATING POINT.
				5646	mov.l %a1,%d2 # recover saved sign/exponent word
				5647	swap %d2 # move it to the high word: sign now in bit 31
				5648	and.l &0x80000000,%d2 # keep only the sign
				5649	or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
				5650	mov.l %d2,TWOTO63(%a6)
				5651	fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
				5652	fsub.s TWOTO63(%a6),%fp2 # fp2 = N
				5653	# fint.x %fp2
				5654
				5655	#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
				5656	mov.l %d1,%d2 # d2 = L
				5657
				5658	add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
				5659	mov.w %d2,FP_SCR0_EX(%a6)
				5660	mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
				5661	clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
				5662
				5663	add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
				5664	mov.w %d1,FP_SCR1_EX(%a6)
				5665	mov.l &0x85A308D3,FP_SCR1_HI(%a6)
				5666	clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
				5667
				5668	mov.b ENDFLAG(%a6),%d1 # d1.b = last-pass flag, tested below
				5669
				5670	#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
				5671	#--P2 = 2**(L) * Piby2_2
				5672	fmov.x %fp2,%fp4 # fp4 = N
				5673	fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
				5674	fmov.x %fp2,%fp5 # fp5 = N
				5675	fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
				5676	fmov.x %fp4,%fp3 # fp3 = W = N*P1
				5677
				5678	#--we want P+p = W+w but |p| <= half ulp of P
				5679	#--Then, we need to compute A := R-P and a := r-p
				5680	fadd.x %fp5,%fp3 # fp3 = P
				5681	fsub.x %fp3,%fp4 # fp4 = W-P
				5682
				5683	fsub.x %fp3,%fp0 # fp0 = A := R - P
				5684	fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
				5685
				5686	fmov.x %fp0,%fp3 # fp3 = A
				5687	fsub.x %fp4,%fp1 # fp1 = a := r - p
				5688
				5689	#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
				5690	#--|r| <= half ulp of R.
				5691	fadd.x %fp1,%fp0 # fp0 = R := A+a
				5692	#--No need to calculate r if this is the last loop
				5693	cmp.b %d1,&0
				5694	bgt.w SRESTORE # ENDFLAG set: done
				5695
				5696	#--Need to calculate r
				5697	fsub.x %fp0,%fp3 # fp3 = A-R
				5698	fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
				5699	bra.w SLOOP
				5700
				5701	SRESTORE:
				5702	fmov.l %fp2,INT(%a6) # INT = N as a long integer
				5703	mov.l (%sp)+,%d2 # restore d2
				5704	fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
				5705
				5706	mov.l ADJN(%a6),%d1
				5707	cmp.l %d1,&4
				5708
				5709	blt.w SINCONT # ADJN < 4: resume at SINCONT (outside this chunk)
				5710	bra.w SCCONT # otherwise resume at SCCONT (outside this chunk)
				5711
				5712	#########################################################################
				5713	# stan(): computes the tangent of a normalized input #
				5714	# stand(): computes the tangent of a denormalized input #
				5715	# #
				5716	# INPUT *************************************************************** #
				5717	# a0 = pointer to extended precision input #
				5718	# d0 = round precision,mode #
				5719	# #
				5720	# OUTPUT ************************************************************** #
				5721	# fp0 = tan(X) #
				5722	# #
				5723	# ACCURACY and MONOTONICITY ******************************************* #
				5724	# The returned result is within 3 ulp in 64 significant bit, i.e. #
				5725	# within 0.5001 ulp to 53 bits if the result is subsequently #
				5726	# rounded to double precision. The result is provably monotonic #
				5727	# in double precision. #
				5728	# #
				5729	# ALGORITHM *********************************************************** #
				5730	# #
				5731	# 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. #
				5732	# #
				5733	# 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let #
				5734	# k = N mod 2, so in particular, k = 0 or 1. #
				5735	# #
				5736	# 3. If k is odd, go to 5. #
				5737	# #
				5738	# 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a #
				5739	# rational function U/V where #
				5740	# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
				5741	# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. #
				5742	# Exit. #
				5743	# #
				5744	# 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by #
				5745	# a rational function U/V where #
				5746	# U = r + r*s*(P1 + s*(P2 + s*P3)), and #
				5747	# V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, #
				5748	# -Cot(r) = -V/U. Exit. #
				5749	# #
				5750	# 6. If |X| > 1, go to 8. #
				5751	# #
				5752	# 7. (|X|<2**(-40)) Tan(X) = X. Exit. #
				5753	# #
				5754	# 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back #
				5755	# to 2. #
				5756	# #
				5757	#########################################################################
				5758
				5759	TANQ4: # Q4, double precision (stan reads it with fmov.d)
				5760	long 0x3EA0B759,0xF50F8688
				5761	TANP3: # P3, double precision (read with fmov.d)
				5762	long 0xBEF2BAA5,0xA8924F04
				5763
				5764	TANQ3: # Q3, double precision in the first 8 bytes (read with fadd.d)
				5765	long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000
				5766
				5767	TANP2: # P2, extended precision (read with fadd.x)
				5768	long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000
				5769
				5770	TANQ2: # Q2, extended precision (read with fadd.x)
				5771	long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000
				5772
				5773	TANP1: # P1, extended precision (read with fadd.x)
				5774	long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000
				5775
				5776	TANQ1: # Q1, extended precision (read with fadd.x)
				5777	long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000
				5778
				5779	INVTWOPI: # not referenced in this chunk; bit pattern is 1/(2*PI) in extended
				5780	long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
				5781
				5782	TWOPI1: # not referenced in this chunk; bit pattern is the leading part of 2*PI
				5783	long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
				5784	TWOPI2: # not referenced in this chunk; presumably the trailing part of 2*PI
				5785	long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
				5786
				5787	#--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
				5788	#--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
				5789	#--MOST 69 BITS LONG.
				5790	# global PITBL
				5791	PITBL: # 65 entries x 16 bytes (12-byte ext + 4-byte sgl); TANMAIN indexes it as PITBL+0x200+16*N
				5792	long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 # N = -32
				5793	long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
				5794	long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
				5795	long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000
				5796	long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
				5797	long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
				5798	long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
				5799	long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
				5800	long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
				5801	long 0xC0040000,0x90836524,0x88034B96,0x20B00000
				5802	long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
				5803	long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
				5804	long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
				5805	long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
				5806	long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
				5807	long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
				5808	long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
				5809	long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
				5810	long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
				5811	long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
				5812	long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
				5813	long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
				5814	long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
				5815	long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
				5816	long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
				5817	long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
				5818	long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
				5819	long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
				5820	long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
				5821	long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
				5822	long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
				5823	long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
				5824	long 0x00000000,0x00000000,0x00000000,0x00000000 # N = 0 (at PITBL+0x200)
				5825	long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
				5826	long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
				5827	long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
				5828	long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
				5829	long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
				5830	long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
				5831	long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
				5832	long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
				5833	long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
				5834	long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
				5835	long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000
				5836	long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
				5837	long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
				5838	long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
				5839	long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
				5840	long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
				5841	long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
				5842	long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
				5843	long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
				5844	long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
				5845	long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
				5846	long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000
				5847	long 0x40040000,0x90836524,0x88034B96,0xA0B00000
				5848	long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
				5849	long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
				5850	long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
				5851	long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
				5852	long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
				5853	long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000
				5854	long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
				5855	long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
				5856	long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 # N = +32
				5857
				5858	set INARG,FP_SCR0 # ext scratch: LOOP stores the current remainder R here
				5859
				5860	set TWOTO63,L_SCR1 # sgl scratch: SIGN(INARG)*2**63 used to round fp2 to an integer
				5861	set INT,L_SCR1 # same slot as TWOTO63; RESTORE stores N here as a long
				5862	set ENDFLAG,L_SCR2 # byte flag: 1 on the last reduction pass, else 0
				5863
				5864	global stan
				5865	stan: # tangent of a normalized extended input at (a0); d0 = round precision,mode
				5866	fmov.x (%a0),%fp0 # LOAD INPUT
				5867
				5868	mov.l (%a0),%d1 # high word of d1 = sign/exponent word
				5869	mov.w 4(%a0),%d1 # low word of d1 = top 16 bits of mantissa
				5870	and.l &0x7FFFFFFF,%d1 # drop sign: d1 = compact form of |X|
				5871
				5872	cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)?
				5873	bge.b TANOK1
				5874	bra.w TANSM # no: tiny argument
				5875	TANOK1:
				5876	cmp.l %d1,&0x4004BC7E # |X| < 15 PI?
				5877	blt.b TANMAIN
				5878	bra.w REDUCEX # no: general argument reduction
				5879
				5880	TANMAIN:
				5881	#--THIS IS THE USUAL CASE, |X| <= 15 PI.
				5882	#--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
				5883	fmov.x %fp0,%fp1
				5884	fmul.d TWOBYPI(%pc),%fp1 # X*2/PI
				5885
				5886	lea.l PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32
				5887
				5888	fmov.l %fp1,%d1 # CONVERT TO INTEGER
				5889
				5890	asl.l &4,%d1 # d1 = 16*N (table entries are 16 bytes)
				5891	add.l %d1,%a1 # ADDRESS N*PIBY2 IN Y1, Y2
				5892
				5893	fsub.x (%a1)+,%fp0 # X-Y1
				5894
				5895	fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2
				5896
				5897	ror.l &5,%d1 # bit 4 of 16*N (= bit 0 of N) -> bit 31
				5898	and.l &0x80000000,%d1 # N WAS ODD IFF d1 < 0
				5899
				5900	TANCONT: # also entered from RESTORE with the parity of N in bit 31 of d1
				5901	fmovm.x &0x0c,-(%sp) # save fp2,fp3
				5902
				5903	cmp.l %d1,&0
				5904	blt.w NODD # N odd
				5905
				5906	fmov.x %fp0,%fp1
				5907	fmul.x %fp1,%fp1 # S = R*R
				5908
				5909	fmov.d TANQ4(%pc),%fp3
				5910	fmov.d TANP3(%pc),%fp2
				5911
				5912	fmul.x %fp1,%fp3 # SQ4
				5913	fmul.x %fp1,%fp2 # SP3
				5914
				5915	fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
				5916	fadd.x TANP2(%pc),%fp2 # P2+SP3
				5917
				5918	fmul.x %fp1,%fp3 # S(Q3+SQ4)
				5919	fmul.x %fp1,%fp2 # S(P2+SP3)
				5920
				5921	fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
				5922	fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
				5923
				5924	fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4))
				5925	fmul.x %fp1,%fp2 # S(P1+S(P2+SP3))
				5926
				5927	fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
				5928	fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3))
				5929
				5930	fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4)))
				5931
				5932	fadd.x %fp2,%fp0 # U = R+RS(P1+S(P2+SP3))
				5933
				5934	fadd.s &0x3F800000,%fp1 # V = 1+S(Q1+...)
				5935
				5936	fmovm.x (%sp)+,&0x30 # restore fp2,fp3
				5937
				5938	fmov.l %d0,%fpcr # restore users round mode,prec
				5939	fdiv.x %fp1,%fp0 # U/V; last inst - possible exception set
				5940	bra t_inx2
				5941
				5942	NODD: # N odd: result is V/(-U), with U in fp1 and V in fp0
				5943	fmov.x %fp0,%fp1 # fp1 = R
				5944	fmul.x %fp0,%fp0 # S = R*R
				5945
				5946	fmov.d TANQ4(%pc),%fp3
				5947	fmov.d TANP3(%pc),%fp2
				5948
				5949	fmul.x %fp0,%fp3 # SQ4
				5950	fmul.x %fp0,%fp2 # SP3
				5951
				5952	fadd.d TANQ3(%pc),%fp3 # Q3+SQ4
				5953	fadd.x TANP2(%pc),%fp2 # P2+SP3
				5954
				5955	fmul.x %fp0,%fp3 # S(Q3+SQ4)
				5956	fmul.x %fp0,%fp2 # S(P2+SP3)
				5957
				5958	fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4)
				5959	fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3)
				5960
				5961	fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4))
				5962	fmul.x %fp0,%fp2 # S(P1+S(P2+SP3))
				5963
				5964	fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4))
				5965	fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3))
				5966
				5967	fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4)))
				5968
				5969	fadd.x %fp2,%fp1 # U = R+RS(P1+S(P2+SP3))
				5970	fadd.s &0x3F800000,%fp0 # V = 1+S(Q1+...)
				5971
				5972	fmovm.x (%sp)+,&0x30 # restore fp2,fp3
				5973
				5974	fmov.x %fp1,-(%sp) # push U
				5975	eor.l &0x80000000,(%sp) # flip its sign bit: -U
				5976
				5977	fmov.l %d0,%fpcr # restore users round mode,prec
				5978	fdiv.x (%sp)+,%fp0 # V/(-U); last inst - possible exception set
				5979	bra t_inx2
				5980
				5981	TANBORS: # NOTE(review): no branch to this label is visible in this chunk
				5982	#--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
				5983	#--IF |X| < 2**(-40), RETURN X OR 1.
				5984	cmp.l %d1,&0x3FFF8000 # compact |X| > 1.0?
				5985	bgt.b REDUCEX
				5986
				5987	TANSM: # tiny argument: the result is X, re-loaded under the user's fpcr
				5988	fmov.x %fp0,-(%sp) # push X
				5989	fmov.l %d0,%fpcr # restore users round mode,prec
				5990	mov.b &FMOV_OP,%d1 # last inst is MOVE
				5991	fmov.x (%sp)+,%fp0 # last inst - possible exception set
				5992	bra t_catch
				5993
				5994	global stand
				5995	#--TAN(X) = X FOR DENORMALIZED X
				5996	stand:
				5997	bra t_extdnrm # whole job is delegated to t_extdnrm (defined outside this chunk)
				5998
				5999	#--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
				6000	#--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
				6001	#--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
				6002	REDUCEX: # general argument reduction for stan; exits to TANCONT via RESTORE
				6003	fmovm.x &0x3c,-(%sp) # save {fp2-fp5}
				6004	mov.l %d2,-(%sp) # save d2
				6005	fmov.s &0x00000000,%fp1 # fp1 = 0
				6006
				6007	#--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that
				6008	#--there is a danger of unwanted overflow in first LOOP iteration. In this
				6009	#--case, reduce argument by one remainder step to make subsequent reduction
				6010	#--safe.
				6011	cmp.l %d1,&0x7ffeffff # is arg dangerously large?
				6012	bne.b LOOP # no
				6013
				6014	# yes; create 2**16383*PI/2
				6015	mov.w &0x7ffe,FP_SCR0_EX(%a6)
				6016	mov.l &0xc90fdaa2,FP_SCR0_HI(%a6)
				6017	clr.l FP_SCR0_LO(%a6)
				6018
				6019	# create low half of 2**16383*PI/2 at FP_SCR1
				6020	mov.w &0x7fdc,FP_SCR1_EX(%a6)
				6021	mov.l &0x85a308d3,FP_SCR1_HI(%a6)
				6022	clr.l FP_SCR1_LO(%a6)
				6023
				6024	ftest.x %fp0 # test sign of argument
				6025	fblt.w red_neg # negative arg: add the positive constants as built
				6026
				6027	or.b &0x80,FP_SCR0_EX(%a6) # positive arg: set sign bit so the add subtracts
				6028	or.b &0x80,FP_SCR1_EX(%a6) # same for the low half
				6029	red_neg:
				6030	fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact
				6031	fmov.x %fp0,%fp1 # save high result in fp1
				6032	fadd.x FP_SCR1(%a6),%fp0 # low part of reduction
				6033	fsub.x %fp0,%fp1 # determine low component of result
				6034	fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument.
				6035
				6036	#--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
				6037	#--integer quotient will be stored in N
				6038	#--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
				6039	LOOP:
				6040	fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2
				6041	mov.w INARG(%a6),%d1 # d1.w = sign/exponent word of R
				6042	mov.l %d1,%a1 # save a copy of d1 (sign is extracted from it in WORK)
				6043	and.l &0x00007FFF,%d1 # strip sign, keep biased exponent
				6044	sub.l &0x00003FFF,%d1 # d1 = K
				6045	cmp.l %d1,&28
				6046	ble.b LASTLOOP # K <= 28: this is the final pass
				6047	CONTLOOP:
				6048	sub.l &27,%d1 # d1 = L := K-27
				6049	mov.b &0,ENDFLAG(%a6) # more passes to come
				6050	bra.b WORK
				6051	LASTLOOP:
				6052	clr.l %d1 # d1 = L := 0
				6053	mov.b &1,ENDFLAG(%a6) # mark last pass
				6054
				6055	WORK:
				6056	#--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN
				6057	#--THAT INT( X * (2/PI) / 2**(L) ) < 2**29.
				6058
				6059	#--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
				6060	#--2**L * (PIby2_1), 2**L * (PIby2_2)
				6061
				6062	mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI
				6063	sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI)
				6064
				6065	mov.l &0xA2F9836E,FP_SCR0_HI(%a6)
				6066	mov.l &0x4E44152A,FP_SCR0_LO(%a6)
				6067	mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI)
				6068
				6069	fmov.x %fp0,%fp2
				6070	fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI)
				6071
				6072	#--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
				6073	#--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N
				6074	#--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
				6075	#--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE
				6076	#--US THE DESIRED VALUE IN FLOATING POINT.
				6077	mov.l %a1,%d2 # recover saved sign/exponent word
				6078	swap %d2 # move it to the high word: sign now in bit 31
				6079	and.l &0x80000000,%d2 # keep only the sign
				6080	or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL
				6081	mov.l %d2,TWOTO63(%a6)
				6082	fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED
				6083	fsub.s TWOTO63(%a6),%fp2 # fp2 = N
				6084	# fintrz.x %fp2,%fp2
				6085
				6086	#--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2
				6087	mov.l %d1,%d2 # d2 = L
				6088
				6089	add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2)
				6090	mov.w %d2,FP_SCR0_EX(%a6)
				6091	mov.l &0xC90FDAA2,FP_SCR0_HI(%a6)
				6092	clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1
				6093
				6094	add.l &0x00003FDD,%d1 # BIASED EXP OF 2**L * Piby2_2
				6095	mov.w %d1,FP_SCR1_EX(%a6)
				6096	mov.l &0x85A308D3,FP_SCR1_HI(%a6)
				6097	clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2
				6098
				6099	mov.b ENDFLAG(%a6),%d1 # d1.b = last-pass flag, tested below
				6100
				6101	#--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
				6102	#--P2 = 2**(L) * Piby2_2
				6103	fmov.x %fp2,%fp4 # fp4 = N
				6104	fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1
				6105	fmov.x %fp2,%fp5 # fp5 = N
				6106	fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2
				6107	fmov.x %fp4,%fp3 # fp3 = W = N*P1
				6108
				6109	#--we want P+p = W+w but |p| <= half ulp of P
				6110	#--Then, we need to compute A := R-P and a := r-p
				6111	fadd.x %fp5,%fp3 # fp3 = P
				6112	fsub.x %fp3,%fp4 # fp4 = W-P
				6113
				6114	fsub.x %fp3,%fp0 # fp0 = A := R - P
				6115	fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w
				6116
				6117	fmov.x %fp0,%fp3 # fp3 = A
				6118	fsub.x %fp4,%fp1 # fp1 = a := r - p
				6119
				6120	#--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but
				6121	#--|r| <= half ulp of R.
				6122	fadd.x %fp1,%fp0 # fp0 = R := A+a
				6123	#--No need to calculate r if this is the last loop
				6124	cmp.b %d1,&0
				6125	bgt.w RESTORE # ENDFLAG set: done
				6126
				6127	#--Need to calculate r
				6128	fsub.x %fp0,%fp3 # fp3 = A-R
				6129	fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a
				6130	bra.w LOOP
				6131
				6132	RESTORE:
				6133	fmov.l %fp2,INT(%a6) # INT = N as a long integer
				6134	mov.l (%sp)+,%d2 # restore d2
				6135	fmovm.x (%sp)+,&0x3c # restore {fp2-fp5}
				6136
				6137	mov.l INT(%a6),%d1 # d1 = N
				6138	ror.l &1,%d1 # bit 0 of N -> bit 31, so d1 < 0 iff N is odd
				6139
				6140	bra.w TANCONT
				6141
				6142	#########################################################################
				6143	# satan(): computes the arctangent of a normalized number #
				6144	# satand(): computes the arctangent of a denormalized number #
				6145	# #
				6146	# INPUT *************************************************************** #
				6147	# a0 = pointer to extended precision input #
				6148	# d0 = round precision,mode #
				6149	# #
				6150	# OUTPUT ************************************************************** #
				6151	# fp0 = arctan(X) #
				6152	# #
				6153	# ACCURACY and MONOTONICITY ******************************************* #
				6154	# The returned result is within 2 ulps in 64 significant bit, #
				6155	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				6156	# rounded to double precision. The result is provably monotonic #
				6157	# in double precision. #
				6158	# #
				6159	# ALGORITHM *********************************************************** #
				6160	# Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. #
				6161	# #
				6162	# Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. #
				6163	# Note that k = -4, -3,..., or 3. #
				6164	# Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 #
				6165	# significant bits of X with a bit-1 attached at the 6-th #
				6166	# bit position. Define u to be u = (X-F) / (1 + X*F). #
				6167	# #
				6168	# Step 3. Approximate arctan(u) by a polynomial poly. #
				6169	# #
				6170	# Step 4. Return arctan(F) + poly, arctan(F) is fetched from a #
				6171	# table of values calculated beforehand. Exit. #
				6172	# #
				6173	# Step 5. If |X| >= 16, go to Step 7. #
				6174	# #
				6175	# Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. #
				6176	# #
				6177	# Step 7. Define X' = -1/X. Approximate arctan(X') by an odd #
				6178	# polynomial in X'. #
				6179	# Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. #
				6180	# #
				6181	#########################################################################
				6182
				6183	ATANA3: long 0xBFF6687E,0x314987D8 # 8-byte constant; presumably a double coefficient (consumer outside this chunk)
				6184	ATANA2: long 0x4002AC69,0x34A26DB3
				6185	ATANA1: long 0xBFC2476F,0x4E1DA28E
				6186
				6187	ATANB6: long 0x3FB34444,0x7F876989 # B6..B1: 8-byte constants, presumably double coefficients
				6188	ATANB5: long 0xBFB744EE,0x7FAF45DB
				6189	ATANB4: long 0x3FBC71C6,0x46940220
				6190	ATANB3: long 0xBFC24924,0x921872F9
				6191	ATANB2: long 0x3FC99999,0x99998FA9
				6192	ATANB1: long 0xBFD55555,0x55555555 # bit pattern of -1/3 as a double
				6193
				6194	ATANC5: long 0xBFB70BF3,0x98539E6A # C5..C1: 8-byte constants, presumably double coefficients
				6195	ATANC4: long 0x3FBC7187,0x962D1D7D
				6196	ATANC3: long 0xBFC24924,0x827107B8
				6197	ATANC2: long 0x3FC99999,0x9996263E
				6198	ATANC1: long 0xBFD55555,0x55555536
				6199
				6200	PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # +PI/2 in extended precision
				6201	NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 # -PI/2 in extended precision
				6202
				6203	PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 # +2**(-16382) in extended precision
				6204	NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 # -2**(-16382) in extended precision
				6205
				6206	ATANTBL: # 128 entries x 16 bytes; presumably arctan(F) from Step 4 of the satan header -- lookup code is outside this chunk
				6207	long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
				6208	long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
				6209	long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
				6210	long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
				6211	long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
				6212	long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000
				6213	long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
				6214	long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
				6215	long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
				6216	long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
				6217	long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
				6218	long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
				6219	long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
				6220	long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
				6221	long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
				6222	long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
				6223	long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
				6224	long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
				6225	long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
				6226	long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
				6227	long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
				6228	long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
				6229	long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
				6230	long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
				6231	long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
				6232	long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
				6233	long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
				6234	long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
				6235	long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
				6236	long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
				6237	long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
				6238	long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
				6239	long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
				6240	long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
				6241	long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
				6242	long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
				6243	long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
				6244	long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
				6245	long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
				6246	long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
				6247	long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
				6248	long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
				6249	long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
				6250	long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
				6251	long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
				6252	long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
				6253	long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
				6254	long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
				6255	long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
				6256	long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
				6257	long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
				6258	long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
				6259	long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
				6260	long 0x3FFE0000,0x97731420,0x365E538C,0x00000000
				6261	long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
				6262	long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
				6263	long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
				6264	long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
				6265	long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
				6266	long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
				6267	long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
				6268	long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
				6269	long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
				6270	long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
				6271	long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
				6272	long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
				6273	long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
				6274	long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
				6275	long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000
				6276	long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
				6277	long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
				6278	long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
				6279	long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
				6280	long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
				6281	long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
				6282	long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
				6283	long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
				6284	long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
				6285	long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
				6286	long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
				6287	long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
				6288	long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
				6289	long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
				6290	long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
				6291	long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
				6292	long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
				6293	long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
				6294	long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000
				6295	long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
				6296	long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
				6297	long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
				6298	long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
				6299	long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
				6300	long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
				6301	long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
				6302	long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
				6303	long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
				6304	long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
				6305	long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
				6306	long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
				6307	long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
				6308	long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
				6309	long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
				6310	long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
				6311	long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
				6312	long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000
				6313	long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
				6314	long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
				6315	long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
				6316	long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
				6317	long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
				6318	long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
				6319	long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
				6320	long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
				6321	long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
				6322	long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
				6323	long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
				6324	long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
				6325	long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
				6326	long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
				6327	long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
				6328	long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
				6329	long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
				6330	long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
				6331	long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
				6332	long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
				6333	long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
				6334	long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
				6335
				6336	set X,FP_SCR0 # a6-relative scratch slot; satan stores the ext-prec input X here
				6337	set XDCARE,X+2 # offset 2 within X; not referenced by the code visible here
				6338	set XFRAC,X+4 # upper fraction longword of X (masked to build F)
				6339	set XFRACLO,X+8 # lower fraction longword of X (cleared to build F)
				6340
				6341	set ATANF,FP_SCR1 # a6-relative scratch slot for SIGN(F)*ATAN(|F|)
				6342	set ATANFHI,ATANF+4 # second longword of ATANF
				6343	set ATANFLO,ATANF+8 # third longword of ATANF
				6344
				6345	global satan
				6346	#--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
					#--IN:  a0 = POINTER TO THE EXTENDED-PRECISION INPUT X
					#--     d0 = VALUE WRITTEN TO FPCR BEFORE THE FINAL OPERATION (USER RND MODE,PREC)
					#--OUT: fp0 = ATAN(X)
					#--d1 IS BUILT AS THE "COMPACT FORM" OF |X|: EXPONENT WORD IN THE UPPER 16 BITS
					#--(SIGN CLEARED), TOP 16 FRACTION BITS IN THE LOWER 16 BITS, SO THE RANGE
					#--TESTS BELOW ARE PLAIN INTEGER COMPARES.
				6347	satan:
				6348	fmov.x (%a0),%fp0 # LOAD INPUT
				6349
				6350	mov.l (%a0),%d1 # UPPER WORD OF d1 = SIGN/EXPONENT WORD OF X
				6351	mov.w 4(%a0),%d1 # LOWER WORD OF d1 = TOP 16 BITS OF THE FRACTION
				6352	fmov.x %fp0,X(%a6) # KEEP A COPY OF X IN THE SCRATCH SLOT
				6353	and.l &0x7FFFFFFF,%d1 # CLEAR THE SIGN BIT: d1 = COMPACT |X|
				6354
				6355	cmp.l %d1,&0x3FFB8000 # |X| >= 1/16?
				6356	bge.b ATANOK1
				6357	bra.w ATANSM # NO: SMALL-ARGUMENT POLYNOMIAL
				6358
				6359	ATANOK1:
				6360	cmp.l %d1,&0x4002FFFF # |X| < 16 ?
				6361	ble.b ATANMAIN
				6362	bra.w ATANBIG # NO: LARGE-ARGUMENT PATH
				6363
				6364	#--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE
				6365	#--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
				6366	#--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
				6367	#--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
				6368	#--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
				6369	#--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
				6370	#--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
				6371	#--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
				6372	#--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
				6373	#--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
				6374	#--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
				6375	#--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
				6376	#--WILL INVOLVE A VERY LONG POLYNOMIAL.
				6377
				6378	#--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
				6379	#--WE CHOOSE F TO BE +-2^K * 1.BBBB1
				6380	#--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE
				6381	#--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE
				6382	#--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
				6383	#-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
				6384
				6385	ATANMAIN:
				6386
				6387	and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS
				6388	or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1
				6389	mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F
				6390
				6391	fmov.x %fp0,%fp1 # FP1 IS X
				6392	fmul.x X(%a6),%fp1 # FP1 IS XF, NOTE THAT XF > 0
				6393	fsub.x X(%a6),%fp0 # FP0 IS X-F
				6394	fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F
				6395	fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F)
				6396
				6397	#--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|)
				6398	#--CREATE ATAN(F) AND STORE IT IN ATANF, AND
				6399	#--SAVE REGISTERS FP2.
					#--INDEX ARITHMETIC: d1 STILL HOLDS COMPACT |X|. THE 4 FRACTION BITS SIT IN
					#--BITS 14-11 AND (K+4) IS MOVED TO BITS 17-15, GIVING A 7-BIT FIELD IN
					#--BITS 17-11. SHIFTING RIGHT BY 7 LEAVES INDEX*16, THE BYTE OFFSET OF A
					#--16-BYTE (4-LONGWORD) TABLE ENTRY.
				6400
				6401	mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY
				6402	mov.l %d1,%d2 # THE EXP AND 16 BITS OF X
				6403	and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION
				6404	and.l &0x7FFF0000,%d2 # EXPONENT OF F
				6405	sub.l &0x3FFB0000,%d2 # K+4
				6406	asr.l &1,%d2 # ALIGN K+4 JUST ABOVE THE 4 FRACTION BITS
				6407	add.l %d2,%d1 # THE 7 BITS IDENTIFYING F
				6408	asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|)
				6409	lea ATANTBL(%pc),%a1
				6410	add.l %d1,%a1 # ADDRESS OF ATAN(|F|)
				6411	mov.l (%a1)+,ATANF(%a6)
				6412	mov.l (%a1)+,ATANFHI(%a6)
				6413	mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|)
				6414	mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN
				6415	and.l &0x80000000,%d1 # SIGN(F)
				6416	or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|)
				6417	mov.l (%sp)+,%d2 # RESTORE d2
				6418
				6419	#--THAT'S ALL I HAVE TO DO FOR NOW,
				6420	#--BUT ALAS, THE DIVIDE IS STILL CRANKING!
				6421
				6422	#--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
				6423	#--U + A1UV(A2 + V(A3 + V)), V = U*U
				6424	#--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
				6425	#--THE NATURAL FORM IS U + UV(A1 + V(A2 + VA3))
				6426	#--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3.
				6427	#--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
				6428	#--PARTS A1UV AND (A2 + ... STUFF) MORE LOAD-BALANCED
				6429
				6430	fmovm.x &0x04,-(%sp) # save fp2
				6431
				6432	fmov.x %fp0,%fp1
				6433	fmul.x %fp1,%fp1 # FP1 IS V = U*U
				6434	fmov.d ATANA3(%pc),%fp2
				6435	fadd.x %fp1,%fp2 # A3+V
				6436	fmul.x %fp1,%fp2 # V*(A3+V)
				6437	fmul.x %fp0,%fp1 # U*V
				6438	fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V)
				6439	fmul.d ATANA1(%pc),%fp1 # A1UV
				6440	fmul.x %fp2,%fp1 # A1UV(A2+V(A3+V))
				6441	fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED
				6442
				6443	fmovm.x (%sp)+,&0x20 # restore fp2
				6444
				6445	fmov.l %d0,%fpcr # restore users rnd mode,prec
				6446	fadd.x ATANF(%a6),%fp0 # ATAN(X)
				6447	bra t_inx2 # EXIT VIA t_inx2 (DEFINED ELSEWHERE IN THE FILE)
				6448
				6449	ATANBORS:
				6450	#--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
				6451	#--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
					#--NOTE(review): THE COMPARE BELOW TESTS d1, NOT d0 AS THE COMMENT ABOVE SAYS,
					#--AND NO BRANCH TO ATANBORS IS VISIBLE IN THIS SECTION (satan BRANCHES
					#--DIRECTLY TO ATANSM AND ATANBIG). CONFIRM WHETHER THIS LABEL IS STILL USED.
				6452	cmp.l %d1,&0x3FFF8000 # COMPACT |X| VS COMPACT 1.0
				6453	bgt.w ATANBIG # I.E. |X| >= 16
				6454
				6455	ATANSM:
				6456	#--|X| <= 1/16
				6457	#--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
				6458	#--ATAN(X) BY X + XY(B1+Y(B2+Y(B3+Y(B4+Y(B5+Y*B6)))))
				6459	#--WHICH IS X + XY( [B1+Z(B3+ZB5)] + [Y(B2+Z(B4+Z*B6)] )
				6460	#--WHERE Y = XX, AND Z = YY.
				6461
				6462	cmp.l %d1,&0x3FD78000 # COMPACT FORM OF 2^(-40)
				6463	blt.w ATANTINY
				6464
				6465	#--COMPUTE POLYNOMIAL
				6466	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				6467
				6468	fmul.x %fp0,%fp0 # FP0 IS Y = X*X
				6469
				6470	fmov.x %fp0,%fp1
				6471	fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
				6472
				6473	fmov.d ATANB6(%pc),%fp2
				6474	fmov.d ATANB5(%pc),%fp3
				6475
				6476	fmul.x %fp1,%fp2 # Z*B6
				6477	fmul.x %fp1,%fp3 # Z*B5
				6478
				6479	fadd.d ATANB4(%pc),%fp2 # B4+Z*B6
				6480	fadd.d ATANB3(%pc),%fp3 # B3+Z*B5
				6481
				6482	fmul.x %fp1,%fp2 # Z(B4+ZB6)
				6483	fmul.x %fp3,%fp1 # Z(B3+ZB5)
				6484
				6485	fadd.d ATANB2(%pc),%fp2 # B2+Z(B4+ZB6)
				6486	fadd.d ATANB1(%pc),%fp1 # B1+Z(B3+ZB5)
				6487
				6488	fmul.x %fp0,%fp2 # Y(B2+Z(B4+Z*B6))
				6489	fmul.x X(%a6),%fp0 # X*Y
				6490
				6491	fadd.x %fp2,%fp1 # [B1+Z(B3+ZB5)]+[Y(B2+Z(B4+Z*B6))]
				6492
				6493	fmul.x %fp1,%fp0 # XY([B1+Z(B3+ZB5)]+[Y(B2+Z(B4+Z*B6))])
				6494
				6495	fmovm.x (%sp)+,&0x30 # restore fp2/fp3
				6496
				6497	fmov.l %d0,%fpcr # restore users rnd mode,prec
				6498	fadd.x X(%a6),%fp0 # ATAN(X) = X + XY(...), DONE UNDER THE USER FPCR
				6499	bra t_inx2
				6500
				6501	ATANTINY:
				6502	#--|X| < 2^(-40), ATAN(X) = X
					#--THE RESULT IS X ITSELF, RE-LOADED UNDER THE USER FPCR. d1 CARRIES FMOV_OP
					#--INTO t_catch (DEFINED ELSEWHERE) TO IDENTIFY THE LAST INSTRUCTION AS A MOVE.
				6503
				6504	fmov.l %d0,%fpcr # restore users rnd mode,prec
				6505	mov.b &FMOV_OP,%d1 # last inst is MOVE
				6506	fmov.x X(%a6),%fp0 # last inst - possible exception set
				6507
				6508	bra t_catch
				6509
				6510	ATANBIG:
				6511	#--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE,
				6512	#--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
				6513	cmp.l %d1,&0x40638000 # COMPACT FORM OF 2^(100)
				6514	bgt.w ATANHUGE
				6515
				6516	#--APPROXIMATE ATAN(-1/X) BY
				6517	#--X'+X'Y(C1+Y(C2+Y(C3+Y(C4+YC5)))), X' = -1/X, Y = X'*X'
				6518	#--THIS CAN BE RE-WRITTEN AS
				6519	#--X'+X'Y( [C1+Z(C3+ZC5)] + [Y(C2+ZC4)] ), Z = Y*Y.
				6520
				6521	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				6522
				6523	fmov.s &0xBF800000,%fp1 # LOAD -1
				6524	fdiv.x %fp0,%fp1 # FP1 IS -1/X
				6525
				6526	#--DIVIDE IS STILL CRANKING
				6527
				6528	fmov.x %fp1,%fp0 # FP0 IS X'
				6529	fmul.x %fp0,%fp0 # FP0 IS Y = X'*X'
				6530	fmov.x %fp1,X(%a6) # X IS REALLY X'
				6531
				6532	fmov.x %fp0,%fp1
				6533	fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y
				6534
				6535	fmov.d ATANC5(%pc),%fp3
				6536	fmov.d ATANC4(%pc),%fp2
				6537
				6538	fmul.x %fp1,%fp3 # Z*C5
				6539	fmul.x %fp1,%fp2 # Z*C4
				6540
				6541	fadd.d ATANC3(%pc),%fp3 # C3+Z*C5
				6542	fadd.d ATANC2(%pc),%fp2 # C2+Z*C4
				6543
				6544	fmul.x %fp3,%fp1 # Z(C3+ZC5), FP3 RELEASED
				6545	fmul.x %fp0,%fp2 # Y(C2+ZC4)
				6546
				6547	fadd.d ATANC1(%pc),%fp1 # C1+Z(C3+ZC5)
				6548	fmul.x X(%a6),%fp0 # X'*Y
				6549
				6550	fadd.x %fp2,%fp1 # [Y(C2+ZC4)]+[C1+Z(C3+ZC5)]
				6551
				6552	fmul.x %fp1,%fp0 # X'Y([C1+Z(C3+ZC5)]
				6553	# ... +[Y(C2+ZC4)])
				6554	fadd.x X(%a6),%fp0 # FP0 IS ATAN(X') = X' + X'Y(...)
				6555
				6556	fmovm.x (%sp)+,&0x30 # restore fp2/fp3
				6557
				6558	fmov.l %d0,%fpcr # restore users rnd mode,prec
				6559	tst.b (%a0) # SIGN OF THE ORIGINAL X (X(%a6) NOW HOLDS X')
				6560	bpl.b pos_big
				6561
				6562	neg_big:
				6563	fadd.x NPIBY2(%pc),%fp0 # X < 0: -PI/2 + ATAN(-1/X)
				6564	bra t_minx2
				6565
				6566	pos_big:
				6567	fadd.x PPIBY2(%pc),%fp0 # X > 0: +PI/2 + ATAN(-1/X)
				6568	bra t_pinx2
				6569
				6570	ATANHUGE:
				6571	#--RETURN SIGN(X)(PIBY2 - TINY) = SIGN(X)PIBY2 - SIGN(X)*TINY
					#--THE CONSTANT IS LOADED FIRST; THE USER FPCR IS INSTALLED ONLY FOR THE
					#--FINAL ADD OF THE OPPOSITE-SIGNED TINY VALUE.
				6572	tst.b (%a0) # SIGN OF X
				6573	bpl.b pos_huge
				6574
				6575	neg_huge:
				6576	fmov.x NPIBY2(%pc),%fp0 # -PI/2
				6577	fmov.l %d0,%fpcr # USER RND MODE,PREC FOR THE FINAL ADD
				6578	fadd.x PTINY(%pc),%fp0 # -PI/2 + TINY
				6579	bra t_minx2
				6580
				6581	pos_huge:
				6582	fmov.x PPIBY2(%pc),%fp0 # +PI/2
				6583	fmov.l %d0,%fpcr # USER RND MODE,PREC FOR THE FINAL ADD
				6584	fadd.x NTINY(%pc),%fp0 # +PI/2 - TINY
				6585	bra t_pinx2
				6586
				6587	global satand
				6588	#--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
					#--ALL WORK IS DELEGATED TO t_extdnrm (DEFINED ELSEWHERE IN THE FILE).
				6589	satand:
				6590	bra t_extdnrm
				6591
				6592	#########################################################################
				6593	# sasin(): computes the inverse sine of a normalized input #
				6594	# sasind(): computes the inverse sine of a denormalized input #
				6595	# #
				6596	# INPUT *************************************************************** #
				6597	# a0 = pointer to extended precision input #
				6598	# d0 = round precision,mode #
				6599	# #
				6600	# OUTPUT ************************************************************** #
				6601	# fp0 = arcsin(X) #
				6602	# #
				6603	# ACCURACY and MONOTONICITY ******************************************* #
				6604	# The returned result is within 3 ulps in 64 significant bit, #
				6605	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				6606	# rounded to double precision. The result is provably monotonic #
				6607	# in double precision. #
				6608	# #
				6609	# ALGORITHM *********************************************************** #
				6610	# #
				6611	# ASIN #
				6612	# 1. If \|X\| >= 1, go to 3. #
				6613	# #
				6614	# 2. (\|X\| < 1) Calculate asin(X) by #
				6615	# z := sqrt( [1-X][1+X] ) #
				6616	# asin(X) = atan( x / z ). #
				6617	# Exit. #
				6618	# #
				6619	# 3. If \|X\| > 1, go to 5. #
				6620	# #
				6621	# 4. (\|X\| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.#
				6622	# #
				6623	# 5. (\|X\| > 1) Generate an invalid operation by 0 * infinity. #
				6624	# Exit. #
				6625	# #
				6626	#########################################################################
				6627
				6628	global sasin
				6629	sasin:
				6630	fmov.x (%a0),%fp0 # LOAD INPUT
				6631
				6632	mov.l (%a0),%d1 # UPPER WORD OF d1 = SIGN/EXPONENT WORD OF X
				6633	mov.w 4(%a0),%d1 # LOWER WORD OF d1 = TOP 16 BITS OF THE FRACTION
				6634	and.l &0x7FFFFFFF,%d1 # CLEAR THE SIGN BIT: d1 = COMPACT |X|
				6635	cmp.l %d1,&0x3FFF8000 # |X| >= 1 ?
				6636	bge.b ASINBIG
				6637
				6638	# This catch is added here for the '060 QSP. Originally, the call to
				6639	# satan() would handle this case by causing the exception which would
				6640	# not be caught until gen_except(). Now, with the exceptions being
				6641	# detected inside of satan(), the exception would have been handled there
				6642	# instead of inside sasin() as expected.
				6643	cmp.l %d1,&0x3FD78000 # COMPACT FORM OF 2^(-40)
				6644	blt.w ASINTINY
				6645
				6646	#--THIS IS THE USUAL CASE, |X| < 1
				6647	#--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
				6648
				6649	ASINMAIN:
				6650	fmov.s &0x3F800000,%fp1 # FP1 = 1.0
				6651	fsub.x %fp0,%fp1 # 1-X
				6652	fmovm.x &0x4,-(%sp) # {fp2}
				6653	fmov.s &0x3F800000,%fp2 # FP2 = 1.0
				6654	fadd.x %fp0,%fp2 # 1+X
				6655	fmul.x %fp2,%fp1 # (1+X)(1-X)
				6656	fmovm.x (%sp)+,&0x20 # {fp2}
				6657	fsqrt.x %fp1 # SQRT([1-X][1+X])
				6658	fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X])
				6659	fmovm.x &0x01,-(%sp) # save X/SQRT(...)
				6660	lea (%sp),%a0 # pass ptr to X/SQRT(...)
				6661	bsr satan # d0 IS PASSED THROUGH UNCHANGED TO satan
				6662	add.l &0xc,%sp # clear X/SQRT(...) from stack
				6663	bra t_inx2
				6664
				6665	ASINBIG:
				6666	fabs.x %fp0 # |X|
				6667	fcmp.s %fp0,&0x3F800000 # |X| VS 1.0
				6668	fbgt t_operr # cause an operr exception
				6669
				6670	#--|X| = 1, ASIN(X) = +- PI/2.
				6671	ASINONE:
				6672	fmov.x PIBY2(%pc),%fp0
				6673	mov.l (%a0),%d1
				6674	and.l &0x80000000,%d1 # SIGN BIT OF X
				6675	or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT
				6676	mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT
				6677	fmov.l %d0,%fpcr # USER RND MODE,PREC FOR THE FINAL MULTIPLY
				6678	fmul.s (%sp)+,%fp0 # PI/2 * (+-1), POPPING THE TEMPORARY
				6679	bra t_inx2
				6680
				6681	#--|X| < 2^(-40), ASIN(X) = X
				6682	ASINTINY:
				6683	fmov.l %d0,%fpcr # restore users rnd mode,prec
				6684	mov.b &FMOV_OP,%d1 # last inst is MOVE
				6685	fmov.x (%a0),%fp0 # last inst - possible exception
				6686	bra t_catch
				6687
				6688	global sasind
				6689	#--ASIN(X) = X FOR DENORMALIZED X
					#--ALL WORK IS DELEGATED TO t_extdnrm (DEFINED ELSEWHERE IN THE FILE).
				6690	sasind:
				6691	bra t_extdnrm
				6692
				6693	#########################################################################
				6694	# sacos(): computes the inverse cosine of a normalized input #
				6695	# sacosd(): computes the inverse cosine of a denormalized input #
				6696	# #
				6697	# INPUT *************************************************************** #
				6698	# a0 = pointer to extended precision input #
				6699	# d0 = round precision,mode #
				6700	# #
				6701	# OUTPUT ************************************************************** #
				6702	# fp0 = arccos(X) #
				6703	# #
				6704	# ACCURACY and MONOTONICITY ******************************************* #
				6705	# The returned result is within 3 ulps in 64 significant bit, #
				6706	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				6707	# rounded to double precision. The result is provably monotonic #
				6708	# in double precision. #
				6709	# #
				6710	# ALGORITHM *********************************************************** #
				6711	# #
				6712	# ACOS #
				6713	# 1. If \|X\| >= 1, go to 3. #
				6714	# #
				6715	# 2. (\|X\| < 1) Calculate acos(X) by #
				6716	# z := (1-X) / (1+X) #
				6717	# acos(X) = 2 * atan( sqrt(z) ). #
				6718	# Exit. #
				6719	# #
				6720	# 3. If \|X\| > 1, go to 5. #
				6721	# #
				6722	# 4. (\|X\| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. #
				6723	# #
				6724	# 5. (\|X\| > 1) Generate an invalid operation by 0 * infinity. #
				6725	# Exit. #
				6726	# #
				6727	#########################################################################
				6728
				6729	global sacos
				6730	sacos:
				6731	fmov.x (%a0),%fp0 # LOAD INPUT
				6732
				6733	mov.l (%a0),%d1 # pack exp w/ upper 16 fraction
				6734	mov.w 4(%a0),%d1
				6735	and.l &0x7FFFFFFF,%d1 # CLEAR THE SIGN BIT: d1 = COMPACT |X|
				6736	cmp.l %d1,&0x3FFF8000 # |X| >= 1 ?
				6737	bge.b ACOSBIG
				6738
				6739	#--THIS IS THE USUAL CASE, |X| < 1
				6740	#--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) )
					#--d0 IS SAVED AND CLEARED AROUND THE CALL, SO satan LOADS FPCR WITH 0 FOR
					#--ITS OWN FINAL OPERATION; THE USER VALUE IS APPLIED ONLY TO THE DOUBLING.
				6741
				6742	ACOSMAIN:
				6743	fmov.s &0x3F800000,%fp1 # FP1 = 1.0
				6744	fadd.x %fp0,%fp1 # 1+X
				6745	fneg.x %fp0 # -X
				6746	fadd.s &0x3F800000,%fp0 # 1-X
				6747	fdiv.x %fp1,%fp0 # (1-X)/(1+X)
				6748	fsqrt.x %fp0 # SQRT((1-X)/(1+X))
				6749	mov.l %d0,-(%sp) # save original users fpcr
				6750	clr.l %d0 # satan WILL SEE d0 = 0
				6751	fmovm.x &0x01,-(%sp) # save SQRT(...) to stack
				6752	lea (%sp),%a0 # pass ptr to sqrt
				6753	bsr satan # ATAN(SQRT([1-X]/[1+X]))
				6754	add.l &0xc,%sp # clear SQRT(...) from stack
				6755
				6756	fmov.l (%sp)+,%fpcr # restore users round prec,mode
				6757	fadd.x %fp0,%fp0 # 2 * ATAN( STUFF )
				6758	bra t_pinx2
				6759
				6760	ACOSBIG:
				6761	fabs.x %fp0 # |X|
				6762	fcmp.s %fp0,&0x3F800000 # |X| VS 1.0
				6763	fbgt t_operr # cause an operr exception
				6764
				6765	#--|X| = 1, ACOS(X) = 0 OR PI
				6766	tst.b (%a0) # is X positive or negative?
				6767	bpl.b ACOSP1
				6768
				6769	#--X = -1
				6770	#Returns PI and inexact exception
				6771	ACOSM1:
				6772	fmov.x PI(%pc),%fp0 # load PI
				6773	fmov.l %d0,%fpcr # load round mode,prec
				6774	fadd.s &0x00800000,%fp0 # add a small value
				6775	bra t_pinx2
				6776
				6777	ACOSP1:
				6778	bra ld_pzero # answer is positive zero
				6779
				6780	global sacosd
				6781	#--ACOS(X) = PI/2 FOR DENORMALIZED X
					#--THE INPUT IS NOT READ; PI/2 IS LOADED UNDER THE USER FPCR.
				6782	sacosd:
				6783	fmov.l %d0,%fpcr # load user's rnd mode/prec
				6784	fmov.x PIBY2(%pc),%fp0
				6785	bra t_pinx2
				6786
				6787	#########################################################################
				6788	# setox(): computes the exponential for a normalized input #
				6789	# setoxd(): computes the exponential for a denormalized input #
				6790	# setoxm1(): computes the exponential minus 1 for a normalized input #
				6791	# setoxm1d(): computes the exponential minus 1 for a denormalized input #
				6792	# #
				6793	# INPUT *************************************************************** #
				6794	# a0 = pointer to extended precision input #
				6795	# d0 = round precision,mode #
				6796	# #
				6797	# OUTPUT ************************************************************** #
				6798	# fp0 = exp(X) or exp(X)-1 #
				6799	# #
				6800	# ACCURACY and MONOTONICITY ******************************************* #
				6801	# The returned result is within 0.85 ulps in 64 significant bit, #
				6802	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				6803	# rounded to double precision. The result is provably monotonic #
				6804	# in double precision. #
				6805	# #
				6806	# ALGORITHM and IMPLEMENTATION **************************************** #
				6807	# #
				6808	# setoxd #
				6809	# ------ #
				6810	# Step 1. Set ans := 1.0 #
				6811	# #
				6812	# Step 2. Return ans := ans + sign(X)*2^(-126). Exit. #
				6813	# Notes: This will always generate one exception -- inexact. #
				6814	# #
				6815	# #
				6816	# setox #
				6817	# ----- #
				6818	# #
				6819	# Step 1. Filter out extreme cases of input argument. #
				6820	# 1.1 If \|X\| >= 2^(-65), go to Step 1.3. #
				6821	# 1.2 Go to Step 7. #
				6822	# 1.3 If \|X\| < 16380 log(2), go to Step 2. #
				6823	# 1.4 Go to Step 8. #
				6824	# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
				6825	# To avoid the use of floating-point comparisons, a #
				6826	# compact representation of \|X\| is used. This format is a #
				6827	# 32-bit integer, the upper (more significant) 16 bits #
				6828	# are the sign and biased exponent field of \|X\|; the #
				6829	# lower 16 bits are the 16 most significant fraction #
				6830	# (including the explicit bit) bits of \|X\|. Consequently, #
				6831	# the comparisons in Steps 1.1 and 1.3 can be performed #
				6832	# by integer comparison. Note also that the constant #
				6833	# 16380 log(2) used in Step 1.3 is also in the compact #
				6834	# form. Thus taking the branch to Step 2 guarantees #
				6835	# \|X\| < 16380 log(2). There is no harm to have a small #
				6836	# number of cases where \|X\| is less than, but close to, #
				6837	# 16380 log(2) and the branch to Step 8 is taken. #
				6838	# #
				6839	# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
				6840	# 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 #
				6841	# was taken) #
				6842	# 2.2 N := round-to-nearest-integer( X * 64/log2 ). #
				6843	# 2.3 Calculate J = N mod 64; so J = 0,1,2,..., #
				6844	# or 63. #
				6845	# 2.4 Calculate M = (N - J)/64; so N = 64M + J. #
				6846	# 2.5 Calculate the address of the stored value of #
				6847	# 2^(J/64). #
				6848	# 2.6 Create the value Scale = 2^M. #
				6849	# Notes: The calculation in 2.2 is really performed by #
				6850	# Z := X * constant #
				6851	# N := round-to-nearest-integer(Z) #
				6852	# where #
				6853	# constant := single-precision( 64/log 2 ). #
				6854	# #
				6855	# Using a single-precision constant avoids memory #
				6856	# access. Another effect of using a single-precision #
				6857	# "constant" is that the calculated value Z is #
				6858	# #
				6859	# Z = X(64/log2)(1+eps), \|eps\| <= 2^(-24). #
				6860	# #
				6861	# This error has to be considered later in Steps 3 and 4. #
				6862	# #
				6863	# Step 3. Calculate X - N*log2/64. #
				6864	# 3.1 R := X + N*L1, #
				6865	# where L1 := single-precision(-log2/64). #
				6866	# 3.2 R := R + N*L2, #
				6867	# L2 := extended-precision(-log2/64 - L1).#
				6868	# Notes: a) The way L1 and L2 are chosen ensures L1+L2 #
				6869	# approximate the value -log2/64 to 88 bits of accuracy. #
				6870	# b) N*L1 is exact because N is no longer than 22 bits #
				6871	# and L1 is no longer than 24 bits. #
				6872	# c) The calculation X+N*L1 is also exact due to #
				6873	# cancellation. Thus, R is practically X+N(L1+L2) to full #
				6874	# 64 bits. #
				6875	# d) It is important to estimate how large can \|R\| be #
				6876	# after Step 3.2. #
				6877	# #
				6878	# N = rnd-to-int( X*64/log2 (1+eps) ), \|eps\|<=2^(-24) #
				6879	# X*64/log2 (1+eps) = N + f, \|f\| <= 0.5 #
				6880	# X64/log2 - N = f - epsX 64/log2 #
				6881	# X - Nlog2/64 = flog2/64 - eps*X #
				6882	# #
				6883	# #
				6884	# Now \|X\| <= 16446 log2, thus #
				6885	# #
				6886	# \|X - Nlog2/64\| <= (0.5 + 16446/2^(18))log2/64 #
				6887	# <= 0.57 log2/64. #
				6888	# This bound will be used in Step 4. #
				6889	# #
				6890	# Step 4. Approximate exp(R)-1 by a polynomial #
				6891	# p = R + RR(A1 + R(A2 + R(A3 + R(A4 + RA5)))) #
				6892	# Notes: a) In order to reduce memory access, the coefficients #
				6893	# are made as "short" as possible: A1 (which is 1/2), A4 #
				6894	# and A5 are single precision; A2 and A3 are double #
				6895	# precision. #
				6896	# b) Even with the restrictions above, #
				6897	# \|p - (exp(R)-1)\| < 2^(-68.8) for all \|R\| <= 0.0062. #
				6898	# Note that 0.0062 is slightly bigger than 0.57 log2/64. #
				6899	# c) To fully utilize the pipeline, p is separated into #
				6900	# two independent pieces of roughly equal complexities #
				6901	# p = [ R + RS(A2 + S*A4) ] + #
				6902	# [ S(A1 + S(A3 + S*A5)) ] #
				6903	# where S = R*R. #
				6904	# #
				6905	# Step 5. Compute 2^(J/64)exp(R) = 2^(J/64)(1+p) by #
				6906	# ans := T + ( T*p + t) #
				6907	# where T and t are the stored values for 2^(J/64). #
				6908	# Notes: 2^(J/64) is stored as T and t where T+t approximates #
				6909	# 2^(J/64) to roughly 85 bits; T is in extended precision #
				6910	# and t is in single precision. Note also that T is #
				6911	# rounded to 62 bits so that the last two bits of T are #
				6912	# zero. The reason for such a special form is that T-1, #
				6913	# T-2, and T-8 will all be exact --- a property that will #
				6914	# give much more accurate computation of the function #
				6915	# EXPM1. #
				6916	# #
				6917	# Step 6. Reconstruction of exp(X) #
				6918	# exp(X) = 2^M * 2^(J/64) * exp(R). #
				6919	# 6.1 If AdjFlag = 0, go to 6.3 #
				6920	# 6.2 ans := ans * AdjScale #
				6921	# 6.3 Restore the user FPCR #
				6922	# 6.4 Return ans := ans * Scale. Exit. #
				6923	# Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, #
				6924	# \|M\| <= 16380, and Scale = 2^M. Moreover, exp(X) will #
				6925	# neither overflow nor underflow. If AdjFlag = 1, that #
				6926	# means that #
				6927	# X = (M1+M)log2 + Jlog2/64 + R, \|M1+M\| >= 16380. #
				6928	# Hence, exp(X) may overflow or underflow or neither. #
				6929	# When that is the case, AdjScale = 2^(M1) where M1 is #
				6930	# approximately M. Thus 6.2 will never cause #
				6931	# over/underflow. Possible exception in 6.4 is overflow #
				6932	# or underflow. The inexact exception is not generated in #
				6933	# 6.4. Although one can argue that the inexact flag #
				6934	# should always be raised, simulating that exception #
				6935	# costs much more than the flag is worth in practical uses. #
				6936	# #
				6937	# Step 7. Return 1 + X. #
				6938	# 7.1 ans := X #
				6939	# 7.2 Restore user FPCR. #
				6940	# 7.3 Return ans := 1 + ans. Exit #
				6941	# Notes: For non-zero X, the inexact exception will always be #
				6942	# raised by 7.3. That is the only exception raised by 7.3.#
				6943	# Note also that we use the FMOVEM instruction to move X #
				6944	# in Step 7.1 to avoid unnecessary trapping. (Although #
				6945	# the FMOVEM may not seem relevant since X is normalized, #
				6946	# the precaution will be useful in the library version of #
				6947	# this code where the separate entry for denormalized #
				6948	# inputs will be done away with.) #
				6949	# #
				6950	# Step 8. Handle exp(X) where \|X\| >= 16380log2. #
				6951	# 8.1 If \|X\| > 16480 log2, go to Step 9. #
				6952	# (mimic 2.2 - 2.6) #
				6953	# 8.2 N := round-to-integer( X * 64/log2 ) #
				6954	# 8.3 Calculate J = N mod 64, J = 0,1,...,63 #
				6955	# 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, #
				6956	# AdjFlag := 1. #
				6957	# 8.5 Calculate the address of the stored value #
				6958	# 2^(J/64). #
				6959	# 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. #
				6960	# 8.7 Go to Step 3. #
				6961	# Notes: Refer to notes for 2.2 - 2.6. #
				6962	# #
				6963	# Step 9. Handle exp(X), \|X\| > 16480 log2. #
				6964	# 9.1 If X < 0, go to 9.3 #
				6965	# 9.2 ans := Huge, go to 9.4 #
				6966	# 9.3 ans := Tiny. #
				6967	# 9.4 Restore user FPCR. #
				6968	# 9.5 Return ans := ans * ans. Exit. #
				6969	# Notes: Exp(X) will surely overflow or underflow, depending on #
				6970	# X's sign. "Huge" and "Tiny" are respectively large/tiny #
				6971	# extended-precision numbers whose square over/underflow #
				6972	# with an inexact result. Thus, 9.5 always raises the #
				6973	# inexact together with either overflow or underflow. #
				6974	# #
				6975	# setoxm1d #
				6976	# -------- #
				6977	# #
				6978	# Step 1. Set ans := 0 #
				6979	# #
				6980	# Step 2. Return ans := X + ans. Exit. #
				6981	# Notes: This will return X with the appropriate rounding #
				6982	# precision prescribed by the user FPCR. #
				6983	# #
				6984	# setoxm1 #
				6985	# ------- #
				6986	# #
				6987	# Step 1. Check \|X\| #
				6988	# 1.1 If \|X\| >= 1/4, go to Step 1.3. #
				6989	# 1.2 Go to Step 7. #
				6990	# 1.3 If \|X\| < 70 log(2), go to Step 2. #
				6991	# 1.4 Go to Step 10. #
				6992	# Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.#
				6993	# However, it is conceivable \|X\| can be small very often #
				6994	# because EXPM1 is intended to evaluate exp(X)-1 #
				6995	# accurately when \|X\| is small. For further details on #
				6996	# the comparisons, see the notes on Step 1 of setox. #
				6997	# #
				6998	# Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). #
				6999	# 2.1 N := round-to-nearest-integer( X * 64/log2 ). #
				7000	# 2.2 Calculate J = N mod 64; so J = 0,1,2,..., #
				7001	# or 63. #
				7002	# 2.3 Calculate M = (N - J)/64; so N = 64M + J. #
				7003	# 2.4 Calculate the address of the stored value of #
				7004	# 2^(J/64). #
				7005	# 2.5 Create the values Sc = 2^M and #
				7006	# OnebySc := -2^(-M). #
				7007	# Notes: See the notes on Step 2 of setox. #
				7008	# #
				7009	# Step 3. Calculate X - N*log2/64. #
				7010	# 3.1 R := X + N*L1, #
				7011	# where L1 := single-precision(-log2/64). #
				7012	# 3.2 R := R + N*L2, #
				7013	# L2 := extended-precision(-log2/64 - L1).#
				7014	# Notes: Applying the analysis of Step 3 of setox in this case #
				7015	# shows that \|R\| <= 0.0055 (note that \|X\| <= 70 log2 in #
				7016	# this case). #
				7017	# #
				7018	# Step 4. Approximate exp(R)-1 by a polynomial #
				7019	# p = R+RR(A1+R(A2+R(A3+R(A4+R(A5+R*A6))))) #
				7020	# Notes: a) In order to reduce memory access, the coefficients #
				7021	# are made as "short" as possible: A1 (which is 1/2), A5 #
				7022	# and A6 are single precision; A2, A3 and A4 are double #
				7023	# precision. #
				7024	# b) Even with the restriction above, #
				7025	# \|p - (exp(R)-1)\| < \|R\| * 2^(-72.7) #
				7026	# for all \|R\| <= 0.0055. #
				7027	# c) To fully utilize the pipeline, p is separated into #
				7028	# two independent pieces of roughly equal complexity #
				7029	# p = [ RS(A2 + S(A4 + SA6)) ] + #
				7030	# [ R + S(A1 + S(A3 + S*A5)) ] #
				7031	# where S = R*R. #
				7032	# #
				7033	# Step 5. Compute 2^(J/64)*p by #
				7034	# p := T*p #
				7035	# where T and t are the stored values for 2^(J/64). #
				7036	# Notes: 2^(J/64) is stored as T and t where T+t approximates #
				7037	# 2^(J/64) to roughly 85 bits; T is in extended precision #
				7038	# and t is in single precision. Note also that T is #
				7039	# rounded to 62 bits so that the last two bits of T are #
				7040	# zero. The reason for such a special form is that T-1, #
				7041	# T-2, and T-8 will all be exact --- a property that will #
				7042	# be exploited in Step 6 below. The total relative error #
				7043	# in p is no bigger than 2^(-67.7) compared to the final #
				7044	# result. #
				7045	# #
				7046	# Step 6. Reconstruction of exp(X)-1 #
				7047	# exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). #
				7048	# 6.1 If M <= 63, go to Step 6.3. #
				7049	# 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 #
				7050	# 6.3 If M >= -3, go to 6.5. #
				7051	# 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 #
				7052	# 6.5 ans := (T + OnebySc) + (p + t). #
				7053	# 6.6 Restore user FPCR. #
				7054	# 6.7 Return ans := Sc * ans. Exit. #
				7055	# Notes: The various arrangements of the expressions give #
				7056	# accurate evaluations. #
				7057	# #
				7058	# Step 7. exp(X)-1 for \|X\| < 1/4. #
				7059	# 7.1 If \|X\| >= 2^(-65), go to Step 9. #
				7060	# 7.2 Go to Step 8. #
				7061	# #
				7062	# Step 8. Calculate exp(X)-1, \|X\| < 2^(-65). #
				7063	# 8.1 If \|X\| < 2^(-16312), goto 8.3 #
				7064	# 8.2 Restore FPCR; return ans := X - 2^(-16382). #
				7065	# Exit. #
				7066	# 8.3 X := X * 2^(140). #
				7067	# 8.4 Restore FPCR; ans := ans - 2^(-16382). #
				7068	# Return ans := ans*2^(140). Exit #
				7069	# Notes: The idea is to return "X - tiny" under the user #
				7070	# precision and rounding modes. To avoid unnecessary #
				7071	# inefficiency, we stay away from denormalized numbers #
				7072	# the best we can. For \|X\| >= 2^(-16312), the #
				7073	# straightforward 8.2 generates the inexact exception as #
				7074	# the case warrants. #
				7075	# #
				7076	# Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial #
				7077	# p = X + X*X*(B1 + X*(B2 + ... + X*B12)) #
				7078	# Notes: a) In order to reduce memory access, the coefficients #
				7079	# are made as "short" as possible: B1 (which is 1/2), B9 #
				7080	# to B12 are single precision; B3 to B8 are double #
				7081	# precision; and B2 is double extended. #
				7082	# b) Even with the restriction above, #
				7083	# |p - (exp(X)-1)| < |X| 2^(-70.6) #
				7084	# for all |X| <= 0.251. #
				7085	# Note that 0.251 is slightly bigger than 1/4. #
				7086	# c) To fully preserve accuracy, the polynomial is #
				7087	# computed as #
				7088	# X + ( S*B1 + Q ) where S = X*X and #
				7089	# Q = X*S*(B2 + X*(B3 + ... + X*B12)) #
				7090	# d) To fully utilize the pipeline, Q is separated into #
				7091	# two independent pieces of roughly equal complexity #
				7092	# Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + #
				7093	# [ S*S*(B3 + S*(B5 + ... + S*B11)) ] #
				7094	# #
				7095	# Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. #
				7096	# 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all #
				7097	# practical purposes. Therefore, go to Step 1 of setox. #
				7098	# 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical #
				7099	# purposes. #
				7100	# ans := -1 #
				7101	# Restore user FPCR #
				7102	# Return ans := ans + 2^(-126). Exit. #
				7103	# Notes: 10.2 will always create an inexact and return -1 + tiny #
				7104	# in the user rounding precision and mode. #
				7105	# #
				7106	#########################################################################
				7107
				7108	L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 # L2 (read with fmul.x): L1+L2 = -log2/64, L1 is the single-precision immediate used in Step 3
				7109
				7110	EEXPA3: long 0x3FA55555,0x55554CC1 # setox Step 4 coefficient A3 (read with fadd.d)
				7111	EEXPA2: long 0x3FC55555,0x55554A54 # setox Step 4 coefficient A2 (read with fadd.d)
				7112
				7113	EM1A4: long 0x3F811111,0x11174385 # setoxm1 Step 4 coefficient A4 (read with fadd.d)
				7114	EM1A3: long 0x3FA55555,0x55554F5A # setoxm1 Step 4 coefficient A3 (read with fadd.d)
				7115
				7116	EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 # setoxm1 Step 4 coefficient A2 (read with fadd.d, so only the first two longs are used there)
				7117
				7118	EM1B8: long 0x3EC71DE3,0xA5774682 # EM1POLY coefficient B8 (read with fadd.d)
				7119	EM1B7: long 0x3EFA01A0,0x19D7CB68 # EM1POLY coefficient B7 (read with fadd.d)
				7120
				7121	EM1B6: long 0x3F2A01A0,0x1A019DF3 # EM1POLY coefficient B6 (read with fadd.d)
				7122	EM1B5: long 0x3F56C16C,0x16C170E2 # EM1POLY coefficient B5 (read with fadd.d)
				7123
				7124	EM1B4: long 0x3F811111,0x11111111 # EM1POLY coefficient B4 (read with fadd.d)
				7125	EM1B3: long 0x3FA55555,0x55555555 # EM1POLY coefficient B3 (read with fadd.d)
				7126
				7127	EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB # EM1POLY coefficient B2 (read with fadd.x)
				7128	long 0x00000000 # fourth long after EM1B2; presumably padding -- confirm
				7129
				7130	TWO140: long 0x48B00000,0x00000000 # 2^(140), read with fmul.d in EM12TINY
				7131	TWON140:
				7132	long 0x37300000,0x00000000 # 2^(-140), read with fmul.d in EM12TINY
				7133
				7134	EEXPTBL: # 2^(J/64) table, J = 0..63; 16 bytes per entry: 3 longs read with fmov.x (leading part T) + 1 long read with fadd.s/fmov.s (tail t)
				7135	long 0x3FFF0000,0x80000000,0x00000000,0x00000000 # J = 0
				7136	long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
				7137	long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
				7138	long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
				7139	long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
				7140	long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
				7141	long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
				7142	long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
				7143	long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
				7144	long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
				7145	long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
				7146	long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
				7147	long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
				7148	long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
				7149	long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
				7150	long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
				7151	long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
				7152	long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
				7153	long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
				7154	long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
				7155	long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
				7156	long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
				7157	long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
				7158	long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
				7159	long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
				7160	long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
				7161	long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
				7162	long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
				7163	long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
				7164	long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
				7165	long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
				7166	long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
				7167	long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
				7168	long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
				7169	long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
				7170	long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
				7171	long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
				7172	long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
				7173	long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
				7174	long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
				7175	long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
				7176	long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
				7177	long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
				7178	long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
				7179	long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
				7180	long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
				7181	long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
				7182	long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
				7183	long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
				7184	long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
				7185	long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
				7186	long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
				7187	long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
				7188	long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
				7189	long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
				7190	long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
				7191	long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
				7192	long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
				7193	long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
				7194	long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
				7195	long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
				7196	long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
				7197	long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
				7198	long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A # J = 63
				7199
				7200	set ADJFLAG,L_SCR2 # setox: 0 on the EXPMAIN path, 1 on the EEXPBIG path (extra ADJSCALE multiply in Step 6)
				7201	set SCALE,FP_SCR0 # setox: 2^(M) built as an extended-precision value
				7202	set ADJSCALE,FP_SCR1 # setox: 2^(M1), only written on the EEXPBIG path
				7203	set SC,FP_SCR0 # setoxm1: 2^(M) (or -2^(-16382) on the tiny paths); same slot as SCALE
				7204	set ONEBYSC,FP_SCR1 # setoxm1: -2^(-M); same slot as ADJSCALE
				7205
				7206	global setox
				7207	setox:
				7208	#--entry point for EXP(X), here X is finite, non-zero, and not NaN's; a0 points to X, d0 is loaded into FPCR before the final operation
				7209
				7210	#--Step 1.
				7211	mov.l (%a0),%d1 # load part of input X
				7212	and.l &0x7FFF0000,%d1 # biased expo. of X
				7213	cmp.l %d1,&0x3FBE0000 # 2^(-65)
				7214	bge.b EXPC1 # normal case
				7215	bra EXPSM # |X| < 2^(-65)
				7216
				7217	EXPC1:
				7218	#--The case |X| >= 2^(-65) (EM1BIG in setoxm1 also branches here for positive X)
				7219	mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
				7220	cmp.l %d1,&0x400CB167 # 16380 log2 trunc. 16 bits
				7221	blt.b EXPMAIN # normal case
				7222	bra EEXPBIG
				7223
				7224	EXPMAIN:
				7225	#--Step 2.
				7226	#--This is the normal branch: 2^(-65) <= |X| < 16380 log2.
				7227	fmov.x (%a0),%fp0 # load input from (a0)
				7228
				7229	fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
				7230	fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
				7231	fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
				7232	mov.l &0,ADJFLAG(%a6) # no ADJSCALE multiply in Step 6
				7233	fmov.l %fp0,%d1 # N = int( X * 64/log2 )
				7234	lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
				7235	fmov.l %d1,%fp0 # convert to floating-format
				7236
				7237	mov.l %d1,L_SCR1(%a6) # save N temporarily
				7238	and.l &0x3F,%d1 # d1 is J = N mod 64
				7239	lsl.l &4,%d1 # J*16 = byte offset of table entry
				7240	add.l %d1,%a1 # address of 2^(J/64)
				7241	mov.l L_SCR1(%a6),%d1 # reload N
				7242	asr.l &6,%d1 # d1 is M
				7243	add.w &0x3FFF,%d1 # biased expo. of 2^(M)
				7244	mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB
				7245
				7246	EXPCONT1:
				7247	#--Step 3.
				7248	#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
				7249	#--a1 points to 2^(J/64), d1 is biased expo. of 2^(M)
				7250	fmov.x %fp0,%fp2
				7251	fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
				7252	fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
				7253	fadd.x %fp1,%fp0 # X + N*L1
				7254	fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
				7255
				7256	#--Step 4.
				7257	#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
				7258	#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
				7259	#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
				7260	#--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
				7261
				7262	fmov.x %fp0,%fp1
				7263	fmul.x %fp1,%fp1 # fp1 IS S = R*R
				7264
				7265	fmov.s &0x3AB60B70,%fp2 # fp2 IS A5
				7266
				7267	fmul.x %fp1,%fp2 # fp2 IS S*A5
				7268	fmov.x %fp1,%fp3
				7269	fmul.s &0x3C088895,%fp3 # fp3 IS S*A4
				7270
				7271	fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5
				7272	fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4
				7273
				7274	fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5)
				7275	mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended
				7276	mov.l &0x80000000,SCALE+4(%a6) # upper significand long of SCALE
				7277	clr.l SCALE+8(%a6) # lower significand long of SCALE
				7278
				7279	fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4)
				7280
				7281	fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5)
				7282	fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4)
				7283
				7284	fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5))
				7285	fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4),
				7286
				7287	fmov.x (%a1)+,%fp1 # fp1 is lead. pt. of 2^(J/64)
				7288	fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1
				7289
				7290	#--Step 5
				7291	#--final reconstruction process
				7292	#--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
				7293
				7294	fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1)
				7295	fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
				7296	fadd.s (%a1),%fp0 # accurate 2^(J/64)
				7297
				7298	fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*...
				7299	mov.l ADJFLAG(%a6),%d1
				7300
				7301	#--Step 6
				7302	tst.l %d1
				7303	beq.b NORMAL # ADJFLAG = 0: skip the ADJSCALE multiply
				7304	ADJUST:
				7305	fmul.x ADJSCALE(%a6),%fp0 # multiply 2^(M1) (EEXPBIG path only)
				7306	NORMAL:
				7307	fmov.l %d0,%fpcr # restore user FPCR
				7308	mov.b &FMUL_OP,%d1 # last inst is MUL
				7309	fmul.x SCALE(%a6),%fp0 # multiply 2^(M)
				7310	bra t_catch
				7311
				7312	EXPSM:
				7313	#--Step 7
				7314	fmovm.x (%a0),&0x80 # load X
				7315	fmov.l %d0,%fpcr # restore user FPCR
				7316	fadd.s &0x3F800000,%fp0 # 1+X in user mode
				7317	bra t_pinx2
				7318
				7319	EEXPBIG:
				7320	#--Step 8
				7321	cmp.l %d1,&0x400CB27C # 16480 log2
				7322	bgt.b EXP2BIG
				7323	#--Steps 8.2 -- 8.6
				7324	fmov.x (%a0),%fp0 # load input from (a0)
				7325
				7326	fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
				7327	fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
				7328	fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
				7329	mov.l &1,ADJFLAG(%a6) # Step 6 must also multiply by ADJSCALE
				7330	fmov.l %fp0,%d1 # N = int( X * 64/log2 )
				7331	lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
				7332	fmov.l %d1,%fp0 # convert to floating-format
				7333	mov.l %d1,L_SCR1(%a6) # save N temporarily
				7334	and.l &0x3F,%d1 # d1 is J = N mod 64
				7335	lsl.l &4,%d1 # J*16 = byte offset of table entry
				7336	add.l %d1,%a1 # address of 2^(J/64)
				7337	mov.l L_SCR1(%a6),%d1 # reload N
				7338	asr.l &6,%d1 # d1 is K
				7339	mov.l %d1,L_SCR1(%a6) # save K temporarily
				7340	asr.l &1,%d1 # d1 is M1
				7341	sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1
				7342	add.w &0x3FFF,%d1 # biased expo. of 2^(M1)
				7343	mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1)
				7344	mov.l &0x80000000,ADJSCALE+4(%a6) # upper significand long of ADJSCALE
				7345	clr.l ADJSCALE+8(%a6) # lower significand long of ADJSCALE
				7346	mov.l L_SCR1(%a6),%d1 # d1 is M
				7347	add.w &0x3FFF,%d1 # biased expo. of 2^(M)
				7348	bra.w EXPCONT1 # go back to Step 3
				7349
				7350	EXP2BIG:
				7351	#--Step 9
				7352	tst.b (%a0) # is X positive or negative?
				7353	bmi t_unfl2 # X negative
				7354	bra t_ovfl2 # X positive
				7355
				7356	global setoxd
				7357	setoxd:
				7358	#--entry point for EXP(X), X is denormalized; result is formed as 1 + sign(X)*2^(-126) under the FPCR value passed in d0
				7359	mov.l (%a0),-(%sp) # push first longword of X
				7360	andi.l &0x80000000,(%sp) # keep only the sign bit
				7361	ori.l &0x00800000,(%sp) # sign(X)*2^(-126)
				7362
				7363	fmov.s &0x3F800000,%fp0 # fp0 is 1.0
				7364
				7365	fmov.l %d0,%fpcr # load d0 into FPCR
				7366	fadd.s (%sp)+,%fp0 # 1 + sign(X)*2^(-126); pops the temporary
				7367	bra t_pinx2
				7368
				7369	global setoxm1
				7370	setoxm1:
				7371	#--entry point for EXPM1(X), here X is finite, non-zero, non-NaN; a0 points to X, d0 is loaded into FPCR before the final operation
				7372
				7373	#--Step 1.
				7374	#--Step 1.1
				7375	mov.l (%a0),%d1 # load part of input X
				7376	and.l &0x7FFF0000,%d1 # biased expo. of X
				7377	cmp.l %d1,&0x3FFD0000 # 1/4
				7378	bge.b EM1CON1 # |X| >= 1/4
				7379	bra EM1SM # |X| < 1/4
				7380
				7381	EM1CON1:
				7382	#--Step 1.3
				7383	#--The case |X| >= 1/4
				7384	mov.w 4(%a0),%d1 # expo. and partial sig. of |X|
				7385	cmp.l %d1,&0x4004C215 # 70log2 rounded up to 16 bits
				7386	ble.b EM1MAIN # 1/4 <= |X| <= 70log2
				7387	bra EM1BIG
				7388
				7389	EM1MAIN:
				7390	#--Step 2.
				7391	#--This is the case: 1/4 <= |X| <= 70 log2.
				7392	fmov.x (%a0),%fp0 # load input from (a0)
				7393
				7394	fmov.x %fp0,%fp1 # fp1 keeps X for Step 3
				7395	fmul.s &0x42B8AA3B,%fp0 # 64/log2 * X
				7396	fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
				7397	fmov.l %fp0,%d1 # N = int( X * 64/log2 )
				7398	lea EEXPTBL(%pc),%a1 # a1 = base of the 2^(J/64) table
				7399	fmov.l %d1,%fp0 # convert to floating-format
				7400
				7401	mov.l %d1,L_SCR1(%a6) # save N temporarily
				7402	and.l &0x3F,%d1 # d1 is J = N mod 64
				7403	lsl.l &4,%d1 # J*16 = byte offset of table entry
				7404	add.l %d1,%a1 # address of 2^(J/64)
				7405	mov.l L_SCR1(%a6),%d1 # reload N
				7406	asr.l &6,%d1 # d1 is M
				7407	mov.l %d1,L_SCR1(%a6) # save a copy of M
				7408
				7409	#--Step 3.
				7410	#--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
				7411	#--a1 points to 2^(J/64), d1 and L_SCR1 both contain M
				7412	fmov.x %fp0,%fp2
				7413	fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64)
				7414	fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64
				7415	fadd.x %fp1,%fp0 # X + N*L1
				7416	fadd.x %fp2,%fp0 # fp0 is R, reduced arg.
				7417	add.w &0x3FFF,%d1 # d1 is biased expo. of 2^M
				7418
				7419	#--Step 4.
				7420	#--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
				7421	#-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
				7422	#--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
				7423	#--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
				7424
				7425	fmov.x %fp0,%fp1
				7426	fmul.x %fp1,%fp1 # fp1 IS S = R*R
				7427
				7428	fmov.s &0x3950097B,%fp2 # fp2 IS A6
				7429
				7430	fmul.x %fp1,%fp2 # fp2 IS S*A6
				7431	fmov.x %fp1,%fp3
				7432	fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5
				7433
				7434	fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6
				7435	fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5
				7436	mov.w %d1,SC(%a6) # SC is 2^(M) in extended
				7437	mov.l &0x80000000,SC+4(%a6) # upper significand long of SC
				7438	clr.l SC+8(%a6) # lower significand long of SC
				7439
				7440	fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6)
				7441	mov.l L_SCR1(%a6),%d1 # d1 is M
				7442	neg.w %d1 # d1 is -M
				7443	fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5)
				7444	add.w &0x3FFF,%d1 # biased expo. of 2^(-M)
				7445	fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6)
				7446	fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5)
				7447
				7448	fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6))
				7449	or.w &0x8000,%d1 # signed/expo. of -2^(-M)
				7450	mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M)
				7451	mov.l &0x80000000,ONEBYSC+4(%a6) # upper significand long of OnebySc
				7452	clr.l ONEBYSC+8(%a6) # lower significand long of OnebySc
				7453	fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5))
				7454
				7455	fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6))
				7456	fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5))
				7457
				7458	fadd.x %fp2,%fp0 # fp0 IS EXP(R)-1
				7459
				7460	fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
				7461
				7462	#--Step 5
				7463	#--Compute 2^(J/64)*p
				7464
				7465	fmul.x (%a1),%fp0 # 2^(J/64)*(Exp(R)-1)
				7466
				7467	#--Step 6
				7468	#--Step 6.1
				7469	mov.l L_SCR1(%a6),%d1 # retrieve M
				7470	cmp.l %d1,&63
				7471	ble.b MLE63
				7472	#--Step 6.2 M >= 64
				7473	fmov.s 12(%a1),%fp1 # fp1 is t
				7474	fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc
				7475	fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released
				7476	fadd.x (%a1),%fp0 # T+(p+(t+OnebySc))
				7477	bra EM1SCALE
				7478	MLE63:
				7479	#--Step 6.3 M <= 63
				7480	cmp.l %d1,&-3
				7481	bge.b MGEN3
				7482	MLTN3:
				7483	#--Step 6.4 M <= -4
				7484	fadd.s 12(%a1),%fp0 # p+t
				7485	fadd.x (%a1),%fp0 # T+(p+t)
				7486	fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t))
				7487	bra EM1SCALE
				7488	MGEN3:
				7489	#--Step 6.5 -3 <= M <= 63
				7490	fmov.x (%a1)+,%fp1 # fp1 is T
				7491	fadd.s (%a1),%fp0 # fp0 is p+t
				7492	fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc
				7493	fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t)
				7494
				7495	EM1SCALE:
				7496	#--Step 6.6
				7497	fmov.l %d0,%fpcr # restore user FPCR
				7498	fmul.x SC(%a6),%fp0 # multiply 2^(M)
				7499	bra t_inx2
				7500
				7501	EM1SM:
				7502	#--Step 7 |X| < 1/4. (d1 still holds the biased expo. of X from Step 1.1)
				7503	cmp.l %d1,&0x3FBE0000 # 2^(-65)
				7504	bge.b EM1POLY
				7505
				7506	EM1TINY:
				7507	#--Step 8 |X| < 2^(-65)
				7508	cmp.l %d1,&0x00330000 # 2^(-16312); NOTE(review): biased expo. 0x0033 with bias 0x3FFF is 2^(-16332) -- confirm which is intended
				7509	blt.b EM12TINY
				7510	#--Step 8.2
				7511	mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
				7512	mov.l &0x80000000,SC+4(%a6)
				7513	clr.l SC+8(%a6)
				7514	fmov.x (%a0),%fp0 # fp0 is X
				7515	fmov.l %d0,%fpcr # restore user FPCR
				7516	mov.b &FADD_OP,%d1 # last inst is ADD
				7517	fadd.x SC(%a6),%fp0 # X - 2^(-16382)
				7518	bra t_catch
				7519
				7520	EM12TINY:
				7521	#--Step 8.3
				7522	fmov.x (%a0),%fp0 # fp0 is X
				7523	fmul.d TWO140(%pc),%fp0 # X * 2^(140)
				7524	mov.l &0x80010000,SC(%a6) # SC is -2^(-16382)
				7525	mov.l &0x80000000,SC+4(%a6)
				7526	clr.l SC+8(%a6)
				7527	fadd.x SC(%a6),%fp0 # X*2^(140) - 2^(-16382)
				7528	fmov.l %d0,%fpcr # restore user FPCR
				7529	mov.b &FMUL_OP,%d1 # last inst is MUL
				7530	fmul.d TWON140(%pc),%fp0 # scale back by 2^(-140)
				7531	bra t_catch
				7532
				7533	EM1POLY:
				7534	#--Step 9 exp(X)-1 by a simple polynomial
				7535	fmov.x (%a0),%fp0 # fp0 is X
				7536	fmul.x %fp0,%fp0 # fp0 is S := X*X
				7537	fmovm.x &0xc,-(%sp) # save fp2/fp3 {%fp2/%fp3}
				7538	fmov.s &0x2F30CAA8,%fp1 # fp1 is B12
				7539	fmul.x %fp0,%fp1 # fp1 is S*B12
				7540	fmov.s &0x310F8290,%fp2 # fp2 is B11
				7541	fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12
				7542
				7543	fmul.x %fp0,%fp2 # fp2 is S*B11
				7544	fmul.x %fp0,%fp1 # fp1 is S*(B10 + ...
				7545
				7546	fadd.s &0x3493F281,%fp2 # fp2 is B9+S*...
				7547	fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*...
				7548
				7549	fmul.x %fp0,%fp2 # fp2 is S*(B9+...
				7550	fmul.x %fp0,%fp1 # fp1 is S*(B8+...
				7551
				7552	fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*...
				7553	fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*...
				7554
				7555	fmul.x %fp0,%fp2 # fp2 is S*(B7+...
				7556	fmul.x %fp0,%fp1 # fp1 is S*(B6+...
				7557
				7558	fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*...
				7559	fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*...
				7560
				7561	fmul.x %fp0,%fp2 # fp2 is S*(B5+...
				7562	fmul.x %fp0,%fp1 # fp1 is S*(B4+...
				7563
				7564	fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*...
				7565	fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*...
				7566
				7567	fmul.x %fp0,%fp2 # fp2 is S*(B3+...
				7568	fmul.x %fp0,%fp1 # fp1 is S*(B2+...
				7569
				7570	fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...)
				7571	fmul.x (%a0),%fp1 # fp1 is X*S*(B2...
				7572
				7573	fmul.s &0x3F000000,%fp0 # fp0 is S*B1
				7574	fadd.x %fp2,%fp1 # fp1 is Q
				7575
				7576	fmovm.x (%sp)+,&0x30 # fp2/fp3 restored {%fp2/%fp3}
				7577
				7578	fadd.x %fp1,%fp0 # fp0 is S*B1+Q
				7579
				7580	fmov.l %d0,%fpcr # restore user FPCR
				7581	fadd.x (%a0),%fp0 # X + (S*B1+Q)
				7582	bra t_inx2
				7583
				7584	EM1BIG:
				7585	#--Step 10 |X| > 70 log2
				7586	mov.l (%a0),%d1 # first longword of X (sign and exponent)
				7587	cmp.l %d1,&0
				7588	bgt.w EXPC1 # X positive: continue in setox at EXPC1
				7589	#--Step 10.2
				7590	fmov.s &0xBF800000,%fp0 # fp0 is -1
				7591	fmov.l %d0,%fpcr # restore user FPCR
				7592	fadd.s &0x00800000,%fp0 # -1 + 2^(-126)
				7593	bra t_minx2
				7594
				7595	global setoxm1d
				7596	setoxm1d:
				7597	#--entry point for EXPM1(X), here X is denormalized
				7598	#--Step 0. No computation here; control goes straight to t_extdnrm.
				7599	bra t_extdnrm
				7600
				7601	#########################################################################
				7602	# sgetexp(): returns the exponent portion of the input argument. #
				7603	# The exponent bias is removed and the exponent value is #
				7604	# returned as an extended precision number in fp0. #
				7605	# sgetexpd(): handles denormalized numbers. #
				7606	# #
				7607	# sgetman(): extracts the mantissa of the input argument. The #
				7608	# mantissa is converted to an extended precision number w/ #
				7609	# an exponent of $3fff and is returned in fp0. The range of #
				7610	# the result is [1.0 - 2.0). #
				7611	# sgetmand(): handles denormalized numbers. #
				7612	# #
				7613	# INPUT *************************************************************** #
				7614	# a0 = pointer to extended precision input #
				7615	# #
				7616	# OUTPUT ************************************************************** #
				7617	# fp0 = exponent(X) or mantissa(X) #
				7618	# #
				7619	#########################################################################
				7620
				7621	global sgetexp
				7622	sgetexp:
				7623	mov.w SRC_EX(%a0),%d0 # get the exponent
				7624	bclr &0xf,%d0 # clear the sign bit
				7625	subi.w &0x3fff,%d0 # subtract off the bias
				7626	fmov.w %d0,%fp0 # return exp in fp0
				7627	blt.b sgetexpn # it's negative (branch tests the subi.w result; presumably fmov.w leaves the integer flags untouched -- confirm)
				7628	rts
				7629
				7630	sgetexpn:
				7631	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				7632	rts
				7633
				7634	global sgetexpd
				7635	sgetexpd:
				7636	bsr.l norm # normalize; d0 is then used as the shift amount
				7637	neg.w %d0 # new exp = -(shft amt)
				7638	subi.w &0x3fff,%d0 # subtract off the bias
				7639	fmov.w %d0,%fp0 # return exp in fp0
				7640	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit (done unconditionally on this path)
				7641	rts
				7642
				7643	global sgetman
				7644	sgetman:
				7645	mov.w SRC_EX(%a0),%d0 # get the exp
				7646	ori.w &0x7fff,%d0 # set all exponent bits, sign bit kept
				7647	bclr &0xe,%d0 # make it the new exp +-3fff
				7648
				7649	# here, we build the result in a tmp location so as not to disturb the input
				7650	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy to tmp loc
				7651	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy to tmp loc
				7652	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				7653	fmov.x FP_SCR0(%a6),%fp0 # put new value back in fp0
				7654	bmi.b sgetmann # it's negative (presumably tests the flags left by the mov.w of the sign/exponent word -- confirm)
				7655	rts
				7656
				7657	sgetmann:
				7658	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				7659	rts
				7660
				7661	#
				7662	# For denormalized numbers, shift the mantissa until the j-bit = 1,
				7663	# then load the exponent with +/- $3fff.
				7664	#
				7665	global sgetmand
				7666	sgetmand:
				7667	bsr.l norm # normalize exponent
				7668	bra.b sgetman # finish in the normalized-input routine
				7669
				7670	#########################################################################
				7671	# scosh(): computes the hyperbolic cosine of a normalized input #
				7672	# scoshd(): computes the hyperbolic cosine of a denormalized input #
				7673	# #
				7674	# INPUT *************************************************************** #
				7675	# a0 = pointer to extended precision input #
				7676	# d0 = round precision,mode #
				7677	# #
				7678	# OUTPUT ************************************************************** #
				7679	# fp0 = cosh(X) #
				7680	# #
				7681	# ACCURACY and MONOTONICITY ******************************************* #
				7682	# The returned result is within 3 ulps in 64 significant bit, #
				7683	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				7684	# rounded to double precision. The result is provably monotonic #
				7685	# in double precision. #
				7686	# #
				7687	# ALGORITHM *********************************************************** #
				7688	# #
				7689	# COSH #
				7690	# 1. If |X| > 16380 log2, go to 3. #
				7691	# #
				7692	# 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae #
				7693	# y = |X|, z = exp(Y), and #
				7694	# cosh(X) = (1/2)*( z + 1/z ). #
				7695	# Exit. #
				7696	# #
				7697	# 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. #
				7698	# #
				7699	# 4. (16380 log2 < |X| <= 16480 log2) #
				7700	# cosh(X) = exp(|X|)/2. #
				7701	# However, invoking exp(|X|) may cause premature #
				7702	# overflow. Thus, we calculate cosh(X) as follows: #
				7703	# Y := |X| #
				7704	# Fact := 2**(16380) #
				7705	# Y' := Y - 16381 log2 #
				7706	# cosh(X) := Fact * exp(Y'). #
				7707	# Exit. #
				7708	# #
				7709	# 5. (|X| > 16480 log2) cosh(X) must overflow. Return #
				7710	# Huge*Huge to generate overflow and an infinity with #
				7711	# the appropriate sign. Huge is the largest finite number #
				7712	# in extended format. Exit. #
				7713	# #
				7714	#########################################################################
				7715
				7716	TWO16380: # 2^(16380) in extended precision; final scale factor on the COSHBIG path
				7717	long 0x7FFB0000,0x80000000,0x00000000,0x00000000
				7718
				7719	global scosh
				7720	scosh:
				7721	fmov.x (%a0),%fp0 # LOAD INPUT
				7722
				7723	mov.l (%a0),%d1 # sign/exponent into the upper word of d1
				7724	mov.w 4(%a0),%d1 # top 16 significand bits into the lower word
				7725	and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X|
				7726	cmp.l %d1,&0x400CB167 # same constant setox labels 16380 log2
				7727	bgt.b COSHBIG
				7728
				7729	#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
				7730	#--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
				7731
				7732	fabs.x %fp0 # |X|
				7733
				7734	mov.l %d0,-(%sp) # save caller's d0 (round precision,mode)
				7735	clr.l %d0 # setox loads d0 into FPCR, so call it with 0
				7736	fmovm.x &0x01,-(%sp) # save |X| to stack
				7737	lea (%sp),%a0 # pass ptr to |X|
				7738	bsr setox # FP0 IS EXP(|X|)
				7739	add.l &0xc,%sp # erase |X| from stack
				7740	fmul.s &0x3F000000,%fp0 # (1/2)*EXP(|X|)
				7741	mov.l (%sp)+,%d0 # recover caller's d0
				7742
				7743	fmov.s &0x3E800000,%fp1 # (1/4)
				7744	fdiv.x %fp0,%fp1 # 1/(2 EXP(|X|))
				7745
				7746	fmov.l %d0,%fpcr # load caller's d0 into FPCR
				7747	mov.b &FADD_OP,%d1 # last inst is ADD
				7748	fadd.x %fp1,%fp0 # (1/2)*EXP(|X|) + 1/(2 EXP(|X|))
				7749	bra t_catch
				7750
				7751	COSHBIG:
				7752	cmp.l %d1,&0x400CB2B3
				7753	bgt.b COSHHUGE
				7754
				7755	fabs.x %fp0 # |X|
				7756	fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
				7757	fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
				7758
				7759	mov.l %d0,-(%sp) # save caller's d0 (round precision,mode)
				7760	clr.l %d0 # setox loads d0 into FPCR, so call it with 0
				7761	fmovm.x &0x01,-(%sp) # save fp0 to stack
				7762	lea (%sp),%a0 # pass ptr to fp0
				7763	bsr setox # fp0 is EXP(|X| - 16381 LOG2)
				7764	add.l &0xc,%sp # clear fp0 from stack
				7765	mov.l (%sp)+,%d0 # recover caller's d0
				7766
				7767	fmov.l %d0,%fpcr # load caller's d0 into FPCR
				7768	mov.b &FMUL_OP,%d1 # last inst is MUL
				7769	fmul.x TWO16380(%pc),%fp0 # multiply by 2^(16380)
				7770	bra t_catch
				7771
				7772	COSHHUGE:
				7773	bra t_ovfl2
				7774
				7775	global scoshd
				7776	#--COSH(X) = 1 FOR DENORMALIZED X
				7777	scoshd:
				7778	fmov.s &0x3F800000,%fp0 # fp0 is 1.0
				7779
				7780	fmov.l %d0,%fpcr # load caller's d0 into FPCR
				7781	fadd.s &0x00800000,%fp0 # 1 + 2^(-126)
				7782	bra t_pinx2
				7783
				7784	#########################################################################
				7785	# ssinh(): computes the hyperbolic sine of a normalized input #
				7786	# ssinhd(): computes the hyperbolic sine of a denormalized input #
				7787	# #
				7788	# INPUT *************************************************************** #
				7789	# a0 = pointer to extended precision input #
				7790	# d0 = round precision,mode #
				7791	# #
				7792	# OUTPUT ************************************************************** #
				7793	# fp0 = sinh(X) #
				7794	# #
				7795	# ACCURACY and MONOTONICITY ******************************************* #
				7796	# The returned result is within 3 ulps in 64 significant bit, #
				7797	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				7798	# rounded to double precision. The result is provably monotonic #
				7799	# in double precision. #
				7800	# #
				7801	# ALGORITHM *********************************************************** #
				7802	# #
				7803	# SINH #
				7804	# 1. If |X| > 16380 log2, go to 3. #
				7805	# #
				7806	# 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula #
				7807	# y = |X|, sgn = sign(X), and z = expm1(Y), #
				7808	# sinh(X) = sgn*(1/2)*( z + z/(1+z) ). #
				7809	# Exit. #
				7810	# #
				7811	# 3. If |X| > 16480 log2, go to 5. #
				7812	# #
				7813	# 4. (16380 log2 < |X| <= 16480 log2) #
				7814	# sinh(X) = sign(X) * exp(|X|)/2. #
				7815	# However, invoking exp(|X|) may cause premature overflow. #
				7816	# Thus, we calculate sinh(X) as follows: #
				7817	# Y := |X| #
				7818	# sgn := sign(X) #
				7819	# sgnFact := sgn * 2**(16380) #
				7820	# Y' := Y - 16381 log2 #
				7821	# sinh(X) := sgnFact * exp(Y'). #
				7822	# Exit. #
				7823	# #
				7824	# 5. (|X| > 16480 log2) sinh(X) must overflow. Return #
				7825	# sign(X)*Huge*Huge to generate overflow and an infinity with #
				7826	# the appropriate sign. Huge is the largest finite number in #
				7827	# extended format. Exit. #
				7828	# #
				7829	#########################################################################
				7830
				7831	global ssinh
				7832	ssinh:
				7833	fmov.x (%a0),%fp0 # LOAD INPUT
				7834
				7835	mov.l (%a0),%d1 # sign/exponent into the upper word of d1
				7836	mov.w 4(%a0),%d1 # top 16 significand bits into the lower word
				7837	mov.l %d1,%a1 # save (compacted) operand
				7838	and.l &0x7FFFFFFF,%d1 # drop the sign: compacted |X|
				7839	cmp.l %d1,&0x400CB167 # same constant setox labels 16380 log2
				7840	bgt.b SINHBIG
				7841
				7842	#--THIS IS THE USUAL CASE, |X| < 16380 LOG2
				7843	#--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
				7844
				7845	fabs.x %fp0 # Y = |X|
				7846
				7847	movm.l &0x8040,-(%sp) # {a1/d0}
				7848	fmovm.x &0x01,-(%sp) # save Y on stack
				7849	lea (%sp),%a0 # pass ptr to Y
				7850	clr.l %d0 # setoxm1 loads d0 into FPCR, so call it with 0
				7851	bsr setoxm1 # FP0 IS Z = EXPM1(Y)
				7852	add.l &0xc,%sp # clear Y from stack
				7853	fmov.l &0,%fpcr # FPCR = 0 for the intermediate arithmetic below
				7854	movm.l (%sp)+,&0x0201 # {a1/d0}
				7855
				7856	fmov.x %fp0,%fp1
				7857	fadd.s &0x3F800000,%fp1 # 1+Z
				7858	fmov.x %fp0,-(%sp) # push Z
				7859	fdiv.x %fp1,%fp0 # Z/(1+Z)
				7860	mov.l %a1,%d1 # recover compacted operand
				7861	and.l &0x80000000,%d1 # keep sign(X)
				7862	or.l &0x3F000000,%d1 # single-precision sign(X)*(1/2)
				7863	fadd.x (%sp)+,%fp0 # Z + Z/(1+Z); pops Z
				7864	mov.l %d1,-(%sp) # push sign(X)*(1/2)
				7865
				7866	fmov.l %d0,%fpcr # load caller's d0 into FPCR
				7867	mov.b &FMUL_OP,%d1 # last inst is MUL
				7868	fmul.s (%sp)+,%fp0 # last fp inst - possible exceptions set
				7869	bra t_catch
				7870
				7871	SINHBIG:
				7872	cmp.l %d1,&0x400CB2B3
				7873	bgt t_ovfl
				7874	fabs.x %fp0 # |X|
				7875	fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD)
				7876	mov.l &0,-(%sp) # lower significand long of sgnFact
				7877	mov.l &0x80000000,-(%sp) # upper significand long of sgnFact
				7878	mov.l %a1,%d1 # recover compacted operand
				7879	and.l &0x80000000,%d1 # keep sign(X)
				7880	or.l &0x7FFB0000,%d1 # sign(X) with the exponent of 2^(16380)
				7881	mov.l %d1,-(%sp) # EXTENDED FMT
				7882	fsub.d T2(%pc),%fp0 # |X| - 16381 LOG2, ACCURATE
				7883
				7884	mov.l %d0,-(%sp) # save caller's d0 (round precision,mode)
				7885	clr.l %d0 # setox loads d0 into FPCR, so call it with 0
				7886	fmovm.x &0x01,-(%sp) # save fp0 on stack
				7887	lea (%sp),%a0 # pass ptr to fp0
				7888	bsr setox # fp0 is EXP(|X| - 16381 LOG2)
				7889	add.l &0xc,%sp # clear fp0 from stack
				7890
				7891	mov.l (%sp)+,%d0 # recover caller's d0
				7892	fmov.l %d0,%fpcr # load caller's d0 into FPCR
				7893	mov.b &FMUL_OP,%d1 # last inst is MUL
				7894	fmul.x (%sp)+,%fp0 # possible exception; multiplies by sgnFact and pops it
				7895	bra t_catch
				7896
				7897	global ssinhd
				7898	#--SINH(X) = X FOR DENORMALIZED X
				7899	ssinhd:
				7900	bra t_extdnrm # no computation here; same exit as setoxm1d
				7901
				7902	#########################################################################
				7903	# stanh(): computes the hyperbolic tangent of a normalized input #
				7904	# stanhd(): computes the hyperbolic tangent of a denormalized input #
				7905	# #
				7906	# INPUT *************************************************************** #
				7907	# a0 = pointer to extended precision input #
				7908	# d0 = round precision,mode #
				7909	# #
				7910	# OUTPUT ************************************************************** #
				7911	# fp0 = tanh(X) #
				7912	# #
				7913	# ACCURACY and MONOTONICITY ******************************************* #
				7914	# The returned result is within 3 ulps in 64 significant bit, #
				7915	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				7916	# rounded to double precision. The result is provably monotonic #
				7917	# in double precision. #
				7918	# #
				7919	# ALGORITHM *********************************************************** #
				7920	# #
				7921	# TANH #
				7922	# 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. #
				7923	# #
				7924	# 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by #
				7925	# sgn := sign(X), y := 2|X|, z := expm1(Y), and #
				7926	# tanh(X) = sgn*( z/(2+z) ). #
				7927	# Exit. #
				7928	# #
				7929	# 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, #
				7930	# go to 7. #
				7931	# #
				7932	# 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. #
				7933	# #
				7934	# 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by #
				7935	# sgn := sign(X), y := 2|X|, z := exp(Y), #
				7936	# tanh(X) = sgn - [ sgn*2/(1+z) ]. #
				7937	# Exit. #
				7938	# #
				7939	# 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we #
				7940	# calculate Tanh(X) by #
				7941	# sgn := sign(X), Tiny := 2**(-126), #
				7942	# tanh(X) := sgn - sgn*Tiny. #
				7943	# Exit. #
				7944	# #
				7945	# 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. #
				7946	# #
				7947	#########################################################################
				7948
				7949	set X,FP_SCR0
				7950	set XFRAC,X+4
				7951
				7952	set SGN,L_SCR3
				7953
				7954	set V,FP_SCR0
				7955
				7956	global stanh
				7957	stanh:
				7958	fmov.x (%a0),%fp0 # LOAD INPUT
				7959
				7960	fmov.x %fp0,X(%a6) # keep a copy of the input in scratch X
				7961	mov.l (%a0),%d1 # d1 = first longword of the input (sign/exponent in the upper word)
				7962	mov.w 4(%a0),%d1 # lower word of d1 = first word at offset 4 (start of the fraction)
				7963	mov.l %d1,X(%a6) # first longword of X now holds this compact form
				7964	and.l &0x7FFFFFFF,%d1 # clear the sign bit: d1 = compact form of the magnitude
				7965	cmp.l %d1, &0x3fd78000 # is \|X\| < 2^(-40)?
				7966	blt.w TANHBORS # yes
				7967	cmp.l %d1, &0x3fffddce # is \|X\| > (5/2)LOG2?
				7968	bgt.w TANHBORS # yes
				7969
				7970	#--THIS IS THE USUAL CASE
				7971	#--Y = 2\|X\|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
				7972
				7973	mov.l X(%a6),%d1 # compact form of X, sign bit still present
				7974	mov.l %d1,SGN(%a6) # stash it; reduced to the sign bit two lines below
				7975	and.l &0x7FFF0000,%d1 # keep only the biased exponent (sign and fraction bits cleared)
				7976	add.l &0x00010000,%d1 # EXPONENT OF 2\|X\|
				7977	mov.l %d1,X(%a6) # X now has a positive sign and exponent+1; fraction longwords untouched
				7978	and.l &0x80000000,SGN(%a6) # SGN = sign bit of X only
				7979	fmov.x X(%a6),%fp0 # FP0 IS Y = 2\|X\|
				7980
				7981	mov.l %d0,-(%sp) # save the caller's round precision,mode
				7982	clr.l %d0 # call setoxm1 with d0 = 0
				7983	fmovm.x &0x1,-(%sp) # save Y on stack
				7984	lea (%sp),%a0 # pass ptr to Y
				7985	bsr setoxm1 # FP0 IS Z = EXPM1(Y)
				7986	add.l &0xc,%sp # clear Y from stack
				7987	mov.l (%sp)+,%d0 # recover the caller's round precision,mode
				7988
				7989	fmov.x %fp0,%fp1 # fp1 = Z
				7990	fadd.s &0x40000000,%fp1 # Z+2
				7991	mov.l SGN(%a6),%d1 # d1 = sign bit of X
				7992	fmov.x %fp1,V(%a6) # V shares FP_SCR0 with X; X is not read again on this path
				7993	eor.l %d1,V(%a6) # apply the sign of X to Z+2: V = SIGN(X)*(Z+2)
				7994
				7995	fmov.l %d0,%fpcr # restore users round prec,mode
				7996	fdiv.x V(%a6),%fp0 # Z / (SIGN(X)*(Z+2))
				7997	bra t_inx2
				7998
				7999	TANHBORS:
				8000	cmp.l %d1,&0x3FFF8000 # compare the magnitude against 1.0 (compact form)
				8001	blt.w TANHSM # below 1 here means the tiny case: return X
				8002
				8003	cmp.l %d1,&0x40048AA1 # compare against the upper bound (50 LOG2 per the algorithm notes)
				8004	bgt.w TANHHUGE # above it: result is +-1 up to rounding
				8005
				8006	#-- (5/2) LOG2 < \|X\| < 50 LOG2,
				8007	#--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2\|X\|, SGN = SIGN(X),
				8008	#--TANH(X) = SGN - SGN*2/[EXP(Y)+1].
				8009
				8010	mov.l X(%a6),%d1 # compact form of X, sign bit still present
				8011	mov.l %d1,SGN(%a6)
				8012	and.l &0x7FFF0000,%d1 # keep only the biased exponent
				8013	add.l &0x00010000,%d1 # EXPO OF 2\|X\|
				8014	mov.l %d1,X(%a6) # Y = 2\|X\|
				8015	and.l &0x80000000,SGN(%a6) # SGN = sign bit of X only
				8016	mov.l SGN(%a6),%d1 # (d1 is loaded from SGN again after the setox call)
				8017	fmov.x X(%a6),%fp0 # Y = 2\|X\|
				8018
				8019	mov.l %d0,-(%sp) # save the caller's round precision,mode
				8020	clr.l %d0 # call setox with d0 = 0
				8021	fmovm.x &0x01,-(%sp) # save Y on stack
				8022	lea (%sp),%a0 # pass ptr to Y
				8023	bsr setox # FP0 IS EXP(Y)
				8024	add.l &0xc,%sp # clear Y from stack
				8025	mov.l (%sp)+,%d0 # recover the caller's round precision,mode
				8026	mov.l SGN(%a6),%d1 # d1 = sign bit of X
				8027	fadd.s &0x3F800000,%fp0 # EXP(Y)+1
				8028
				8029	eor.l &0xC0000000,%d1 # -SIGN(X)*2
				8030	fmov.s %d1,%fp1 # -SIGN(X)*2 IN SGL FMT
				8031	fdiv.x %fp0,%fp1 # -SIGN(X)2 / [EXP(Y)+1 ]
				8032
				8033	mov.l SGN(%a6),%d1 # d1 = sign bit of X
				8034	or.l &0x3F800000,%d1 # SGN
				8035	fmov.s %d1,%fp0 # SGN IN SGL FMT
				8036
				8037	fmov.l %d0,%fpcr # restore users round prec,mode
				8038	mov.b &FADD_OP,%d1 # last inst is ADD
				8039	fadd.x %fp1,%fp0 # SGN + ( -SGN*2/[EXP(Y)+1] )
				8040	bra t_inx2
				8041
				8042	TANHSM:
				8043	fmov.l %d0,%fpcr # restore users round prec,mode
				8044	mov.b &FMOV_OP,%d1 # last inst is MOVE
				8045	fmov.x X(%a6),%fp0 # last inst - possible exception set
				8046	bra t_catch
				8047
				8048	#---RETURN SGN(X) - SGN(X)EPS
				8049	TANHHUGE:
				8050	mov.l X(%a6),%d1 # compact form of X
				8051	and.l &0x80000000,%d1 # sign bit of X
				8052	or.l &0x3F800000,%d1 # SGN as +-1.0 in single format
				8053	fmov.s %d1,%fp0 # fp0 = SGN
				8054	and.l &0x80000000,%d1 # back to the sign bit only
				8055	eor.l &0x80800000,%d1 # -SIGN(X)*EPS
				8056
				8057	fmov.l %d0,%fpcr # restore users round prec,mode
				8058	fadd.s %d1,%fp0 # SGN - SGN*EPS, evaluated under the user's rounding
				8059	bra t_inx2
				8060
				8061	global stanhd
				8062	#--TANH(X) = X FOR DENORMALIZED X, SO THE INPUT IS RETURNED WITHOUT FURTHER COMPUTATION
				8063	stanhd:
				8064	bra t_extdnrm # hand the denormalized operand to t_extdnrm (defined outside this section)
				8065
				8066	#########################################################################
				8067	# slogn(): computes the natural logarithm of a normalized input #
				8068	# slognd(): computes the natural logarithm of a denormalized input #
				8069	# slognp1(): computes the log(1+X) of a normalized input #
				8070	# slognp1d(): computes the log(1+X) of a denormalized input #
				8071	# #
				8072	# INPUT *************************************************************** #
				8073	# a0 = pointer to extended precision input #
				8074	# d0 = round precision,mode #
				8075	# #
				8076	# OUTPUT ************************************************************** #
				8077	# fp0 = log(X) or log(1+X) #
				8078	# #
				8079	# ACCURACY and MONOTONICITY ******************************************* #
				8080	# The returned result is within 2 ulps in 64 significant bit, #
				8081	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				8082	# rounded to double precision. The result is provably monotonic #
				8083	# in double precision. #
				8084	# #
				8085	# ALGORITHM *********************************************************** #
				8086	# LOGN: #
				8087	# Step 1. If \|X-1\| < 1/16, approximate log(X) by an odd #
				8088	# polynomial in u, where u = 2(X-1)/(X+1). Otherwise, #
				8089	# move on to Step 2. #
				8090	# #
				8091	# Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first #
				8092	# seven significant bits of Y plus 2**(-7), i.e. #
				8093	# F = 1.xxxxxx1 in base 2 where the six "x" match those #
				8094	# of Y. Note that \|Y-F\| <= 2**(-7). #
				8095	# #
				8096	# Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a #
				8097	# polynomial in u, log(1+u) = poly. #
				8098	# #
				8099	# Step 4. Reconstruct #
				8100	# log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) #
				8101	# by k*log(2) + (log(F) + poly). The values of log(F) are #
				8102	# calculated beforehand and stored in the program. #
				8103	# #
				8104	# lognp1: #
				8105	# Step 1: If \|X\| < 1/16, approximate log(1+X) by an odd #
				8106	# polynomial in u where u = 2X/(2+X). Otherwise, move on #
				8107	# to Step 2. #
				8108	# #
				8109	# Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done #
				8110	# in Step 2 of the algorithm for LOGN and compute #
				8111	# log(1+X) as k*log(2) + log(F) + poly where poly #
				8112	# approximates log(1+u), u = (Y-F)/F. #
				8113	# #
				8114	# Implementation Notes: #
				8115	# Note 1. There are 64 different possible values for F, thus 64 #
				8116	# log(F)'s need to be tabulated. Moreover, the values of #
				8117	# 1/F are also tabulated so that the division in (Y-F)/F #
				8118	# can be performed by a multiplication. #
				8119	# #
				8120	# Note 2. In Step 2 of lognp1, in order to preserve accuracy, #
				8121	# the value Y-F has to be calculated carefully when #
				8122	# 1/2 <= X < 3/2. #
				8123	# #
				8124	# Note 3. To fully exploit the pipeline, polynomials are usually #
				8125	# separated into two parts evaluated independently before #
				8126	# being added up. #
				8127	# #
				8128	#########################################################################
				8129	LOGOF2:
				8130	long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 # LOG2, read as an extended-precision operand (fmul.x) to form K*LOG2
				8131
				8132	one:
				8133	long 0x3F800000 # 1.0 in single precision (used with fadd.s/fsub.s/fmov.s)
				8134	zero:
				8135	long 0x00000000 # 0.0 in single precision (used with fmov.s)
				8136	infty:
				8137	long 0x7F800000 # single-precision +infinity bit pattern; not referenced in this section
				8138	negone:
				8139	long 0xBF800000 # -1.0 in single precision (used with fmov.s)
				8140
				8141	LOGA6:
				8142	long 0x3FC2499A,0xB5E4040B # A6, double precision; A1..A6 feed the LOG(1+U) polynomial after LP1CONT1
				8143	LOGA5:
				8144	long 0xBFC555B5,0x848CB7DB # A5, double precision
				8145
				8146	LOGA4:
				8147	long 0x3FC99999,0x987D8730 # A4, double precision
				8148	LOGA3:
				8149	long 0xBFCFFFFF,0xFF6F7E97 # A3, double precision
				8150
				8151	LOGA2:
				8152	long 0x3FD55555,0x555555A4 # A2, double precision
				8153	LOGA1:
				8154	long 0xBFE00000,0x00000008 # A1, double precision
				8155
				8156	LOGB5:
				8157	long 0x3F175496,0xADD7DAD6 # B5, double precision; B1..B5 feed the odd polynomial after LP1CONT2
				8158	LOGB4:
				8159	long 0x3F3C71C2,0xFE80C7E0 # B4, double precision
				8160
				8161	LOGB3:
				8162	long 0x3F624924,0x928BCCFF # B3, double precision
				8163	LOGB2:
				8164	long 0x3F899999,0x999995EC # B2, double precision
				8165
				8166	LOGB1:
				8167	long 0x3FB55555,0x55555555 # B1, double precision
				8168	TWO:
				8169	long 0x40000000,0x00000000 # read with fmov.s in KISNEG1, so only the first longword (2.0 single) is used there
				8170
				8171	LTHOLD:
				8172	long 0x3f990000,0x80000000,0x00000000,0x00000000 # slognp1 minimum-magnitude threshold, extended precision
				8173
				8174	LOGTBL:
				8175	long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 # entry pair 0: 1/F (16 bytes); the code indexes the table in 32-byte steps
				8176	long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 # entry pair 0: LOG(F), reached by adding 16 to the 1/F address
				8177	long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
				8178	long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
				8179	long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
				8180	long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
				8181	long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
				8182	long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
				8183	long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
				8184	long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
				8185	long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
				8186	long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
				8187	long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
				8188	long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
				8189	long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
				8190	long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
				8191	long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
				8192	long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
				8193	long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
				8194	long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
				8195	long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
				8196	long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
				8197	long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
				8198	long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
				8199	long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
				8200	long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
				8201	long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
				8202	long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
				8203	long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
				8204	long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
				8205	long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
				8206	long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
				8207	long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
				8208	long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
				8209	long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
				8210	long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
				8211	long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
				8212	long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
				8213	long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
				8214	long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
				8215	long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
				8216	long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
				8217	long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
				8218	long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
				8219	long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
				8220	long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
				8221	long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
				8222	long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
				8223	long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
				8224	long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
				8225	long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
				8226	long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
				8227	long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
				8228	long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
				8229	long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
				8230	long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
				8231	long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
				8232	long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
				8233	long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
				8234	long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
				8235	long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
				8236	long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
				8237	long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
				8238	long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
				8239	long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
				8240	long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
				8241	long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
				8242	long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
				8243	long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
				8244	long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
				8245	long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
				8246	long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
				8247	long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
				8248	long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
				8249	long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
				8250	long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
				8251	long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
				8252	long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
				8253	long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
				8254	long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
				8255	long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
				8256	long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
				8257	long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
				8258	long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
				8259	long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
				8260	long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000
				8261	long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000
				8262	long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
				8263	long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
				8264	long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
				8265	long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000
				8266	long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
				8267	long 0x3FFE0000,0x94458094,0x45809446,0x00000000
				8268	long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
				8269	long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000
				8270	long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
				8271	long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
				8272	long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
				8273	long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
				8274	long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
				8275	long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
				8276	long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
				8277	long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
				8278	long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
				8279	long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
				8280	long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
				8281	long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
				8282	long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
				8283	long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
				8284	long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
				8285	long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
				8286	long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
				8287	long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
				8288	long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
				8289	long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
				8290	long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
				8291	long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
				8292	long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
				8293	long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
				8294	long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
				8295	long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
				8296	long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
				8297	long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
				8298	long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
				8299	long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
				8300	long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
				8301	long 0x3FFE0000,0x80808080,0x80808081,0x00000000
				8302	long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
				8303
				8304	set ADJK,L_SCR1
				8305
				8306	set X,FP_SCR0
				8307	set XDCARE,X+2
				8308	set XFRAC,X+4
				8309
				8310	set F,FP_SCR1
				8311	set FFRAC,F+4
				8312
				8313	set KLOG2,FP_SCR0
				8314
				8315	set SAVEU,FP_SCR0
				8316
				8317	global slogn
				8318	#--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
				8319	slogn:
				8320	fmov.x (%a0),%fp0 # LOAD INPUT
				8321	mov.l &0x00000000,ADJK(%a6) # normalized input: no exponent adjustment
				8322
				8323	LOGBGN:
				8324	#--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
				8325	#--A FINITE, NON-ZERO, NORMALIZED NUMBER.
				8326
				8327	mov.l (%a0),%d1 # d1 = first longword of the input (sign/exponent in the upper word)
				8328	mov.w 4(%a0),%d1 # lower word of d1 = first word at offset 4: compact form of X
				8329
				8330	mov.l (%a0),X(%a6) # copy the three input longwords into scratch X
				8331	mov.l 4(%a0),X+4(%a6)
				8332	mov.l 8(%a0),X+8(%a6)
				8333
				8334	cmp.l %d1,&0 # CHECK IF X IS NEGATIVE
				8335	blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID
				8336	# X IS POSITIVE, CHECK IF X IS NEAR 1
				8337	cmp.l %d1,&0x3ffef07d # IS X < EXP(-1/16) (ROUGHLY 15/16)?
				8338	blt.b LOGMAIN # YES
				8339	cmp.l %d1,&0x3fff8841 # IS X > EXP(1/16) (ROUGHLY 17/16)?
				8340	ble.w LOGNEAR1 # NO
				8341
				8342	LOGMAIN:
				8343	#--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
				8344
				8345	#--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
				8346	#--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
				8347	#--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
				8348	#-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
				8349	#--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
				8350	#--LOG(1+U) CAN BE VERY EFFICIENT.
				8351	#--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
				8352	#--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
				8353
				8354	#--GET K, Y, F, AND ADDRESS OF 1/F.
				8355	asr.l &8,%d1
				8356	asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X
				8357	sub.l &0x3FFF,%d1 # THIS IS K
				8358	add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM.
				8359	lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F)
				8360	fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT
				8361
				8362	#--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
				8363	mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X
				8364	mov.l XFRAC(%a6),FFRAC(%a6) # start F from the leading fraction longword of Y
				8365	and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y
				8366	or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT
				8367	mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F
				8368	and.l &0x7E000000,%d1 # keep the six bits below the leading bit as the table index
				8369	asr.l &8,%d1
				8370	asr.l &8,%d1
				8371	asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT
				8372	add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F
				8373
				8374	fmov.x X(%a6),%fp0 # fp0 = Y
				8375	mov.l &0x3fff0000,F(%a6) # give F the same exponent word as Y
				8376	clr.l F+8(%a6) # low fraction longword of F is zero
				8377	fsub.x F(%a6),%fp0 # Y-F
				8378	fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY
				8379	#--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
				8380	#--REGISTERS SAVED: FP2-3 ARE ON THE STACK (RESTORED BEFORE THE FINAL ADD)
				8381
				8382	LP1CONT1:
				8383	#--AN RE-ENTRY POINT FOR LOGNP1
				8384	fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F
				8385	fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY
				8386	fmov.x %fp0,%fp2
				8387	fmul.x %fp2,%fp2 # FP2 IS V=U*U
				8388	fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1
				8389
				8390	#--LOG(1+U) IS APPROXIMATED BY
				8391	#--U + V(A1+U(A2+U(A3+U(A4+U(A5+UA6))))) WHICH IS
				8392	#--[U + V(A1+V(A3+VA5))] + [UV(A2+V(A4+V*A6))]
				8393
				8394	fmov.x %fp2,%fp3 # fp3 keeps V for the later multiplies
				8395	fmov.x %fp2,%fp1 # fp1 = V, start of the A2/A4/A6 chain
				8396
				8397	fmul.d LOGA6(%pc),%fp1 # V*A6
				8398	fmul.d LOGA5(%pc),%fp2 # V*A5
				8399
				8400	fadd.d LOGA4(%pc),%fp1 # A4+V*A6
				8401	fadd.d LOGA3(%pc),%fp2 # A3+V*A5
				8402
				8403	fmul.x %fp3,%fp1 # V(A4+VA6)
				8404	fmul.x %fp3,%fp2 # V(A3+VA5)
				8405
				8406	fadd.d LOGA2(%pc),%fp1 # A2+V(A4+VA6)
				8407	fadd.d LOGA1(%pc),%fp2 # A1+V(A3+VA5)
				8408
				8409	fmul.x %fp3,%fp1 # V(A2+V(A4+V*A6))
				8410	add.l &16,%a0 # ADDRESS OF LOG(F)
				8411	fmul.x %fp3,%fp2 # V(A1+V(A3+V*A5))
				8412
				8413	fmul.x %fp0,%fp1 # UV(A2+V(A4+VA6))
				8414	fadd.x %fp2,%fp0 # U+V(A1+V(A3+V*A5))
				8415
				8416	fadd.x (%a0),%fp1 # LOG(F)+UV(A2+V(A4+VA6))
				8417	fmovm.x (%sp)+,&0x30 # RESTORE FP2-3
				8418	fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U)
				8419
				8420	fmov.l %d0,%fpcr # load d0 (round precision,mode per the routine header) into FPCR for the last operation
				8421	fadd.x KLOG2(%a6),%fp0 # FINAL ADD
				8422	bra t_inx2
				8423
				8424
				8425	LOGNEAR1:
				8426
				8427	# if the input is exactly equal to one, then exit through ld_pzero.
				8428	# if these 2 lines weren't here, the correct answer would be returned
				8429	# but the INEX2 bit would be set.
				8430	fcmp.b %fp0,&0x1 # is it equal to one?
				8431	fbeq.l ld_pzero # yes
				8432
				8433	#--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
				8434	fmov.x %fp0,%fp1
				8435	fsub.s one(%pc),%fp1 # FP1 IS X-1
				8436	fadd.s one(%pc),%fp0 # FP0 IS X+1
				8437	fadd.x %fp1,%fp1 # FP1 IS 2(X-1)
				8438	#--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
				8439	#--IN U, U = 2(X-1)/(X+1) = FP1/FP0
				8440
				8441	LP1CONT2:
				8442	#--THIS IS AN RE-ENTRY POINT FOR LOGNP1
				8443	fdiv.x %fp0,%fp1 # FP1 IS U
				8444	fmovm.x &0xc,-(%sp) # SAVE FP2-3
				8445	#--REGISTERS SAVED ARE NOW FPCR,FP1,FP2,FP3
				8446	#--LET V=UU, W=VV, CALCULATE
				8447	#--U + UV(B1 + V(B2 + V(B3 + V(B4 + VB5)))) BY
				8448	#--U + UV( [B1 + W(B3 + WB5)] + [V(B2 + WB4)] )
				8449	fmov.x %fp1,%fp0
				8450	fmul.x %fp0,%fp0 # FP0 IS V
				8451	fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1
				8452	fmov.x %fp0,%fp1
				8453	fmul.x %fp1,%fp1 # FP1 IS W
				8454
				8455	fmov.d LOGB5(%pc),%fp3 # fp3 = B5
				8456	fmov.d LOGB4(%pc),%fp2 # fp2 = B4
				8457
				8458	fmul.x %fp1,%fp3 # W*B5
				8459	fmul.x %fp1,%fp2 # W*B4
				8460
				8461	fadd.d LOGB3(%pc),%fp3 # B3+W*B5
				8462	fadd.d LOGB2(%pc),%fp2 # B2+W*B4
				8463
				8464	fmul.x %fp3,%fp1 # W(B3+WB5), FP3 RELEASED
				8465
				8466	fmul.x %fp0,%fp2 # V(B2+WB4)
				8467
				8468	fadd.d LOGB1(%pc),%fp1 # B1+W(B3+WB5)
				8469	fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V
				8470
				8471	fadd.x %fp2,%fp1 # B1+W(B3+WB5) + V(B2+WB4), FP2 RELEASED
				8472	fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED
				8473
				8474	fmul.x %fp1,%fp0 # UV( [B1+W(B3+WB5)] + [V(B2+WB4)] )
				8475
				8476	fmov.l %d0,%fpcr # load d0 (round precision,mode per the routine header) into FPCR for the last operation
				8477	fadd.x SAVEU(%a6),%fp0 # final add: U + UV(...)
				8478	bra t_inx2
				8479
				8480	#--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID
				8481	LOGNEG:
				8482	bra t_operr
				8483
				8484	global slognd
				8485	slognd:
				8486	#--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
				8487
				8488	mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0 (provisional; ADJK is overwritten with -k below)
				8489
				8490	#----normalize the input value by left shifting k bits (k to be determined
				8491	#----below), adjusting exponent and storing -k to ADJK
				8492	#----the value TWOTO100 is no longer needed.
				8493	#----Note that this code assumes the denormalized input is NON-ZERO.
				8494
				8495	movm.l &0x3f00,-(%sp) # save some registers {d2-d7}
				8496	mov.l (%a0),%d3 # D3 is exponent of smallest norm. (first input longword, stored back to X unchanged)
				8497	mov.l 4(%a0),%d4
				8498	mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X)
				8499	clr.l %d2 # D2 used for holding K
				8500
				8501	tst.l %d4 # is the high fraction longword zero?
				8502	bne.b Hi_not0 # no: shift count is less than 32
				8503
				8504	Hi_0:
				8505	mov.l %d5,%d4 # move the low longword up: a shift by 32
				8506	clr.l %d5
				8507	mov.l &32,%d2 # k starts at 32
				8508	clr.l %d6
				8509	bfffo %d4{&0:&32},%d6 # d6 = offset of the first set bit in d4
				8510	lsl.l %d6,%d4 # bring that bit to the top
				8511	add.l %d6,%d2 # (D3,D4,D5) is normalized
				8512
				8513	mov.l %d3,X(%a6) # rebuild the operand in scratch X
				8514	mov.l %d4,XFRAC(%a6)
				8515	mov.l %d5,XFRAC+4(%a6)
				8516	neg.l %d2
				8517	mov.l %d2,ADJK(%a6) # ADJK = -k
				8518	fmov.x X(%a6),%fp0 # fp0 = normalized value
				8519	movm.l (%sp)+,&0xfc # restore registers {d2-d7}
				8520	lea X(%a6),%a0 # LOGBGN reads its operand through a0
				8521	bra.w LOGBGN # begin regular log(X)
				8522
				8523	Hi_not0:
				8524	clr.l %d6
				8525	bfffo %d4{&0:&32},%d6 # find first 1
				8526	mov.l %d6,%d2 # get k
				8527	lsl.l %d6,%d4 # shift the high longword left by k
				8528	mov.l %d5,%d7 # a copy of D5
				8529	lsl.l %d6,%d5 # shift the low longword left by k
				8530	neg.l %d6
				8531	add.l &32,%d6 # d6 = 32 - k
				8532	lsr.l %d6,%d7 # the k bits shifted out of the low longword
				8533	or.l %d7,%d4 # (D3,D4,D5) normalized
				8534
				8535	mov.l %d3,X(%a6) # rebuild the operand in scratch X
				8536	mov.l %d4,XFRAC(%a6)
				8537	mov.l %d5,XFRAC+4(%a6)
				8538	neg.l %d2
				8539	mov.l %d2,ADJK(%a6) # ADJK = -k
				8540	fmov.x X(%a6),%fp0 # fp0 = normalized value
				8541	movm.l (%sp)+,&0xfc # restore registers {d2-d7}
				8542	lea X(%a6),%a0 # LOGBGN reads its operand through a0
				8543	bra.w LOGBGN # begin regular log(X)
				8544
				8545	global slognp1
				8546	#--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
				8547	slognp1:
				8548	fmov.x (%a0),%fp0 # LOAD INPUT
				8549	fabs.x %fp0 # test magnitude
				8550	fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold
				8551	fbgt.w LP1REAL # if greater, continue
				8552	fmov.l %d0,%fpcr # at or below the threshold: load d0 (round precision,mode per the routine header) into FPCR
				8553	mov.b &FMOV_OP,%d1 # last inst is MOVE
				8554	fmov.x (%a0),%fp0 # return signed argument
				8555	bra t_catch
				8556
				8557	LP1REAL:
				8558	fmov.x (%a0),%fp0 # LOAD INPUT
				8559	mov.l &0x00000000,ADJK(%a6) # no exponent adjustment on this path
				8560	fmov.x %fp0,%fp1 # FP1 IS INPUT Z
				8561	fadd.s one(%pc),%fp0 # X := ROUND(1+Z)
				8562	fmov.x %fp0,X(%a6)
				8563	mov.w XFRAC(%a6),XDCARE(%a6) # copy the leading fraction word into X+2 to form the compact value
				8564	mov.l X(%a6),%d1 # d1 = compact form of X
				8565	cmp.l %d1,&0
				8566	ble.w LP1NEG0 # LOG OF ZERO OR -VE
				8567	cmp.l %d1,&0x3ffe8000 # IS BOUNDS [1/2,3/2]?
				8568	blt.w LOGMAIN # below 1/2: use the plain LOG(X) path
				8569	cmp.l %d1,&0x3fffc000
				8570	bgt.w LOGMAIN # above 3/2: use the plain LOG(X) path
				8571	#--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
				8572	#--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
				8573	#--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
				8574
				8575	LP1NEAR1:
				8576	#--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
				8577	cmp.l %d1,&0x3ffef07d
				8578	blt.w LP1CARE
				8579	cmp.l %d1,&0x3fff8841
				8580	bgt.w LP1CARE
				8581
				8582	LP1ONE16:
				8583	#--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
				8584	#--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
				8585	fadd.x %fp1,%fp1 # FP1 IS 2Z
				8586	fadd.s one(%pc),%fp0 # FP0 IS 1+X
				8587	#--U = FP1/FP0
				8588	bra.w LP1CONT2
				8589
				8590	LP1CARE:
				8591	#--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
				8592	#--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
				8593	#--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
				8594	#--THERE ARE ONLY TWO CASES.
				8595	#--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
				8596	#--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z
				8597	#--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
				8598	#--(1/F) IN A0, Y-F IN FP0, AND FP2-3 SAVED.
				8599
				8600	mov.l XFRAC(%a6),FFRAC(%a6)
				8601	and.l &0xFE000000,FFRAC(%a6)
				8602	or.l &0x01000000,FFRAC(%a6) # F OBTAINED
				8603	cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z > 1
				8604	bge.b KISZERO
				8605
				8606	KISNEG1:
				8607	fmov.s TWO(%pc),%fp0 # fp0 = 2.0
				8608	mov.l &0x3fff0000,F(%a6) # exponent word of F
				8609	clr.l F+8(%a6) # low fraction longword of F is zero
				8610	fsub.x F(%a6),%fp0 # 2-F
				8611	mov.l FFRAC(%a6),%d1
				8612	and.l &0x7E000000,%d1 # keep the six bits below the leading bit as the table index
				8613	asr.l &8,%d1
				8614	asr.l &8,%d1
				8615	asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
				8616	fadd.x %fp1,%fp1 # GET 2Z
				8617	fmovm.x &0xc,-(%sp) # SAVE FP2-3 {%fp2/%fp3}
				8618	fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z
				8619	lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F
				8620	add.l %d1,%a0
				8621	fmov.s negone(%pc),%fp1 # FP1 IS K = -1
				8622	bra.w LP1CONT1
				8623
				8624	KISZERO:
				8625	fmov.s one(%pc),%fp0 # fp0 = 1.0
				8626	mov.l &0x3fff0000,F(%a6) # exponent word of F
				8627	clr.l F+8(%a6) # low fraction longword of F is zero
				8628	fsub.x F(%a6),%fp0 # 1-F
				8629	mov.l FFRAC(%a6),%d1
				8630	and.l &0x7E000000,%d1 # keep the six bits below the leading bit as the table index
				8631	asr.l &8,%d1
				8632	asr.l &8,%d1
				8633	asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F
				8634	fadd.x %fp1,%fp0 # FP0 IS Y-F
				8635	fmovm.x &0xc,-(%sp) # FP2-3 SAVED {%fp2/%fp3}
				8636	lea LOGTBL(%pc),%a0
				8637	add.l %d1,%a0 # A0 IS ADDRESS OF 1/F
				8638	fmov.s zero(%pc),%fp1 # FP1 IS K = 0
				8639	bra.w LP1CONT1
				8640
				8641	LP1NEG0:
				8642	#--FPCR SAVED. D1 IS X IN COMPACT FORM.
				8643	cmp.l %d1,&0
				8644	blt.b LP1NEG
				8645	LP1ZERO:
				8646	fmov.s negone(%pc),%fp0 # fp0 = -1.0 before leaving through t_dz
				8647
				8648	fmov.l %d0,%fpcr
				8649	bra t_dz
				8650
				8651	LP1NEG:
				8652	fmov.s zero(%pc),%fp0 # fp0 = 0.0 before leaving through t_operr
				8653
				8654	fmov.l %d0,%fpcr
				8655	bra t_operr
				8656
				8657	global slognp1d
				8658	#--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
				8659	# Simply return the denorm: no logarithm evaluation is performed on this path
				8660	slognp1d:
				8661	bra t_extdnrm # hand the denormalized operand to t_extdnrm (defined outside this section)
				8662
				8663	#########################################################################
				8664	# satanh(): computes the inverse hyperbolic tangent of a norm input #
				8665	# satanhd(): computes the inverse hyperbolic tangent of a denorm input #
				8666	# #
				8667	# INPUT *************************************************************** #
				8668	# a0 = pointer to extended precision input #
				8669	# d0 = round precision,mode #
				8670	# #
				8671	# OUTPUT ************************************************************** #
				8672	# fp0 = arctanh(X) #
				8673	# #
				8674	# ACCURACY and MONOTONICITY ******************************************* #
				8675	# The returned result is within 3 ulps in 64 significant bit, #
				8676	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				8677	# rounded to double precision. The result is provably monotonic #
				8678	# in double precision. #
				8679	# #
				8680	# ALGORITHM *********************************************************** #
				8681	# #
				8682	# ATANH #
				8683	# 1. If \|X\| >= 1, go to 3. #
				8684	# #
				8685	# 2. (\|X\| < 1) Calculate atanh(X) by #
				8686	# sgn := sign(X) #
				8687	# y := \|X\| #
				8688	# z := 2y/(1-y) #
				8689	# atanh(X) := sgn * (1/2) * logp1(z) #
				8690	# Exit. #
				8691	# #
				8692	# 3. If \|X\| > 1, go to 5. #
				8693	# #
				8694	# 4. (\|X\| = 1) Generate infinity with an appropriate sign and #
				8695	# divide-by-zero by #
				8696	# sgn := sign(X) #
				8697	# atanh(X) := sgn / (+0). #
				8698	# Exit. #
				8699	# #
				8700	# 5. (\|X\| > 1) Generate an invalid operation by 0 * infinity. #
				8701	# Exit. #
				8702	# #
				8703	#########################################################################
				8704
				8705	global satanh
				8706	satanh:
				8707	mov.l (%a0),%d1 # d1 = first longword of the input (sign/exponent in the upper word)
				8708	mov.w 4(%a0),%d1 # lower word of d1 = first word at offset 4: compact form of X
				8709	and.l &0x7FFFFFFF,%d1 # clear the sign bit
				8710	cmp.l %d1,&0x3FFF8000 # compare the magnitude against 1.0 (compact form)
				8711	bge.b ATANHBIG # at or above 1: no finite result
				8712
				8713	#--THIS IS THE USUAL CASE, \|X\| < 1
				8714	#--Y = \|X\|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
				8715
				8716	fabs.x (%a0),%fp0 # Y = \|X\|
				8717	fmov.x %fp0,%fp1
				8718	fneg.x %fp1 # -Y
				8719	fadd.x %fp0,%fp0 # 2Y
				8720	fadd.s &0x3F800000,%fp1 # 1-Y
				8721	fdiv.x %fp1,%fp0 # 2Y/(1-Y)
				8722	mov.l (%a0),%d1
				8723	and.l &0x80000000,%d1 # sign bit of X
				8724	or.l &0x3F000000,%d1 # SIGN(X)*HALF
				8725	mov.l %d1,-(%sp) # push SIGN(X)*HALF (single format) for the final multiply
				8726
				8727	mov.l %d0,-(%sp) # save rnd prec,mode
				8728	clr.l %d0 # pass ext prec,RN
				8729	fmovm.x &0x01,-(%sp) # save Z on stack
				8730	lea (%sp),%a0 # pass ptr to Z
				8731	bsr slognp1 # LOG1P(Z)
				8732	add.l &0xc,%sp # clear Z from stack
				8733
				8734	mov.l (%sp)+,%d0 # fetch old prec,mode
				8735	fmov.l %d0,%fpcr # load it
				8736	mov.b &FMUL_OP,%d1 # last inst is MUL
				8737	fmul.s (%sp)+,%fp0 # multiply by SIGN(X)*HALF, popping it from the stack
				8738	bra t_catch
				8739
				8740	ATANHBIG:
				8741	fabs.x (%a0),%fp0 # \|X\|
				8742	fcmp.s %fp0,&0x3F800000 # compare the magnitude with 1.0
				8743	fbgt t_operr # greater than 1: invalid operation
				8744	bra t_dz # exactly 1: divide-by-zero exit
				8745
				8746	global satanhd
				8747	#--ATANH(X) = X FOR DENORMALIZED X, SO THE INPUT IS RETURNED WITHOUT FURTHER COMPUTATION
				8748	satanhd:
				8749	bra t_extdnrm # hand the denormalized operand to t_extdnrm (defined outside this section)
				8750
				8751	#########################################################################
				8752	# slog10(): computes the base-10 logarithm of a normalized input #
				8753	# slog10d(): computes the base-10 logarithm of a denormalized input #
				8754	# slog2(): computes the base-2 logarithm of a normalized input #
				8755	# slog2d(): computes the base-2 logarithm of a denormalized input #
				8756	# #
				8757	# INPUT *************************************************************** #
				8758	# a0 = pointer to extended precision input #
				8759	# d0 = round precision,mode #
				8760	# #
				8761	# OUTPUT ************************************************************** #
				8762	# fp0 = log_10(X) or log_2(X) #
				8763	# #
				8764	# ACCURACY and MONOTONICITY ******************************************* #
				8765	# The returned result is within 1.7 ulps in 64 significant bit, #
				8766	# i.e. within 0.5003 ulp to 53 bits if the result is subsequently #
				8767	# rounded to double precision. The result is provably monotonic #
				8768	# in double precision. #
				8769	# #
				8770	# ALGORITHM *********************************************************** #
				8771	# #
				8772	# slog10d: #
				8773	# #
				8774	# Step 0. If X < 0, create a NaN and raise the invalid operation #
				8775	# flag. Otherwise, save FPCR in D1; set FpCR to default. #
				8776	# Notes: Default means round-to-nearest mode, no floating-point #
				8777	# traps, and precision control = double extended. #
				8778	# #
				8779	# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
				8780	# Notes: Even if X is denormalized, log(X) is always normalized. #
				8781	# #
				8782	# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
				8783	# 2.1 Restore the user FPCR #
				8784	# 2.2 Return ans := Y * INV_L10. #
				8785	# #
				8786	# slog10: #
				8787	# #
				8788	# Step 0. If X < 0, create a NaN and raise the invalid operation #
				8789	# flag. Otherwise, save FPCR in D1; set FpCR to default. #
				8790	# Notes: Default means round-to-nearest mode, no floating-point #
				8791	# traps, and precision control = double extended. #
				8792	# #
				8793	# Step 1. Call sLogN to obtain Y = log(X), the natural log of X. #
				8794	# #
				8795	# Step 2. Compute log_10(X) = log(X) * (1/log(10)). #
				8796	# 2.1 Restore the user FPCR #
				8797	# 2.2 Return ans := Y * INV_L10. #
				8798	# #
				8799	# sLog2d: #
				8800	# #
				8801	# Step 0. If X < 0, create a NaN and raise the invalid operation #
				8802	# flag. Otherwise, save FPCR in D1; set FpCR to default. #
				8803	# Notes: Default means round-to-nearest mode, no floating-point #
				8804	# traps, and precision control = double extended. #
				8805	# #
				8806	# Step 1. Call slognd to obtain Y = log(X), the natural log of X. #
				8807	# Notes: Even if X is denormalized, log(X) is always normalized. #
				8808	# #
				8809	# Step 2. Compute log_2(X) = log(X) * (1/log(2)). #
				8810	# 2.1 Restore the user FPCR #
				8811	# 2.2 Return ans := Y * INV_L2. #
				8812	# #
				8813	# sLog2: #
				8814	# #
				8815	# Step 0. If X < 0, create a NaN and raise the invalid operation #
				8816	# flag. Otherwise, save FPCR in D1; set FpCR to default. #
				8817	# Notes: Default means round-to-nearest mode, no floating-point #
				8818	# traps, and precision control = double extended. #
				8819	# #
				8820	# Step 1. If X is not an integer power of two, i.e., X != 2^k, #
				8821	# go to Step 3. #
				8822	# #
				8823	# Step 2. Return k. #
				8824	# 2.1 Get integer k, X = 2^k. #
				8825	# 2.2 Restore the user FPCR. #
				8826	# 2.3 Return ans := convert-to-double-extended(k). #
				8827	# #
				8828	# Step 3. Call sLogN to obtain Y = log(X), the natural log of X. #
				8829	# #
				8830	# Step 4. Compute log_2(X) = log(X) * (1/log(2)). #
				8831	# 4.1 Restore the user FPCR #
				8832	# 4.2 Return ans := Y * INV_L2. #
				8833	# #
				8834	#########################################################################
				8835
				8836	INV_L10:
				8837	long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 # 1/log(10); used by slog10/slog10d via fmul.x
				8838
				8839	INV_L2:
				8840	long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 # 1/log(2); used by slog2/slog2d via fmul.x
				8841
				8842	global slog10
				8843	#--entry point for Log10(X), X is normalized
				8844	slog10:
				8845	fmov.b &0x1,%fp0 # fp0 = 1
				8846	fcmp.x %fp0,(%a0) # if operand == 1,
				8847	fbeq.l ld_pzero # return an EXACT zero
				8848
				8849	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				8850	blt.w invalid # X < 0: take the operand-error path
				8851	mov.l %d0,-(%sp) # save caller's rnd prec,mode
				8852	clr.l %d0 # d0 = 0 for the slogn call
				8853	bsr slogn # log(X), X normal.
				8854	fmov.l (%sp)+,%fpcr # load the saved prec,mode into the FPCR
				8855	fmul.x INV_L10(%pc),%fp0 # fp0 = log(X) * INV_L10
				8856	bra t_inx2
				8857
				8858	global slog10d
				8859	#--entry point for Log10(X), X is denormalized
				8860	slog10d:
				8861	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				8862	blt.w invalid # X < 0: take the operand-error path
				8863	mov.l %d0,-(%sp) # save caller's rnd prec,mode
				8864	clr.l %d0 # d0 = 0 for the slognd call
				8865	bsr slognd # log(X), X denorm.
				8866	fmov.l (%sp)+,%fpcr # load the saved prec,mode into the FPCR
				8867	fmul.x INV_L10(%pc),%fp0 # fp0 = log(X) * INV_L10
				8868	bra t_minx2
				8869
				8870	global slog2
				8871	#--entry point for Log2(X), X is normalized
				8872	slog2:
				8873	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				8874	blt.w invalid # X < 0: take the operand-error path
				8875
				8876	mov.l 8(%a0),%d1 # low mantissa longword
				8877	bne.b continue # X is not 2^k
				8878
				8879	mov.l 4(%a0),%d1 # high mantissa longword
				8880	and.l &0x7FFFFFFF,%d1 # ignore its most significant bit
				8881	bne.b continue # other mantissa bits set: X is not 2^k
				8882
				8883	#--X = 2^k.
				8884	mov.w (%a0),%d1 # sign/exponent word
				8885	and.l &0x00007FFF,%d1 # d1 = exponent field only (upper word cleared too)
				8886	sub.l &0x3FFF,%d1 # d1 = k
				8887	beq.l ld_pzero # k == 0: return zero
				8888	fmov.l %d0,%fpcr # load user's rnd prec,mode
				8889	fmov.l %d1,%fp0 # fp0 = k converted from integer
				8890	bra t_inx2
				8891
				8892	continue:
				8893	mov.l %d0,-(%sp) # save caller's rnd prec,mode
				8894	clr.l %d0 # d0 = 0 for the slogn call
				8895	bsr slogn # log(X), X normal.
				8896	fmov.l (%sp)+,%fpcr # load the saved prec,mode into the FPCR
				8897	fmul.x INV_L2(%pc),%fp0 # fp0 = log(X) * INV_L2
				8898	bra t_inx2
				8899
				8900	invalid: # shared X < 0 exit for slog10, slog10d, slog2 and slog2d
				8901	bra t_operr
				8902
				8903	global slog2d
				8904	#--entry point for Log2(X), X is denormalized
				8905	slog2d:
				8906	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				8907	blt.w invalid # X < 0: take the operand-error path
				8908	mov.l %d0,-(%sp) # save caller's rnd prec,mode
				8909	clr.l %d0 # d0 = 0 for the slognd call
				8910	bsr slognd # log(X), X denorm.
				8911	fmov.l (%sp)+,%fpcr # load the saved prec,mode into the FPCR
				8912	fmul.x INV_L2(%pc),%fp0 # fp0 = log(X) * INV_L2
				8913	bra t_minx2
				8914
				8915	#########################################################################
				8916	# stwotox(): computes 2**X for a normalized input #
				8917	# stwotoxd(): computes 2**X for a denormalized input #
				8918	# stentox(): computes 10**X for a normalized input #
				8919	# stentoxd(): computes 10**X for a denormalized input #
				8920	# #
				8921	# INPUT *************************************************************** #
				8922	# a0 = pointer to extended precision input #
				8923	# d0 = round precision,mode #
				8924	# #
				8925	# OUTPUT ************************************************************** #
				8926	# fp0 = 2**X or 10**X #
				8927	# #
				8928	# ACCURACY and MONOTONICITY ******************************************* #
				8929	# The returned result is within 2 ulps in 64 significant bit, #
				8930	# i.e. within 0.5001 ulp to 53 bits if the result is subsequently #
				8931	# rounded to double precision. The result is provably monotonic #
				8932	# in double precision. #
				8933	# #
				8934	# ALGORITHM *********************************************************** #
				8935	# #
				8936	# twotox #
				8937	# 1. If |X| > 16480, go to ExpBig. #
				8938	# #
				8939	# 2. If |X| < 2**(-70), go to ExpSm. #
				8940	# #
				8941	# 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore #
				8942	# decompose N as #
				8943	# N = 64(M + M') + j, j = 0,1,2,...,63. #
				8944	# #
				8945	# 4. Overwrite r := r * log2. Then #
				8946	# 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
				8947	# Go to expr to compute that expression. #
				8948	# #
				8949	# tentox #
				8950	# 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. #
				8951	# #
				8952	# 2. If |X| < 2**(-70), go to ExpSm. #
				8953	# #
				8954	# 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set #
				8955	# N := round-to-int(y). Decompose N as #
				8956	# N = 64(M + M') + j, j = 0,1,2,...,63. #
				8957	# #
				8958	# 4. Define r as #
				8959	# r := ((X - N*L1)-N*L2) * L10 #
				8960	# where L1, L2 are the leading and trailing parts of #
				8961	# log_10(2)/64 and L10 is the natural log of 10. Then #
				8962	# 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). #
				8963	# Go to expr to compute that expression. #
				8964	# #
				8965	# expr #
				8966	# 1. Fetch 2**(j/64) from table as Fact1 and Fact2. #
				8967	# #
				8968	# 2. Overwrite Fact1 and Fact2 by #
				8969	# Fact1 := 2**(M) * Fact1 #
				8970	# Fact2 := 2**(M) * Fact2 #
				8971	# Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). #
				8972	# #
				8973	# 3. Calculate P where 1 + P approximates exp(r): #
				8974	# P = r + r*r*(A1+r*(A2+...+r*A5)). #
				8975	# #
				8976	# 4. Let AdjFact := 2**(M'). Return #
				8977	# AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). #
				8978	# Exit. #
				8979	# #
				8980	# ExpBig #
				8981	# 1. Generate overflow by Huge * Huge if X > 0; otherwise, #
				8982	# generate underflow by Tiny * Tiny. #
				8983	# #
				8984	# ExpSm #
				8985	# 1. Return 1 + X. #
				8986	# #
				8987	#########################################################################
				8988
				8989	L2TEN64:
				8990	long 0x406A934F,0x0979A371 # 64LOG10/LOG2
				8991	L10TWO1:
				8992	long 0x3F734413,0x509F8000 # LOG2/64LOG10
				8993
				8994	L10TWO2:
				8995	long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000
				8996
				8997	LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000
				8998
				8999	LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000
				9000
				9001	EXPA5: long 0x3F56C16D,0x6F7BD0B2
				9002	EXPA4: long 0x3F811112,0x302C712C
				9003	EXPA3: long 0x3FA55555,0x55554CC1
				9004	EXPA2: long 0x3FC55555,0x55554A54
				9005	EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000
				9006
				9007	TEXPTBL:
				9008	long 0x3FFF0000,0x80000000,0x00000000,0x3F738000
				9009	long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
				9010	long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
				9011	long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
				9012	long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
				9013	long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
				9014	long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
				9015	long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
				9016	long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
				9017	long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
				9018	long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
				9019	long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
				9020	long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
				9021	long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
				9022	long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
				9023	long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
				9024	long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
				9025	long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
				9026	long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
				9027	long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
				9028	long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
				9029	long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
				9030	long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
				9031	long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
				9032	long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
				9033	long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
				9034	long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
				9035	long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
				9036	long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
				9037	long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
				9038	long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
				9039	long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
				9040	long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
				9041	long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
				9042	long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
				9043	long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
				9044	long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
				9045	long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
				9046	long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
				9047	long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
				9048	long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
				9049	long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
				9050	long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
				9051	long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
				9052	long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
				9053	long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
				9054	long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
				9055	long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
				9056	long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
				9057	long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
				9058	long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
				9059	long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
				9060	long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
				9061	long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
				9062	long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
				9063	long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
				9064	long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
				9065	long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
				9066	long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
				9067	long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
				9068	long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
				9069	long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
				9070	long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
				9071	long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
				9072
				9073	set INT,L_SCR1 # integer N, written and re-read by TWOMAIN/TENMAIN
				9074
				9075	set X,FP_SCR0 # copy of the input operand
				9076	set XDCARE,X+2
				9077	set XFRAC,X+4
				9078
				9079	set ADJFACT,FP_SCR0 # 2^(M'); same scratch area as X and FACT1
				9080
				9081	set FACT1,FP_SCR0 # leading part of 2^M * 2^(J/64); same scratch area as X
				9082	set FACT1HI,FACT1+4
				9083	set FACT1LOW,FACT1+8
				9084
				9085	set FACT2,FP_SCR1 # trailing part of 2^M * 2^(J/64)
				9086	set FACT2HI,FACT2+4
				9087	set FACT2LOW,FACT2+8
				9088
				9089	global stwotox
				9090	#--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
				9091	stwotox:
				9092	fmovm.x (%a0),&0x80 # LOAD INPUT
				9093
				9094	mov.l (%a0),%d1 # d1 hi word = sign/exponent word of X
				9095	mov.w 4(%a0),%d1 # d1 lo word = top 16 bits of the mantissa
				9096	fmov.x %fp0,X(%a6) # keep a copy of X in FP_SCR0
				9097	and.l &0x7FFFFFFF,%d1 # clear the sign bit: d1 now describes |X|
				9098
				9099	cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
				9100	bge.b TWOOK1
				9101	bra.w EXPBORS # no; |X| is too small
				9102
				9103	TWOOK1:
				9104	cmp.l %d1,&0x400D80C0 # |X| > 16480?
				9105	ble.b TWOMAIN
				9106	bra.w EXPBORS # yes; |X| is too large
				9107
				9108	TWOMAIN:
				9109	#--USUAL CASE, 2^(-70) <= |X| <= 16480
				9110
				9111	fmov.x %fp0,%fp1 # fp1 = X
				9112	fmul.s &0x42800000,%fp1 # 64 * X
				9113	fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X)
				9114	mov.l %d2,-(%sp) # save d2; it is popped again in expr
				9115	lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
				9116	fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
				9117	mov.l INT(%a6),%d1 # d1 = N
				9118	mov.l %d1,%d2 # d2 = N
				9119	and.l &0x3F,%d1 # d1 IS J
				9120	asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) (16 bytes per entry)
				9121	add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
				9122	asr.l &6,%d2 # d2 IS L, N = 64L + J
				9123	mov.l %d2,%d1
				9124	asr.l &1,%d1 # d1 IS M
				9125	sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
				9126	add.l &0x3FFF,%d2 # d2 = M' + 0x3FFF; expr stores it as ADJFACT's exponent word
				9127
				9128	#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
				9129	#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
				9130	#--ADJFACT = 2^(M').
				9131	#--ON THE STACK SO FAR: d2 ONLY. fp2/fp3 ARE PUSHED NEXT.
				9132
				9133	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				9134
				9135	fmul.s &0x3C800000,%fp1 # (1/64)*N
				9136	mov.l (%a1)+,FACT1(%a6) # FACT1 = first three longwords of the table entry
				9137	mov.l (%a1)+,FACT1HI(%a6)
				9138	mov.l (%a1)+,FACT1LOW(%a6)
				9139	mov.w (%a1)+,FACT2(%a6) # FACT2 first word = hi word of the entry's 4th longword
				9140
				9141	fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X)
				9142
				9143	mov.w (%a1)+,FACT2HI(%a6) # FACT2 top 16 mantissa bits = lo word of the 4th longword
				9144	clr.w FACT2HI+2(%a6) # remaining FACT2 mantissa bits are zero
				9145	clr.l FACT2LOW(%a6)
				9146	add.w %d1,FACT1(%a6) # add M to FACT1's first word: FACT1 := 2^M * FACT1
				9147	fmul.x LOG2(%pc),%fp0 # FP0 IS R
				9148	add.w %d1,FACT2(%a6) # add M to FACT2's first word: FACT2 := 2^M * FACT2
				9149
				9150	bra.w expr
				9151
				9152	EXPBORS:
				9153	#--NOTHING HAS BEEN PUSHED YET ON THIS PATH; d0 STILL HOLDS THE USER'S PREC,MODE
				9154	cmp.l %d1,&0x3FFF8000 # |X| > 1 ?
				9155	bgt.b TEXPBIG # yes; X was rejected for being too large
				9156
				9157	#--|X| IS SMALL, RETURN 1 + X
				9158
				9159	fmov.l %d0,%fpcr # restore users round prec,mode
				9160	fadd.s &0x3F800000,%fp0 # RETURN 1 + X
				9161	bra t_pinx2
				9162
				9163	TEXPBIG:
				9164	#--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
				9165	#--NOTHING HAS BEEN PUSHED ON THE STACK AT THIS POINT
				9166	mov.l X(%a6),%d1 # first longword of the saved X; its msb is the sign
				9167	cmp.l %d1,&0
				9168	blt.b EXPNEG # X < 0: underflow
				9169
				9170	bra t_ovfl2 # t_ovfl expects positive value
				9171
				9172	EXPNEG:
				9173	bra t_unfl2 # t_unfl expects positive value
				9174
				9175	global stwotoxd
				9176	stwotoxd:
				9177	#--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
				9178
				9179	fmov.l %d0,%fpcr # set user's rounding mode/precision
				9180	fmov.s &0x3F800000,%fp0 # fp0 = 1.0
				9181	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				9182	or.l &0x00800001,%d1 # set single-precision exponent field = 1, fraction lsb = 1
				9183	fadd.s %d1,%fp0 # RETURN 1 + (d1 read as a single, carrying X's sign)
				9184	bra t_pinx2
				9185
				9186	global stentox
				9187	#--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
				9188	stentox:
				9189	fmovm.x (%a0),&0x80 # LOAD INPUT
				9190
				9191	mov.l (%a0),%d1 # d1 hi word = sign/exponent word of X
				9192	mov.w 4(%a0),%d1 # d1 lo word = top 16 bits of the mantissa
				9193	fmov.x %fp0,X(%a6) # keep a copy of X in FP_SCR0
				9194	and.l &0x7FFFFFFF,%d1 # clear the sign bit: d1 now describes |X|
				9195
				9196	cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)?
				9197	bge.b TENOK1
				9198	bra.w EXPBORS # no; |X| is too small (EXPBORS is shared with stwotox)
				9199
				9200	TENOK1:
				9201	cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ?
				9202	ble.b TENMAIN
				9203	bra.w EXPBORS # no; |X| is too large
				9204
				9205	TENMAIN:
				9206	#--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
				9207
				9208	fmov.x %fp0,%fp1 # fp1 = X
				9209	fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2
				9210	fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2)
				9211	mov.l %d2,-(%sp) # save d2; it is popped again in expr
				9212	lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64)
				9213	fmov.l INT(%a6),%fp1 # N --> FLOATING FMT
				9214	mov.l INT(%a6),%d1 # d1 = N
				9215	mov.l %d1,%d2 # d2 = N
				9216	and.l &0x3F,%d1 # d1 IS J
				9217	asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) (16 bytes per entry)
				9218	add.l %d1,%a1 # ADDRESS FOR 2^(J/64)
				9219	asr.l &6,%d2 # d2 IS L, N = 64L + J
				9220	mov.l %d2,%d1
				9221	asr.l &1,%d1 # d1 IS M
				9222	sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J
				9223	add.l &0x3FFF,%d2 # d2 = M' + 0x3FFF; expr stores it as ADJFACT's exponent word
				9224
				9225	#--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
				9226	#--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
				9227	#--ADJFACT = 2^(M').
				9228	#--ON THE STACK SO FAR: d2 ONLY. fp2/fp3 ARE PUSHED NEXT.
				9229	fmovm.x &0x0c,-(%sp) # save fp2/fp3
				9230
				9231	fmov.x %fp1,%fp2 # fp2 = N
				9232
				9233	fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD
				9234	mov.l (%a1)+,FACT1(%a6) # FACT1 = first three longwords of the table entry
				9235
				9236	fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL
				9237
				9238	mov.l (%a1)+,FACT1HI(%a6)
				9239	mov.l (%a1)+,FACT1LOW(%a6)
				9240	fsub.x %fp1,%fp0 # X - N*L_LEAD
				9241	mov.w (%a1)+,FACT2(%a6) # FACT2 first word = hi word of the entry's 4th longword
				9242
				9243	fsub.x %fp2,%fp0 # (X - N*L_LEAD) - N*L_TRAIL
				9244
				9245	mov.w (%a1)+,FACT2HI(%a6) # FACT2 top 16 mantissa bits = lo word of the 4th longword
				9246	clr.w FACT2HI+2(%a6) # remaining FACT2 mantissa bits are zero
				9247	clr.l FACT2LOW(%a6)
				9248
				9249	fmul.x LOG10(%pc),%fp0 # FP0 IS R
				9250	add.w %d1,FACT1(%a6) # add M to FACT1's first word: FACT1 := 2^M * FACT1
				9251	add.w %d1,FACT2(%a6) # add M to FACT2's first word: FACT2 := 2^M * FACT2
				9252
				9253	expr: # entered by fall-through from stentox and by branch from stwotox
				9254	#--ON THE STACK, IN PUSH ORDER: d2, THEN fp2/fp3.
				9255	#--d2 HOLDS THE EXPONENT WORD FOR ADJFACT = 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
				9256	#--FP0 IS R. THE FOLLOWING CODE COMPUTES
				9257	#-- 2**(M'+M) * 2**(J/64) * EXP(R)
				9258
				9259	fmov.x %fp0,%fp1
				9260	fmul.x %fp1,%fp1 # FP1 IS S = R*R
				9261
				9262	fmov.d EXPA5(%pc),%fp2 # FP2 IS A5
				9263	fmov.d EXPA4(%pc),%fp3 # FP3 IS A4
				9264
				9265	fmul.x %fp1,%fp2 # FP2 IS S*A5
				9266	fmul.x %fp1,%fp3 # FP3 IS S*A4
				9267
				9268	fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5
				9269	fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4
				9270
				9271	fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5)
				9272	fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4)
				9273
				9274	fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5)
				9275	fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4)
				9276
				9277	fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5))
				9278	fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4)
				9279	fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1
				9280
				9281	fmovm.x (%sp)+,&0x30 # restore fp2/fp3
				9282
				9283	#--FINAL RECONSTRUCTION PROCESS
				9284	#--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1) - (1 OR 0)
				9285
				9286	fmul.x FACT1(%a6),%fp0 # FP0 = FACT1*(EXP(R)-1)
				9287	fadd.x FACT2(%a6),%fp0 # ... + FACT2
				9288	fadd.x FACT1(%a6),%fp0 # ... + FACT1
				9289
				9290	fmov.l %d0,%fpcr # restore users round prec,mode
				9291	mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT (ADJFACT reuses FP_SCR0; FACT1 is no longer read)
				9292	mov.l (%sp)+,%d2 # pop the d2 saved by TWOMAIN/TENMAIN
				9293	mov.l &0x80000000,ADJFACT+4(%a6) # mantissa: only the top bit set
				9294	clr.l ADJFACT+8(%a6)
				9295	mov.b &FMUL_OP,%d1 # last inst is MUL
				9296	fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT
				9297	bra t_catch
				9298
				9299	global stentoxd
				9300	stentoxd:
				9301	#--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
				9302
				9303	fmov.l %d0,%fpcr # set user's rounding mode/precision
				9304	fmov.s &0x3F800000,%fp0 # fp0 = 1.0
				9305	mov.l (%a0),%d1 # first longword of X; its msb is the sign
				9306	or.l &0x00800001,%d1 # set single-precision exponent field = 1, fraction lsb = 1
				9307	fadd.s %d1,%fp0 # RETURN 1 + (d1 read as a single, carrying X's sign)
				9308	bra t_pinx2
				9309
				9310	#########################################################################
				9311	# smovcr(): returns the ROM constant at the offset specified in d1 #
				9312	# rounded to the mode and precision specified in d0. #
				9313	# #
				9314	# INPUT *************************************************************** #
				9315	# d0 = rnd prec,mode #
				9316	# d1 = ROM offset #
				9317	# #
				9318	# OUTPUT ************************************************************** #
				9319	# fp0 = the ROM constant rounded to the user's rounding mode,prec #
				9320	# #
				9321	#########################################################################
				9322
				9323	global smovcr
				9324	smovcr:
				9325	mov.l %d1,-(%sp) # save rom offset for a sec
				9326
				9327	lsr.b &0x4,%d0 # shift ctrl bits to lo
				9328	mov.l %d0,%d1 # make a copy
				9329	andi.w &0x3,%d1 # extract rnd mode
				9330	andi.w &0xc,%d0 # extract rnd prec
				9331	swap %d0 # put rnd prec in hi
				9332	mov.w %d1,%d0 # put rnd mode in lo
				9333
				9334	mov.l (%sp)+,%d1 # get rom offset
				9335
				9336	#
				9337	# check range of offset
				9338	#
				9339	tst.b %d1 # if zero, offset is to pi
				9340	beq.b pi_tbl # it is pi
				9341	cmpi.b %d1,&0x0a # check range $01 - $0a
				9342	ble.b z_val # if in this range, return zero
				9343	cmpi.b %d1,&0x0e # check range $0b - $0e
				9344	ble.b sm_tbl # valid constants in this range
				9345	cmpi.b %d1,&0x2f # check range $0f - $2f
				9346	ble.b z_val # if in this range, return zero
				9347	cmpi.b %d1,&0x3f # check range $30 - $3f
				9348	ble.b bg_tbl # valid constants in this range
				9349
				9350	z_val: # also reached by fall-through when no range above matched
				9351	bra.l ld_pzero # return a zero
				9352
				9353	#
				9354	# the answer is PI rounded to the proper precision.
				9355	#
				9356	# fetch a pointer to the answer table relating to the proper rounding
				9357	# precision.
				9358	#
				9359	pi_tbl:
				9360	tst.b %d0 # is rmode RN?
				9361	bne.b pi_not_rn # no
				9362	pi_rn:
				9363	lea.l PIRN(%pc),%a0 # yes; load PI RN table addr
				9364	bra.w set_finx
				9365	pi_not_rn:
				9366	cmpi.b %d0,&rp_mode # is rmode RP?
				9367	beq.b pi_rp # yes
				9368	pi_rzrm:
				9369	lea.l PIRZRM(%pc),%a0 # no; load PI RZ,RM table addr
				9370	bra.b set_finx
				9371	pi_rp:
				9372	lea.l PIRP(%pc),%a0 # load PI RP table addr
				9373	bra.b set_finx
				9374
				9375	#
				9376	# the answer is one of:
				9377	# $0B log10(2) (inexact)
				9378	# $0C e (inexact)
				9379	# $0D log2(e) (inexact)
				9380	# $0E log10(e) (exact)
				9381	#
				9382	# fetch a pointer to the answer table relating to the proper rounding
				9383	# precision.
				9384	#
				9385	sm_tbl:
				9386	subi.b &0xb,%d1 # make offset in 0-3 range
				9387	tst.b %d0 # is rmode RN?
				9388	bne.b sm_not_rn # no
				9389	sm_rn:
				9390	lea.l SMALRN(%pc),%a0 # yes; load RN table addr
				9391	sm_tbl_cont:
				9392	cmpi.b %d1,&0x2 # is result log10(e)?
				9393	ble.b set_finx # no; answer is inexact
				9394	bra.b no_finx # yes; answer is exact
				9395	sm_not_rn:
				9396	cmpi.b %d0,&rp_mode # is rmode RP?
				9397	beq.b sm_rp # yes
				9398	sm_rzrm:
				9399	lea.l SMALRZRM(%pc),%a0 # no; load RZ,RM table addr
				9400	bra.b sm_tbl_cont
				9401	sm_rp:
				9402	lea.l SMALRP(%pc),%a0 # load RP table addr
				9403	bra.b sm_tbl_cont
				9404
				9405	#
				9406	# the answer is one of:
				9407	# $30 ln(2) (inexact)
				9408	# $31 ln(10) (inexact)
				9409	# $32 10^0 (exact)
				9410	# $33 10^1 (exact)
				9411	# $34 10^2 (exact)
				9412	# $35 10^4 (exact)
				9413	# $36 10^8 (exact)
				9414	# $37 10^16 (exact)
				9415	# $38 10^32 (inexact)
				9416	# $39 10^64 (inexact)
				9417	# $3A 10^128 (inexact)
				9418	# $3B 10^256 (inexact)
				9419	# $3C 10^512 (inexact)
				9420	# $3D 10^1024 (inexact)
				9421	# $3E 10^2048 (inexact)
				9422	# $3F 10^4096 (inexact)
				9423	#
				9424	# fetch a pointer to the answer table relating to the proper rounding
				9425	# precision.
				9426	#
				9427	bg_tbl:
				9428	subi.b &0x30,%d1 # make offset in 0-f range
				9429	tst.b %d0 # is rmode RN?
				9430	bne.b bg_not_rn # no
				9431	bg_rn:
				9432	lea.l BIGRN(%pc),%a0 # yes; load RN table addr
				9433	bg_tbl_cont:
				9434	cmpi.b %d1,&0x1 # is offset <= $31?
				9435	ble.b set_finx # yes; answer is inexact
				9436	cmpi.b %d1,&0x7 # is $32 <= offset <= $37?
				9437	ble.b no_finx # yes; answer is exact
				9438	bra.b set_finx # no; answer is inexact
				9439	bg_not_rn:
				9440	cmpi.b %d0,&rp_mode # is rmode RP?
				9441	beq.b bg_rp # yes
				9442	bg_rzrm:
				9443	lea.l BIGRZRM(%pc),%a0 # no; load RZ,RM table addr
				9444	bra.b bg_tbl_cont
				9445	bg_rp:
				9446	lea.l BIGRP(%pc),%a0 # load RP table addr
				9447	bra.b bg_tbl_cont
				9448
				9449	# answer is inexact, so set INEX2 and AINEX in the user's FPSR.
				9450	set_finx:
				9451	ori.l &inx2a_mask,USER_FPSR(%a6) # set INEX2/AINEX
				9452	no_finx:
				9453	mulu.w &0xc,%d1 # scale index by 12 bytes per table entry
				9454	swap %d0 # put rnd prec in lo word
				9455	tst.b %d0 # is precision extended?
				9456
				9457	bne.b not_ext # no; sgl/dbl result must go through _round
				9458
				9459	# Precision is extended
				9460	fmovm.x (%a0,%d1.w),&0x80 # return result in fp0
				9461	rts
				9462
				9463	# Precision is single or double
				9464	not_ext:
				9465	swap %d0 # rnd prec in upper word
				9466
				9467	# call round() to round the answer to the proper precision.
				9468	# exponents out of range for single or double DO NOT cause underflow
				9469	# or overflow.
				9470	mov.w 0x0(%a0,%d1.w),FP_SCR1_EX(%a6) # load first word
				9471	mov.l 0x4(%a0,%d1.w),FP_SCR1_HI(%a6) # load second word
				9472	mov.l 0x8(%a0,%d1.w),FP_SCR1_LO(%a6) # load third word
				9473	mov.l %d0,%d1 # d1 = rnd prec (hi word), rnd mode (lo word)
				9474	clr.l %d0 # clear g,r,s
				9475	lea FP_SCR1(%a6),%a0 # pass ptr to answer
				9476	clr.w LOCAL_SGN(%a0) # sign always positive
				9477	bsr.l _round # round the mantissa
				9478
				9479	fmovm.x (%a0),&0x80 # return rounded result in fp0
				9480	rts
				9481
				9482	align 0x4
				9483
				9484	PIRN: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
				9485	PIRZRM: long 0x40000000,0xc90fdaa2,0x2168c234 # pi
				9486	PIRP: long 0x40000000,0xc90fdaa2,0x2168c235 # pi
				9487
				9488	SMALRN: long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
				9489	long 0x40000000,0xadf85458,0xa2bb4a9a # e
				9490	long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
				9491	long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
				9492	long 0x00000000,0x00000000,0x00000000 # 0.0
				9493
				9494	SMALRZRM:
				9495	long 0x3ffd0000,0x9a209a84,0xfbcff798 # log10(2)
				9496	long 0x40000000,0xadf85458,0xa2bb4a9a # e
				9497	long 0x3fff0000,0xb8aa3b29,0x5c17f0bb # log2(e)
				9498	long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
				9499	long 0x00000000,0x00000000,0x00000000 # 0.0
				9500
				9501	SMALRP: long 0x3ffd0000,0x9a209a84,0xfbcff799 # log10(2)
				9502	long 0x40000000,0xadf85458,0xa2bb4a9b # e
				9503	long 0x3fff0000,0xb8aa3b29,0x5c17f0bc # log2(e)
				9504	long 0x3ffd0000,0xde5bd8a9,0x37287195 # log10(e)
				9505	long 0x00000000,0x00000000,0x00000000 # 0.0
				9506
				9507	BIGRN: long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
				9508	long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
				9509
				9510	long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
				9511	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				9512	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				9513	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				9514	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				9515	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				9516	long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
				9517	long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
				9518	long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
				9519	long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
				9520	long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
				9521	long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
				9522	long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
				9523	long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
				9524
				9525	BIGRZRM:
				9526	long 0x3ffe0000,0xb17217f7,0xd1cf79ab # ln(2)
				9527	long 0x40000000,0x935d8ddd,0xaaa8ac16 # ln(10)
				9528
				9529	long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
				9530	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				9531	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				9532	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				9533	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				9534	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				9535	long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
				9536	long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
				9537	long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
				9538	long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
				9539	long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
				9540	long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
				9541	long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
				9542	long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
				9543
				9544	BIGRP:
				9545	long 0x3ffe0000,0xb17217f7,0xd1cf79ac # ln(2)
				9546	long 0x40000000,0x935d8ddd,0xaaa8ac17 # ln(10)
				9547
				9548	long 0x3fff0000,0x80000000,0x00000000 # 10 ^ 0
				9549	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				9550	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				9551	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				9552	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				9553	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				9554	long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
				9555	long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
				9556	long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
				9557	long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
				9558	long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
				9559	long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
				9560	long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
				9561	long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
				9562
				9563	#########################################################################
				9564	# sscale(): computes the destination operand scaled by the source #
				9565	# operand. If the absolute value of the source operand is #
				9566	# >= 2^14, an overflow or underflow is returned. #
				9567	# #
				9568	# INPUT *************************************************************** #
				9569	# a0 = pointer to double-extended source operand X #
				9570	# a1 = pointer to double-extended destination operand Y #
				9571	# #
				9572	# OUTPUT ************************************************************** #
				9573	# fp0 = scale(X,Y) #
				9574	# #
				9575	#########################################################################
				9576
				9577	set SIGN, L_SCR1
				9578
				9579	global sscale
					# sscale: multiply the dst operand (a1) by 2^int(src), where the src
					# operand is at a0. d0 holds the ctrl bits on entry; they are parked on
					# the stack and popped on every exit path.
				9580	sscale:
				9581	mov.l %d0,-(%sp) # store off ctrl bits for now
				9582
				9583	mov.w DST_EX(%a1),%d1 # get dst exponent
				9584	smi.b SIGN(%a6) # use SIGN to hold dst sign
				9585	andi.l &0x00007fff,%d1 # strip sign from dst exp
				9586
				9587	mov.w SRC_EX(%a0),%d0 # check src bounds
				9588	andi.w &0x7fff,%d0 # clr src sign bit
				9589	cmpi.w %d0,&0x3fff # is |src| < 1 (int part ZERO)?
				9590	blt.w src_small # yes
				9591	cmpi.w %d0,&0x400c # no; is src too big?
				9592	bgt.w src_out # yes
				9593
				9594	#
				9595	# Source is within 2^14 range.
				9596	#
				9597	src_ok:
				9598	fintrz.x SRC(%a0),%fp0 # calc int of src
				9599	fmov.l %fp0,%d0 # int src to d0
				9600	# don't want any accrued bits from the fintrz showing up later since
				9601	# we may need to read the fpsr for the last fp op in t_catch2().
				9602	fmov.l &0x0,%fpsr
				9603
				9604	tst.b DST_HI(%a1) # is dst denormalized?
				9605	bmi.b sok_norm # no; msb of dst mantissa is set
				9606
				9607	# the dst is a DENORM. normalize the DENORM and add the adjustment to
				9608	# the src value. then, jump to the norm part of the routine.
				9609	sok_dnrm:
				9610	mov.l %d0,-(%sp) # save src for now
				9611
				9612	mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy
				9613	mov.l DST_HI(%a1),FP_SCR0_HI(%a6)
				9614	mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
				9615
				9616	lea FP_SCR0(%a6),%a0 # pass ptr to DENORM
				9617	bsr.l norm # normalize the DENORM
				9618	neg.l %d0 # negate the adjustment norm left in d0
				9619	add.l (%sp)+,%d0 # add adjustment to src
				9620
				9621	fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM
				9622
				9623	cmpi.w %d0,&-0x3fff # is the shft amt really low?
				9624	bge.b sok_norm2 # thank goodness no
				9625
				9626	# the multiply factor that we're trying to create should be a denorm
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	9627	# for the multiply to work. Therefore, we're going to actually do a
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	9628	# multiply with a denorm which will cause an unimplemented data type
				9629	# exception to be put into the machine which will be caught and corrected
				9630	# later. we don't do this with the DENORMs above because this method
				9631	# is slower. but, don't fret, I don't see it being used much either.
				9632	fmov.l (%sp)+,%fpcr # restore user fpcr
				9633	mov.l &0x80000000,%d1 # load normalized mantissa
				9634	subi.l &-0x3fff,%d0 # how many should we shift?
				9635	neg.l %d0 # make it positive
				9636	cmpi.b %d0,&0x20 # is it >= 32?
				9637	bge.b sok_dnrm_32 # yes
				9638	lsr.l %d0,%d1 # no; bit stays in upper lw
				9639	clr.l -(%sp) # insert zero low mantissa
				9640	mov.l %d1,-(%sp) # insert new high mantissa
				9641	clr.l -(%sp) # make zero exponent
				9642	bra.b sok_norm_cont
				9643	sok_dnrm_32:
				9644	subi.b &0x20,%d0 # get shift count
				9645	lsr.l %d0,%d1 # make low mantissa longword
				9646	mov.l %d1,-(%sp) # insert new low mantissa
				9647	clr.l -(%sp) # insert zero high mantissa
				9648	clr.l -(%sp) # make zero exponent
				9649	bra.b sok_norm_cont
				9650
				9651	# the src will force the dst to a DENORM value or worse. so, let's
				9652	# create an fp multiply that will create the result.
				9653	sok_norm:
				9654	fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst
				9655	sok_norm2:
				9656	fmov.l (%sp)+,%fpcr # restore user fpcr
				9657
					# build the multiplier on the stack; pushes go lo mantissa, hi mantissa,
					# exponent so that the exponent ends up at the lowest address.
				9658	addi.w &0x3fff,%d0 # turn src amt into exp value
				9659	swap %d0 # put exponent in high word
				9660	clr.l -(%sp) # insert zero lo mantissa
				9661	mov.l &0x80000000,-(%sp) # insert new high mantissa
				9662	mov.l %d0,-(%sp) # insert new exponent
				9663
				9664	sok_norm_cont:
				9665	fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2
				9666	mov.b &FMUL_OP,%d1 # last inst is MUL
				9667	fmul.x (%sp)+,%fp0 # do the multiply
				9668	bra t_catch2 # catch any exceptions
				9669
				9670	#
				9671	# Source is outside of 2^14 range. Test the sign and branch
				9672	# to the appropriate exception handler.
				9673	#
				9674	src_out:
				9675	mov.l (%sp)+,%d0 # restore ctrl bits
				9676	exg %a0,%a1 # swap src,dst ptrs
				9677	tst.b SRC_EX(%a1) # is src negative?
				9678	bmi t_unfl # yes; underflow
				9679	bra t_ovfl_sc # no; overflow
				9680
				9681	#
				9682	# The source input is below 1, so we check for denormalized numbers
				9683	# and set unfl.
				9684	#
				9685	src_small:
				9686	tst.b DST_HI(%a1) # is dst denormalized?
				9687	bpl.b ssmall_done # yes
				9688
				9689	mov.l (%sp)+,%d0 # restore ctrl bits
				9690	fmov.l %d0,%fpcr # no; load control bits
				9691	mov.b &FMOV_OP,%d1 # last inst is MOVE
				9692	fmov.x DST(%a1),%fp0 # simply return dest
				9693	bra t_catch2
				9694	ssmall_done:
				9695	mov.l (%sp)+,%d0 # load control bits into d0
				9696	mov.l %a1,%a0 # pass ptr to dst
				9697	bra t_resdnrm # dst is a DENORM; t_resdnrm builds the result
				9698
				9699	#########################################################################
				9700	# smod(): computes the fp MOD of the input values X,Y. #
				9701	# srem(): computes the fp (IEEE) REM of the input values X,Y. #
				9702	# #
				9703	# INPUT *************************************************************** #
				9704	# a0 = pointer to extended precision input Y (source operand) #
				9705	# a1 = pointer to extended precision input X (destination operand) #
				9706	# d0 = round precision,mode #
				9707	# #
				9708	# The input operands X and Y can be either normalized or #
				9709	# denormalized. #
				9710	# #
				9711	# OUTPUT ************************************************************** #
				9712	# fp0 = FREM(X,Y) or FMOD(X,Y) #
				9713	# #
				9714	# ALGORITHM *********************************************************** #
				9715	# #
				9716	# Step 1. Save and strip signs of X and Y: signX := sign(X), #
				9717	# signY := sign(Y), X := |X|, Y := |Y|, #
				9718	# signQ := signX EOR signY. Record whether MOD or REM #
				9719	# is requested. #
				9720	# #
				9721	# Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. #
				9722	# If (L < 0) then #
				9723	# R := X, go to Step 4. #
				9724	# else #
				9725	# R := 2^(-L)X, j := L. #
				9726	# endif #
				9727	# #
				9728	# Step 3. Perform MOD(X,Y) #
				9729	# 3.1 If R = Y, go to Step 9. #
				9730	# 3.2 If R > Y, then { R := R - Y, Q := Q + 1} #
				9731	# 3.3 If j = 0, go to Step 4. #
				9732	# 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to #
				9733	# Step 3.1. #
				9734	# #
				9735	# Step 4. At this point, R = X - QY = MOD(X,Y). Set #
				9736	# Last_Subtract := false (used in Step 7 below). If #
				9737	# MOD is requested, go to Step 6. #
				9738	# #
				9739	# Step 5. R = MOD(X,Y), but REM(X,Y) is requested. #
				9740	# 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to #
				9741	# Step 6. #
				9742	# 5.2 If R > Y/2, then { set Last_Subtract := true, #
				9743	# Q := Q + 1, Y := signY*Y }. Go to Step 6. #
				9744	# 5.3 This is the tricky case of R = Y/2. If Q is odd, #
				9745	# then { Q := Q + 1, signX := -signX }. #
				9746	# #
				9747	# Step 6. R := signX*R. #
				9748	# #
				9749	# Step 7. If Last_Subtract = true, R := R - Y. #
				9750	# #
				9751	# Step 8. Return signQ, last 7 bits of Q, and R as required. #
				9752	# #
				9753	# Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, #
				9754	# X = 2^(j)(Q+1)Y. set Q := 2^(j)(Q+1), #
				9755	# R := 0. Return signQ, last 7 bits of Q, and R. #
				9756	# #
				9757	#########################################################################
				9758
					# Mod_Flag: 0 = MOD requested (smod), 1 = REM requested (srem).
					# Sc_Flag: non-zero when the result in fp0 must still be multiplied
					# by Scale before returning (see Restore).
				9759	set Mod_Flag,L_SCR3
				9760	set Sc_Flag,L_SCR3+1
				9761
					# exponent words (sign in bit 15) of Y and X, and the quotient sign.
				9762	set SignY,L_SCR2
				9763	set SignX,L_SCR2+2
				9764	set SignQ,L_SCR3+2
				9765
					# scratch extended-precision images of Y and of the remainder R.
				9766	set Y,FP_SCR0
				9767	set Y_Hi,Y+4
				9768	set Y_Lo,Y+8
				9769
				9770	set R,FP_SCR1
				9771	set R_Hi,R+4
				9772	set R_Lo,R+8
				9773
					# extended-precision constant (exponent 0x0001, mantissa 0x80000000
					# 00000000) used as the fmul operand in Restore when Sc_Flag is set.
				9774	Scale:
				9775	long 0x00010000,0x80000000,0x00000000,0x00000000
				9776
				9777	global smod
				9778	smod:
				9779	clr.b FPSR_QBYTE(%a6) # start with a zero quotient byte
				9780	mov.l %d0,-(%sp) # save ctrl bits
				9781	clr.b Mod_Flag(%a6) # MOD requested
				9782	bra.b Mod_Rem
				9783
				9784	global srem
				9785	srem:
				9786	clr.b FPSR_QBYTE(%a6) # start with a zero quotient byte
				9787	mov.l %d0,-(%sp) # save ctrl bits
				9788	mov.b &0x1,Mod_Flag(%a6) # REM requested; falls through to Mod_Rem
				9789
					# Unpack Y from the src operand (a0) into (D3,D4,D5) and X from the dst
					# operand (a1) into (D0,D1,D2), stripping signs and normalizing any
					# operand whose exponent field is zero.
				9790	Mod_Rem:
				9791	#..Save sign of X and Y
				9792	movm.l &0x3f00,-(%sp) # save data registers
				9793	mov.w SRC_EX(%a0),%d3
				9794	mov.w %d3,SignY(%a6) # keep exponent word of Y (sign in bit 15)
				9795	and.l &0x00007FFF,%d3 # Y := |Y|
				9796
				9797	#
				9798	mov.l SRC_HI(%a0),%d4
				9799	mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y|
				9800
				9801	tst.l %d3 # is the exponent of Y zero?
				9802	bne.b Y_Normal # no
				9803
				9804	mov.l &0x00003FFE,%d3 # $3FFD + 1
				9805	tst.l %d4 # is hi(Y) zero?
				9806	bne.b HiY_not0 # no
				9807
				9808	HiY_0:
				9809	mov.l %d5,%d4 # shift mantissa left by 32 ...
				9810	clr.l %d5
				9811	sub.l &32,%d3 # ... and adjust the exponent to match
				9812	clr.l %d6
				9813	bfffo %d4{&0:&32},%d6 # D6 := offset of first set bit in D4
				9814	lsl.l %d6,%d4
				9815	sub.l %d6,%d3 # (D3,D4,D5) is normalized
				9816	# ...with bias $7FFD
				9817	bra.b Chk_X
				9818
				9819	HiY_not0:
				9820	clr.l %d6
				9821	bfffo %d4{&0:&32},%d6 # D6 := offset of first set bit in D4
				9822	sub.l %d6,%d3
				9823	lsl.l %d6,%d4
				9824	mov.l %d5,%d7 # a copy of D5
				9825	lsl.l %d6,%d5
				9826	neg.l %d6
				9827	add.l &32,%d6 # D6 := 32 - shift count
				9828	lsr.l %d6,%d7 # bits of lo(Y) that move into hi(Y)
				9829	or.l %d7,%d4 # (D3,D4,D5) normalized
				9830	# ...with bias $7FFD
				9831	bra.b Chk_X
				9832
				9833	Y_Normal:
				9834	add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized
				9835	# ...with bias $7FFD
				9836
				9837	Chk_X:
				9838	mov.w DST_EX(%a1),%d0
				9839	mov.w %d0,SignX(%a6) # keep exponent word of X (sign in bit 15)
				9840	mov.w SignY(%a6),%d1
				9841	eor.l %d0,%d1
				9842	and.l &0x00008000,%d1 # isolate signX EOR signY
				9843	mov.w %d1,SignQ(%a6) # sign(Q) obtained
				9844	and.l &0x00007FFF,%d0
				9845	mov.l DST_HI(%a1),%d1
				9846	mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X|
				9847	tst.l %d0 # is the exponent of X zero?
				9848	bne.b X_Normal # no
				9849	mov.l &0x00003FFE,%d0
				9850	tst.l %d1 # is hi(X) zero?
				9851	bne.b HiX_not0 # no
				9852
				9853	HiX_0:
				9854	mov.l %d2,%d1 # shift mantissa left by 32 ...
				9855	clr.l %d2
				9856	sub.l &32,%d0 # ... and adjust the exponent to match
				9857	clr.l %d6
				9858	bfffo %d1{&0:&32},%d6 # D6 := offset of first set bit in D1
				9859	lsl.l %d6,%d1
				9860	sub.l %d6,%d0 # (D0,D1,D2) is normalized
				9861	# ...with bias $7FFD
				9862	bra.b Init
				9863
				9864	HiX_not0:
				9865	clr.l %d6
				9866	bfffo %d1{&0:&32},%d6 # D6 := offset of first set bit in D1
				9867	sub.l %d6,%d0
				9868	lsl.l %d6,%d1
				9869	mov.l %d2,%d7 # a copy of D2
				9870	lsl.l %d6,%d2
				9871	neg.l %d6
				9872	add.l &32,%d6 # D6 := 32 - shift count
				9873	lsr.l %d6,%d7 # bits of lo(X) that move into hi(X)
				9874	or.l %d7,%d1 # (D0,D1,D2) normalized
				9875	# ...with bias $7FFD
				9876	bra.b Init
				9877
				9878	X_Normal:
				9879	add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized
				9880	# ...with bias $7FFD
				9881
					# Set up the shift-and-subtract loop: D0 = j (iterations left),
					# D3 = Q, A1 = k, (D6,D1,D2) = R with D6 acting as the carry,
					# (D4,D5) = mantissa of Y.
				9882	Init:
				9883	#
				9884	mov.l %d3,L_SCR1(%a6) # save biased exp(Y)
				9885	mov.l %d0,-(%sp) # save biased exp(X)
				9886	sub.l %d3,%d0 # L := expo(X)-expo(Y)
				9887
				9888	clr.l %d6 # D6 := carry <- 0
				9889	clr.l %d3 # D3 is Q
				9890	mov.l &0,%a1 # A1 is k; j+k=L, Q=0
				9891
				9892	#..(Carry,D1,D2) is R
				9893	tst.l %d0
				9894	bge.b Mod_Loop_pre # L >= 0; run the loop
				9895
				9896	#..expo(X) < expo(Y). Thus X = mod(X,Y)
				9897	#
				9898	mov.l (%sp)+,%d0 # restore d0
				9899	bra.w Get_Mod
				9900
				9901	Mod_Loop_pre:
				9902	addq.l &0x4,%sp # erase exp(X)
				9903	#..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L
				9904	Mod_Loop:
				9905	tst.l %d6 # test carry bit
				9906	bgt.b R_GT_Y # carry set; R is certainly > Y
				9907
				9908	#..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
				9909	cmp.l %d1,%d4 # compare hi(R) and hi(Y)
				9910	bne.b R_NE_Y
				9911	cmp.l %d2,%d5 # compare lo(R) and lo(Y)
				9912	bne.b R_NE_Y
				9913
				9914	#..At this point, R = Y
				9915	bra.w Rem_is_0
				9916
				9917	R_NE_Y:
				9918	#..use the borrow of the previous compare
				9919	bcs.b R_LT_Y # borrow is set iff R < Y
				9920
				9921	R_GT_Y:
				9922	#..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
				9923	#..and Y < (D1,D2) < 2Y. Either way, perform R - Y
				9924	sub.l %d5,%d2 # lo(R) - lo(Y)
				9925	subx.l %d4,%d1 # hi(R) - hi(Y)
				9926	clr.l %d6 # clear carry
				9927	addq.l &1,%d3 # Q := Q + 1
				9928
				9929	R_LT_Y:
				9930	#..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
				9931	tst.l %d0 # see if j = 0.
				9932	beq.b PostLoop # yes; loop is done
				9933
				9934	add.l %d3,%d3 # Q := 2Q
				9935	add.l %d2,%d2 # lo(R) = 2lo(R)
				9936	roxl.l &1,%d1 # hi(R) = 2hi(R) + carry
				9937	scs %d6 # set Carry if 2(R) overflows
				9938	addq.l &1,%a1 # k := k+1
				9939	subq.l &1,%d0 # j := j - 1
				9940	#..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
				9941
				9942	bra.b Mod_Loop
				9943
				9944	PostLoop:
				9945	#..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
				9946
				9947	#..normalize R.
				9948	mov.l L_SCR1(%a6),%d0 # new biased expo of R
				9949	tst.l %d1 # is hi(R) zero?
				9950	bne.b HiR_not0 # no
				9951
				9952	HiR_0:
				9953	mov.l %d2,%d1 # shift mantissa left by 32 ...
				9954	clr.l %d2
				9955	sub.l &32,%d0 # ... and adjust the exponent to match
				9956	clr.l %d6
				9957	bfffo %d1{&0:&32},%d6 # D6 := offset of first set bit in D1
				9958	lsl.l %d6,%d1
				9959	sub.l %d6,%d0 # (D0,D1,D2) is normalized
				9960	# ...with bias $7FFD
				9961	bra.b Get_Mod
				9962
				9963	HiR_not0:
				9964	clr.l %d6
				9965	bfffo %d1{&0:&32},%d6 # D6 := offset of first set bit in D1
				9966	bmi.b Get_Mod # already normalized
				9967	sub.l %d6,%d0
				9968	lsl.l %d6,%d1
				9969	mov.l %d2,%d7 # a copy of D2
				9970	lsl.l %d6,%d2
				9971	neg.l %d6
				9972	add.l &32,%d6 # D6 := 32 - shift count
				9973	lsr.l %d6,%d7 # bits of lo(R) that move into hi(R)
				9974	or.l %d7,%d1 # (D0,D1,D2) normalized
				9975
				9976	#
					# Load R into fp0 and store Y in its scratch slot. If the biased
					# exponent of R is below $41FE, R and Y are stored with the extra bias
					# left in place and Sc_Flag is set; otherwise $3FFE is subtracted from
					# both exponents and Sc_Flag is cleared.
				9977	Get_Mod:
				9978	cmp.l %d0,&0x000041FE # is biased expo(R) >= $41FE?
				9979	bge.b No_Scale # yes
				9980	Do_Scale:
				9981	mov.w %d0,R(%a6) # store expo(R) with the extra bias kept
				9982	mov.l %d1,R_Hi(%a6)
				9983	mov.l %d2,R_Lo(%a6)
				9984	mov.l L_SCR1(%a6),%d6 # biased expo(Y), extra bias kept
				9985	mov.w %d6,Y(%a6)
				9986	mov.l %d4,Y_Hi(%a6)
				9987	mov.l %d5,Y_Lo(%a6)
				9988	fmov.x R(%a6),%fp0 # no exception
				9989	mov.b &1,Sc_Flag(%a6) # result needs the Scale multiply later
				9990	bra.b ModOrRem
				9991	No_Scale:
				9992	mov.l %d1,R_Hi(%a6)
				9993	mov.l %d2,R_Lo(%a6)
				9994	sub.l &0x3FFE,%d0 # remove the extra bias from expo(R)
				9995	mov.w %d0,R(%a6)
				9996	mov.l L_SCR1(%a6),%d6
				9997	sub.l &0x3FFE,%d6 # remove the extra bias from expo(Y)
				9998	mov.l %d6,L_SCR1(%a6)
				9999	fmov.x R(%a6),%fp0
				10000	mov.w %d6,Y(%a6)
				10001	mov.l %d4,Y_Hi(%a6)
				10002	mov.l %d5,Y_Lo(%a6)
				10003	clr.b Sc_Flag(%a6) # no Scale multiply needed
				10004
				10005	#
					# For REM only: compare R against Y/2 and, when R is larger, subtract Y
					# once more and bump Q. Then apply the sign of X to fp0 and write the
					# quotient byte.
				10006	ModOrRem:
				10007	tst.b Mod_Flag(%a6) # is REM requested?
				10008	beq.b Fix_Sign # no; MOD result is already in fp0
				10009
				10010	mov.l L_SCR1(%a6),%d6 # new biased expo(Y)
				10011	subq.l &1,%d6 # biased expo(Y/2)
				10012	cmp.l %d0,%d6 # compare expo(R) and expo(Y/2)
				10013	blt.b Fix_Sign # R < Y/2
				10014	bgt.b Last_Sub # R > Y/2
				10015
					# exponents are equal; compare the mantissas of R and Y.
				10016	cmp.l %d1,%d4
				10017	bne.b Not_EQ
				10018	cmp.l %d2,%d5
				10019	bne.b Not_EQ
				10020	bra.w Tie_Case # R = Y/2
				10021
				10022	Not_EQ:
				10023	bcs.b Fix_Sign # borrow set; R < Y/2
				10024
				10025	Last_Sub:
				10026	#
				10027	fsub.x Y(%a6),%fp0 # no exceptions
				10028	addq.l &1,%d3 # Q := Q + 1
				10029
				10030	#
				10031	Fix_Sign:
				10032	#..Get sign of X
				10033	mov.w SignX(%a6),%d6
				10034	bge.b Get_Q # X is positive; leave fp0 alone
				10035	fneg.x %fp0 # X is negative; negate the result
				10036
				10037	#..Get Q
				10038	#
				10039	Get_Q:
				10040	clr.l %d6
				10041	mov.w SignQ(%a6),%d6 # D6 is sign(Q)
				10042	mov.l &8,%d7
				10043	lsr.l %d7,%d6 # move sign(Q) from bit 15 to bit 7
				10044	and.l &0x0000007F,%d3 # 7 bits of Q
				10045	or.l %d6,%d3 # sign and bits of Q
				10046	# swap %d3
				10047	# fmov.l %fpsr,%d6
				10048	# and.l &0xFF00FFFF,%d6
				10049	# or.l %d3,%d6
				10050	# fmov.l %d6,%fpsr # put Q in fpsr
				10051	mov.b %d3,FPSR_QBYTE(%a6) # put Q in fpsr
				10052
				10053	#
					# Common exit: restore the saved data registers and the ctrl bits,
					# then finish with either a Scale multiply or a plain move so that
					# t_catch2 sees the final operation.
				10054	Restore:
				10055	movm.l (%sp)+,&0xfc # {%d2-%d7}
				10056	mov.l (%sp)+,%d0 # recover ctrl bits saved at entry
				10057	fmov.l %d0,%fpcr
				10058	tst.b Sc_Flag(%a6) # does the result need the Scale multiply?
				10059	beq.b Finish # no
				10060	mov.b &FMUL_OP,%d1 # last inst is MUL
				10061	fmul.x Scale(%pc),%fp0 # may cause underflow
				10062	bra t_catch2
				10063	# the '040 package did this apparently to see if the dst operand for the
				10064	# preceding fmul was a denorm. but, it better not have been since the
				10065	# algorithm just got done playing with fp0 and expected no exceptions
				10066	# as a result. trust me...
				10067	# bra t_avoid_unsupp # check for denorm as a
				10068	# ;result of the scaling
				10069
				10070	Finish:
				10071	mov.b &FMOV_OP,%d1 # last inst is MOVE
				10072	fmov.x %fp0,%fp0 # capture exceptions & round
				10073	bra t_catch2
				10074
				10075	Rem_is_0:
				10076	#..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
				10077	addq.l &1,%d3 # Q := Q + 1
				10078	cmp.l %d0,&8 # D0 is j
				10079	bge.b Q_Big # j >= 8; shifted Q is treated as zero
				10080
				10081	lsl.l %d0,%d3 # Q := 2^j * Q
				10082	bra.b Set_R_0
				10083
				10084	Q_Big:
				10085	clr.l %d3
				10086
				10087	Set_R_0:
				10088	fmov.s &0x00000000,%fp0 # R := +0; Fix_Sign applies the sign of X
				10089	clr.b Sc_Flag(%a6) # a zero result needs no scaling
				10090	bra.w Fix_Sign
				10091
				10092	Tie_Case:
				10093	#..Check parity of Q
				10094	mov.l %d3,%d6
				10095	and.l &0x00000001,%d6 # isolate the low bit of Q
				10096	tst.l %d6
				10097	beq.w Fix_Sign # Q is even
				10098
				10099	#..Q is odd, Q := Q + 1, signX := -signX
				10100	addq.l &1,%d3
				10101	mov.w SignX(%a6),%d6
				10102	eor.l &0x00008000,%d6 # flip the sign bit
				10103	mov.w %d6,SignX(%a6)
				10104	bra.w Fix_Sign
				10105
					# extended-precision NAN image; t_operr loads it into fp0.
				10106	qnan: long 0x7fff0000, 0xffffffff, 0xffffffff
				10107
				10108	#########################################################################
				10109	# XDEF **************************************************************** #
				10110	# t_dz(): Handle DZ exception during transcendental emulation. #
				10111	# Sets N bit according to sign of source operand. #
				10112	# t_dz2(): Handle DZ exception during transcendental emulation. #
				10113	# Sets N bit always. #
				10114	# #
				10115	# XREF **************************************************************** #
				10116	# None #
				10117	# #
				10118	# INPUT *************************************************************** #
				10119	# a0 = pointer to source operand #
				10120	# #
				10121	# OUTPUT ************************************************************** #
				10122	# fp0 = default result #
				10123	# #
				10124	# ALGORITHM *********************************************************** #
				10125	# - Store properly signed INF into fp0. #
				10126	# - Set FPSR exception status dz bit, ccode inf bit, and #
				10127	# accrued dz bit. #
				10128	# #
				10129	#########################################################################
				10130
				10131	global t_dz
				10132	t_dz:
				10133	tst.b SRC_EX(%a0) # is src negative?
				10134	bmi.b t_dz2 # yes
				10135
				10136	dz_pinf:
				10137	fmov.s &0x7f800000,%fp0 # return +INF in fp0
				10138	ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ
				10139	rts
				10140
					# t_dz2: always returns -INF, regardless of the src sign.
				10141	global t_dz2
				10142	t_dz2:
				10143	fmov.s &0xff800000,%fp0 # return -INF in fp0
				10144	ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ
				10145	rts
				10146
				10147	#################################################################
				10148	# OPERR exception: #
				10149	# - set FPSR exception status operr bit, condition code #
				10150	# nan bit; Store default NAN into fp0 #
				10151	#################################################################
				10152	global t_operr
					# t_operr: flag the operand error in USER_FPSR and return the NAN stored
					# at qnan in fp0.
				10153	t_operr:
				10154	ori.l &opnan_mask,USER_FPSR(%a6) # set NaN/OPERR/AIOP
				10155	fmovm.x qnan(%pc),&0x80 # return default NAN in fp0
				10156	rts
				10157
				10158	#################################################################
				10159	# Extended DENORM: #
				10160	# - For all functions that have a denormalized input and #
				10161	# that f(x)=x, this is the entry point. #
				10162	# - we only return the EXOP here if either underflow or #
				10163	# inexact is enabled. #
				10164	#################################################################
				10165
				10166	# Entry point for scale w/ extended denorm. The function does
				10167	# NOT set INEX2/AUNFL/AINEX.
					# NOTE(review): on the single/double path below, unf_sub is called and it
					# ORs unfinx_mask into USER_FPSR, so those bits do get set there -- confirm
					# whether that is intended for scale.
				10168	global t_resdnrm
				10169	t_resdnrm:
				10170	ori.l &unfl_mask,USER_FPSR(%a6) # set UNFL
				10171	bra.b xdnrm_con
				10172
					# Inputs for both entry points: a0 = ptr to the denorm operand,
					# d0 = rnd prec,mode.
				10173	global t_extdnrm
				10174	t_extdnrm:
				10175	ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
				10176
				10177	xdnrm_con:
				10178	mov.l %a0,%a1 # make copy of src ptr
				10179	mov.l %d0,%d1 # make copy of rnd prec,mode
				10180	andi.b &0xc0,%d1 # extended precision?
				10181	bne.b xdnrm_sd # no
				10182
				10183	# result precision is extended.
				10184	tst.b LOCAL_EX(%a0) # is denorm negative?
				10185	bpl.b xdnrm_exit # no
				10186
				10187	bset &neg_bit,FPSR_CC(%a6) # yes; set 'N' ccode bit
				10188	bra.b xdnrm_exit
				10189
				10190	# result precision is single or double
				10191	xdnrm_sd:
				10192	mov.l %a1,-(%sp) # keep the src ptr across unf_sub
				10193	tst.b LOCAL_EX(%a0) # is denorm pos or neg?
Andrea Gelmini	86a8280	2016-05-21 13:57:20 +0200	[diff] [blame]	10194	smi.b %d1 # set d1 accordingly
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	10195	bsr.l unf_sub # a0 := ptr to default unfl result
				10196	mov.l (%sp)+,%a1
				10197	xdnrm_exit:
				10198	fmovm.x (%a0),&0x80 # return default result in fp0
				10199
				10200	mov.b FPCR_ENABLE(%a6),%d0
				10201	andi.b &0x0a,%d0 # is UNFL or INEX enabled?
				10202	bne.b xdnrm_ena # yes
				10203	rts
				10204
				10205	################
				10206	# unfl enabled #
				10207	################
				10208	# we have a DENORM that needs to be converted into an EXOP.
				10209	# so, normalize the mantissa, add 0x6000 to the new exponent,
				10210	# and return the result in fp1.
				10211	xdnrm_ena:
				10212	mov.w LOCAL_EX(%a1),FP_SCR0_EX(%a6) # copy the original denorm (a1) ...
				10213	mov.l LOCAL_HI(%a1),FP_SCR0_HI(%a6)
				10214	mov.l LOCAL_LO(%a1),FP_SCR0_LO(%a6) # ... into FP_SCR0
				10215
				10216	lea FP_SCR0(%a6),%a0
				10217	bsr.l norm # normalize mantissa
				10218	addi.l &0x6000,%d0 # add extra bias
				10219	andi.w &0x8000,FP_SCR0_EX(%a6) # keep old sign
				10220	or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				10221
				10222	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				10223	rts
				10224
				10225	#################################################################
				10226	# UNFL exception: #
				10227	# - This routine is for cases where even an EXOP isn't #
				10228	# large enough to hold the range of this result. #
				10229	# In such a case, the EXOP equals zero. #
				10230	# - Return the default result to the proper precision #
				10231	# with the sign of this result being the same as that #
				10232	# of the src operand. #
				10233	# - t_unfl2() is provided to force the result sign to #
				10234	# positive which is the desired result for fetox(). #
				10235	#################################################################
				10236	global t_unfl
					# t_unfl: a0 = ptr to the operand whose sign the result takes,
					# d0 = rnd prec,mode (consumed by unf_sub).
				10237	t_unfl:
				10238	ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
				10239
				10240	tst.b (%a0) # is result pos or neg?
				10241	smi.b %d1 # set d1 accordingly
				10242	bsr.l unf_sub # calc default unfl result
				10243	fmovm.x (%a0),&0x80 # return default result in fp0
				10244
				10245	fmov.s &0x00000000,%fp1 # return EXOP in fp1
				10246	rts
				10247
				10248	# t_unfl2 ALWAYS tells unf_sub to create a positive result
				10249	global t_unfl2
				10250	t_unfl2:
				10251	ori.l &unfinx_mask,USER_FPSR(%a6) # set UNFL/INEX2/AUNFL/AINEX
				10252
				10253	sf.b %d1 # clear d1 to represent positive
				10254	bsr.l unf_sub # calc default unfl result
				10255	fmovm.x (%a0),&0x80 # return default result in fp0
				10256
				10257	fmov.s &0x0000000,%fp1 # return EXOP in fp1
				10258	rts
				10259
				10260	#################################################################
				10261	# OVFL exception: #
				10262	# - This routine is for cases where even an EXOP isn't #
				10263	# large enough to hold the range of this result. #
				10264	# - Return the default result to the proper precision #
				10265	# with the sign of this result being the same as that #
				10266	# of the src operand. #
				10267	# - t_ovfl2() is provided to force the result sign to #
				10268	# positive which is the desired result for fcosh(). #
				10269	# - t_ovfl_sc() is provided for scale() which only sets #
				10270	# the inexact bits if the number is inexact for the #
				10271	# precision indicated. #
				10272	#################################################################
				10273
				10274	global t_ovfl_sc
					# t_ovfl_sc: a0 = ptr to the dst operand, d0 = rnd prec,mode.
					# INEX2 is set only when the (normalized) dst mantissa has bits below
					# the rounding precision; then control joins t_ovfl at ovfl_work.
				10275	t_ovfl_sc:
				10276	ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX
				10277
				10278	mov.b %d0,%d1 # fetch rnd mode/prec
				10279	andi.b &0xc0,%d1 # extract rnd prec
				10280	beq.b ovfl_work # prec is extended
				10281
				10282	tst.b LOCAL_HI(%a0) # is dst a DENORM?
				10283	bmi.b ovfl_sc_norm # no
				10284
				10285	# dst op is a DENORM. we have to normalize the mantissa to see if the
				10286	# result would be inexact for the given precision. make a copy of the
				10287	# dst so we don't screw up the version passed to us.
				10288	mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6)
				10289	mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6)
				10290	mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6)
				10291	lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0
				10292	movm.l &0xc080,-(%sp) # save d0-d1/a0
				10293	bsr.l norm # normalize mantissa
				10294	movm.l (%sp)+,&0x0103 # restore d0-d1/a0
				10295
				10296	ovfl_sc_norm:
				10297	cmpi.b %d1,&0x40 # is prec sgl?
				10298	bne.b ovfl_sc_dbl # no; dbl
				10299	ovfl_sc_sgl:
				10300	tst.l LOCAL_LO(%a0) # is lo lw of sgl set?
				10301	bne.b ovfl_sc_inx # yes
				10302	tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set?
				10303	bne.b ovfl_sc_inx # yes
				10304	bra.b ovfl_work # don't set INEX2
				10305	ovfl_sc_dbl:
				10306	mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of
				10307	andi.l &0x7ff,%d1 # dbl mantissa set?
				10308	beq.b ovfl_work # no; don't set INEX2
				10309	ovfl_sc_inx:
				10310	ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2
				10311	bra.b ovfl_work # continue
				10312
				10313	global t_ovfl
				10314	t_ovfl:
				10315	ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
				10316
					# shared tail: build the default overflow result with the operand's sign.
				10317	ovfl_work:
				10318	tst.b LOCAL_EX(%a0) # what is the sign?
				10319	smi.b %d1 # set d1 accordingly
				10320	bsr.l ovf_res # calc default ovfl result
				10321	mov.b %d0,FPSR_CC(%a6) # insert new ccodes
				10322	fmovm.x (%a0),&0x80 # return default result in fp0
				10323
				10324	fmov.s &0x00000000,%fp1 # return EXOP in fp1
				10325	rts
				10326
				10327	# t_ovfl2 ALWAYS tells ovf_res to create a positive result
					# (d1 is cleared instead of being derived from the operand's sign).
				10328	global t_ovfl2
				10329	t_ovfl2:
				10330	ori.l &ovfinx_mask,USER_FPSR(%a6) # set OVFL/INEX2/AOVFL/AINEX
				10331
				10332	sf.b %d1 # clear sign flag for positive
				10333	bsr.l ovf_res # calc default ovfl result
				10334	mov.b %d0,FPSR_CC(%a6) # insert new ccodes
				10335	fmovm.x (%a0),&0x80 # return default result in fp0
				10336
				10337	fmov.s &0x00000000,%fp1 # return EXOP in fp1
				10338	rts
				10339
				10340	#################################################################
				10341	# t_catch(): #
				10342	# - the last operation of a transcendental emulation #
				10343	# routine may have caused an underflow or overflow. #
				10344	# we find out if this occurred by doing an fsave and #
				10345	# checking the exception bit. if one did occur, then we #
				10346	# jump to fgen_except() which creates the default #
				10347	# result and EXOP for us. #
				10348	#################################################################
				10349	global t_catch
				10350	t_catch:
				10351
				10352	fsave -(%sp) # push the FPU state frame
				10353	tst.b 0x2(%sp) # is the exception bit set in the frame?
				10354	bmi.b catch # yes
				10355	add.l &0xc,%sp # no; discard the 0xc-byte frame
					# no exception: falls through into t_inx2 below.
				10356
				10357	#################################################################
				10358	# INEX2 exception: #
				10359	# - The inex2 and ainex bits are set. #
				10360	#################################################################
				10361	global t_inx2
				10362	t_inx2:
				10363	fblt.w t_minx2 # result is negative
				10364	fbeq.w inx2_zero # result is zero
				10365
				10366	global t_pinx2
				10367	t_pinx2:
				10368	ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
				10369	rts
				10370
				10371	global t_minx2
				10372	t_minx2:
				10373	ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX
				10374	rts
				10375
				10376	inx2_zero:
				10377	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				10378	ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX2/AINEX
				10379	rts
				10380
				10381	# an underflow or overflow exception occurred.
				10382	# we must set INEX/AINEX since the fmul/fdiv/fmov emulation may not!
				10383	catch:
				10384	ori.w &inx2a_mask,FPSR_EXCEPT(%a6)
					# catch2 is also the exception target of t_catch2, which skips the
					# forced INEX2/AINEX above.
				10385	catch2:
				10386	bsr.l fgen_except # create default result and EXOP
				10387	add.l &0xc,%sp # discard the fsave frame
				10388	rts
				10389
				10390	global t_catch2
					# t_catch2: like t_catch, but when no exception is pending it merges the
					# live FPSR into USER_FPSR instead of forcing INEX2/AINEX.
				10391	t_catch2:
				10392
				10393	fsave -(%sp) # push the FPU state frame
				10394
				10395	tst.b 0x2(%sp) # is the exception bit set in the frame?
				10396	bmi.b catch2 # yes
				10397	add.l &0xc,%sp # no; discard the 0xc-byte frame
				10398
				10399	fmov.l %fpsr,%d0
				10400	or.l %d0,USER_FPSR(%a6) # fold the last op's status into USER_FPSR
				10401
				10402	rts
				10403
				10404	#########################################################################
				10405
				10406	#########################################################################
				10407	# unf_sub(): underflow default result calculation for transcendentals #
				10408	# #
				10409	# INPUT: #
				10410	# d0 : rnd mode,precision #
				10411	# d1.b : sign bit of result ('11111111 = (-) ; '00000000 = (+)) #
				10412	# OUTPUT: #
				10413	# a0 : points to result (in instruction memory) #
				10414	#########################################################################
				10415	unf_sub:
				10416	ori.l &unfinx_mask,USER_FPSR(%a6)
				10417
				10418	andi.w &0x10,%d1 # keep sign bit in 4th spot
				10419
				10420	lsr.b &0x4,%d0 # shift rnd prec,mode to lo bits
				10421	andi.b &0xf,%d0 # strip hi rnd mode bit
				10422	or.b %d1,%d0 # concat {sgn,mode,prec}
				10423
				10424	mov.l %d0,%d1 # make a copy
				10425	lsl.b &0x1,%d1 # mult index 2 by 2
				10426
					# d0 indexes the byte table; d1*8 = d0*16 indexes the 16-byte results.
				10427	mov.b (tbl_unf_cc.b,%pc,%d0.w*1),FPSR_CC(%a6) # insert ccode bits
				10428	lea (tbl_unf_result.b,%pc,%d1.w*8),%a0 # grab result ptr
				10429	rts
				10430
					# ccode byte per {sgn,prec,mode} index; one row of four rnd modes per
					# precision, positive rows first. the fourth row only keeps the
					# negative rows at index 16.
				10431	tbl_unf_cc:
				10432	byte 0x4, 0x4, 0x4, 0x0
				10433	byte 0x4, 0x4, 0x4, 0x0
				10434	byte 0x4, 0x4, 0x4, 0x0
				10435	byte 0x0, 0x0, 0x0, 0x0
				10436	byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
				10437	byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
				10438	byte 0x8+0x4, 0x8+0x4, 0x8, 0x8+0x4
				10439
					# default results, 16 bytes each, in the same index order as tbl_unf_cc.
				10440	tbl_unf_result:
				10441	long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10442	long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10443	long 0x00000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10444	long 0x00000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
				10445
				10446	long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10447	long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10448	long 0x3f810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10449	long 0x3f810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
				10450
				10451	long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10452	long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10453	long 0x3c010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10454	long 0x3c010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
				10455
				10456	long 0x0,0x0,0x0,0x0
				10457	long 0x0,0x0,0x0,0x0
				10458	long 0x0,0x0,0x0,0x0
				10459	long 0x0,0x0,0x0,0x0
				10460
				10461	long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10462	long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10463	long 0x80000000, 0x00000000, 0x00000001, 0x0 # MIN; ext
				10464	long 0x80000000, 0x00000000, 0x00000000, 0x0 # ZERO;ext
				10465
				10466	long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10467	long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10468	long 0xbf810000, 0x00000100, 0x00000000, 0x0 # MIN; sgl
				10469	long 0xbf810000, 0x00000000, 0x00000000, 0x0 # ZERO;sgl
				10470
				10471	long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10472	long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10473	long 0xbc010000, 0x00000000, 0x00000800, 0x0 # MIN; dbl
				10474	long 0xbc010000, 0x00000000, 0x00000000, 0x0 # ZERO;dbl
				10475
				10476	############################################################
				10477
				10478	#########################################################################
				10479	# src_zero(): Return signed zero according to sign of src operand. #
				10480	#########################################################################
				10481	global src_zero
				10482	src_zero:
				10483	tst.b SRC_EX(%a0) # get sign of src operand
				10484	bmi.b ld_mzero # if neg, load neg zero
					# positive src: falls through into ld_pzero.
				10485
				10486	#
				10487	# ld_pzero(): return a positive zero.
				10488	#
				10489	global ld_pzero
				10490	ld_pzero:
				10491	fmov.s &0x00000000,%fp0 # load +0
				10492	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				10493	rts
				10494
				10495	# ld_mzero(): return a negative zero.
				10496	global ld_mzero
				10497	ld_mzero:
				10498	fmov.s &0x80000000,%fp0 # load -0
				10499	mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
				10500	rts
				10501
				10502	#########################################################################
				10503	# dst_zero(): Return signed zero according to sign of dst operand. #
				10504	#########################################################################
				10505	global dst_zero
					# a1 = ptr to dst operand; the result comes from ld_mzero or ld_pzero.
				10506	dst_zero:
				10507	tst.b DST_EX(%a1) # get sign of dst operand
				10508	bmi.b ld_mzero # if neg, load neg zero
				10509	bra.b ld_pzero # load positive zero
				10510
				10511	#########################################################################
				10512	# src_inf(): Return signed inf according to sign of src operand. #
				10513	#########################################################################
				10514	global src_inf
				10515	src_inf:
				10516	tst.b SRC_EX(%a0) # get sign of src operand
				10517	bmi.b ld_minf # if negative branch
					# positive src: falls through into ld_pinf.
				10518
				10519	#
				10520	# ld_pinf(): return a positive infinity.
				10521	#
				10522	global ld_pinf
				10523	ld_pinf:
				10524	fmov.s &0x7f800000,%fp0 # load +INF
				10525	mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit
				10526	rts
				10527
				10528	#
				10529	# ld_minf(): return a negative infinity.
				10530	#
				10531	global ld_minf
				10532	ld_minf:
				10533	fmov.s &0xff800000,%fp0 # load -INF
				10534	mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
				10535	rts
				10536
				10537	#########################################################################
				10538	# dst_inf(): Return signed inf according to sign of dst operand. #
				10539	#########################################################################
				10540	global dst_inf
				10541	dst_inf: # in: a1 = ptr to dst operand; result is produced by ld_minf/ld_pinf
				10542	tst.b DST_EX(%a1) # get sign of dst operand
				10543	bmi.b ld_minf # if negative branch
				10544	bra.b ld_pinf # otherwise load +INF
				10545
				10546	global szr_inf
				10547	#################################################################
				10548	# szr_inf(): Return +ZERO for a negative src operand or #
				10549	# +INF for a positive src operand. #
				10550	# Routine used for fetox, ftwotox, and ftentox. #
				10551	#################################################################
				10552	szr_inf: # in: a0 = ptr to src operand (only the sign byte is examined)
				10553	tst.b SRC_EX(%a0) # check sign of source
				10554	bmi.b ld_pzero # negative src -> +0
				10555	bra.b ld_pinf # otherwise -> +INF
				10556
				10557	#########################################################################
				10558	# sopr_inf(): Return +INF for a positive src operand or #
				10559	# jump to operand error routine for a negative src operand. #
				10560	# Routine used for flogn, flognp1, flog10, and flog2. #
				10561	#########################################################################
				10562	global sopr_inf
				10563	sopr_inf: # in: a0 = ptr to src operand (only the sign byte is examined)
				10564	tst.b SRC_EX(%a0) # check sign of source
				10565	bmi.w t_operr # negative src -> operand error routine
				10566	bra.b ld_pinf # otherwise -> +INF
				10567
				10568	#################################################################
				10569	# setoxm1i(): Return minus one for a negative src operand or #
				10570	# positive infinity for a positive src operand. #
				10571	# Routine used for fetoxm1. #
				10572	#################################################################
				10573	global setoxm1i
				10574	setoxm1i: # in: a0 = ptr to src operand (only the sign byte is examined)
				10575	tst.b SRC_EX(%a0) # check sign of source
				10576	bmi.b ld_mone # negative src -> -1
				10577	bra.b ld_pinf # otherwise -> +INF
				10578
				10579	#########################################################################
				10580	# src_one(): Return signed one according to sign of src operand. #
				10581	#########################################################################
				10582	global src_one
				10583	src_one: # in: a0 = ptr to src operand; out: fp0 = +/-1, FPSR_CC(%a6) written
				10584	tst.b SRC_EX(%a0) # check sign of source
				10585	bmi.b ld_mone # negative src -> -1
				10586
				10587	# src_one() falls through to ld_pone() when the src sign byte is non-negative.
				10588	# ld_pone(): return positive one.
				10589	#
				10590	global ld_pone
				10591	ld_pone:
				10592	fmov.s &0x3f800000,%fp0 # load +1
				10593	clr.b FPSR_CC(%a6) # no ccode bits set
				10594	rts
				10595
				10596	#
				10597	# ld_mone(): return negative one.
				10598	#
				10599	global ld_mone
				10600	ld_mone:
				10601	fmov.s &0xbf800000,%fp0 # load -1
				10602	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				10603	rts
				10604
				10605	ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 # +pi/2 constant loaded by ld_ppi2 with fmov.x
				10606	mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 # -pi/2: same words with the sign bit set
				10607
				10608	#################################################################
				10609	# spi_2(): Return signed PI/2 according to sign of src operand. #
				10610	#################################################################
				10611	global spi_2
				10612	spi_2: # in: a0 = ptr to src operand, d0 = value to load into FPCR
				10613	tst.b SRC_EX(%a0) # check sign of source
				10614	bmi.b ld_mpi2 # negative src -> -pi/2
				10615
				10616	# spi_2() falls through to ld_ppi2() when the src sign byte is non-negative.
				10617	# ld_ppi2(): return positive PI/2.
				10618	#
				10619	global ld_ppi2
				10620	ld_ppi2:
				10621	fmov.l %d0,%fpcr # FPCR = d0 (presumably the user's rnd prec,mode - TODO confirm)
				10622	fmov.x ppiby2(%pc),%fp0 # load +pi/2
				10623	bra.w t_pinx2 # set INEX2
				10624
				10625	#
				10626	# ld_mpi2(): return negative PI/2.
				10627	#
				10628	global ld_mpi2
				10629	ld_mpi2:
				10630	fmov.l %d0,%fpcr # FPCR = d0 (presumably the user's rnd prec,mode - TODO confirm)
				10631	fmov.x mpiby2(%pc),%fp0 # load -pi/2
				10632	bra.w t_minx2 # set INEX2
				10633
				10634	####################################################
				10635	# The following routines give support for fsincos. #
				10636	####################################################
				10637
				10638	#
				10639	# ssincosz(): When the src operand is ZERO, store a one in the
				10640	# cosine register and return a ZERO in fp0 w/ the same sign
				10641	# as the src operand.
				10642	# Both paths end by branching to sto_cos, whose own rts returns to our caller.
				10643	global ssincosz
				10644	ssincosz:
				10645	fmov.s &0x3f800000,%fp1 # fp1 = +1: the cosine result handed to sto_cos
				10646	tst.b SRC_EX(%a0) # test sign
				10647	bpl.b sincoszp # non-negative src -> +0 path
				10648	fmov.s &0x80000000,%fp0 # return sin result in fp0
				10649	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits
				10650	bra.b sto_cos # store cosine result
				10651	sincoszp:
				10652	fmov.s &0x00000000,%fp0 # return sin result in fp0
				10653	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				10654	bra.b sto_cos # store cosine result
				10655
				10656	#
				10657	# ssincosi(): When the src operand is INF, store a QNAN in the cosine
				10658	# register and jump to the operand error routine. No sign test is made
				10659	# here: the branch to t_operr is unconditional.
				10660	#
				10661	global ssincosi
				10662	ssincosi:
				10663	fmov.x qnan(%pc),%fp1 # load NAN
				10664	bsr.l sto_cos # store cosine result
				10665	bra.w t_operr # then take the operand error path
				10666
				10667	#
				10668	# ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine
				10669	# register and branch to the src QNAN routine.
				10670	#
				10671	global ssincosqnan
				10672	ssincosqnan:
				10673	fmov.x LOCAL_EX(%a0),%fp1 # fp1 = src operand (the QNAN)
				10674	bsr.l sto_cos # store it as the cosine result
				10675	bra.w src_qnan # src_qnan loads the sine result into fp0 and sets ccodes
				10676
				10677	#
				10678	# ssincossnan(): When the src operand is an SNAN, store the SNAN w/ the SNAN bit set
				10679	# in the cosine register and branch to the src SNAN routine.
				10680	#
				10681	global ssincossnan
				10682	ssincossnan:
				10683	fmov.x LOCAL_EX(%a0),%fp1 # fp1 = src operand (the SNAN)
				10684	bsr.l sto_cos # store it as the cosine result
				10685	bra.w src_snan # src_snan loads fp0 and records the resulting FPSR
				10686
				10687	########################################################################
				10688
				10689	#########################################################################
				10690	# sto_cos(): store fp1 to the fpreg designated by the CMDREG dst field. #
				10691	# fp1 holds the result of the cosine portion of ssincos(). #
				10692	# the value in fp1 will not take any exceptions when moved. #
				10693	# INPUT: #
				10694	# fp1 : fp value to store #
				10695	# MODIFIED: #
				10696	# d0 #
				10697	#########################################################################
				10698	global sto_cos
				10699	sto_cos:
				10700	mov.b 1+EXC_CMDREG(%a6),%d0 # d0.b = byte at offset 1 of the saved command word
				10701	andi.w &0x7,%d0 # keep its low 3 bits: register number 0-7
				10702	mov.w (tbl_sto_cos.b,%pc,%d0.w*2),%d0 # fetch 16-bit offset for that register
				10703	jmp (tbl_sto_cos.b,%pc,%d0.w*1) # jump to tbl_sto_cos + offset
				10704
				10705	tbl_sto_cos: # eight word offsets, each relative to tbl_sto_cos
				10706	short sto_cos_0 - tbl_sto_cos
				10707	short sto_cos_1 - tbl_sto_cos
				10708	short sto_cos_2 - tbl_sto_cos
				10709	short sto_cos_3 - tbl_sto_cos
				10710	short sto_cos_4 - tbl_sto_cos
				10711	short sto_cos_5 - tbl_sto_cos
				10712	short sto_cos_6 - tbl_sto_cos
				10713	short sto_cos_7 - tbl_sto_cos
				10714
				10715	sto_cos_0: # fp0 target: written to the EXC_FP0 slot in the frame, not to the live register
				10716	fmovm.x &0x40,EXC_FP0(%a6) # presumably mask 0x40 selects fp1 - TODO confirm
				10717	rts
				10718	sto_cos_1: # fp1 target: likewise written to the EXC_FP1 slot in the frame
				10719	fmovm.x &0x40,EXC_FP1(%a6)
				10720	rts
				10721	sto_cos_2: # fp2-fp7 targets: copied directly into the live register
				10722	fmov.x %fp1,%fp2
				10723	rts
				10724	sto_cos_3:
				10725	fmov.x %fp1,%fp3
				10726	rts
				10727	sto_cos_4:
				10728	fmov.x %fp1,%fp4
				10729	rts
				10730	sto_cos_5:
				10731	fmov.x %fp1,%fp5
				10732	rts
				10733	sto_cos_6:
				10734	fmov.x %fp1,%fp6
				10735	rts
				10736	sto_cos_7:
				10737	fmov.x %fp1,%fp7
				10738	rts
				10739
				10740	##################################################################
				10741	global smod_sdnrm
				10742	global smod_snorm
				10743	smod_sdnrm: # fmod, src tagged DENORM: shares the code below
				10744	smod_snorm: # fmod, src tag zero: dispatch on the dst operand tag
				10745	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10746	beq.l smod # zero dst tag (presumably NORM) -> smod
				10747	cmpi.b %d1,&ZERO
				10748	beq.w smod_zro # dst ZERO -> signed zero result, quotient sign recorded
				10749	cmpi.b %d1,&INF
				10750	beq.l t_operr # dst INF -> operand error routine
				10751	cmpi.b %d1,&DENORM
				10752	beq.l smod # dst DENORM -> smod
				10753	cmpi.b %d1,&SNAN
				10754	beq.l dst_snan # dst SNAN -> return dst SNAN
				10755	bra.l dst_qnan # any other tag -> return dst QNAN
				10756
				10757	global smod_szero
				10758	smod_szero: # fmod, src tagged ZERO: every non-NAN dst tag tested goes to t_operr
				10759	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10760	beq.l t_operr # zero dst tag (presumably NORM) -> operand error routine
				10761	cmpi.b %d1,&ZERO
				10762	beq.l t_operr # dst ZERO -> operand error routine
				10763	cmpi.b %d1,&INF
				10764	beq.l t_operr # dst INF -> operand error routine
				10765	cmpi.b %d1,&DENORM
				10766	beq.l t_operr # dst DENORM -> operand error routine
				10767	cmpi.b %d1,&QNAN
				10768	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10769	bra.l dst_snan # any other tag -> return dst SNAN
				10770
				10771	global smod_sinf
				10772	smod_sinf: # fmod, src tagged INF: dispatch on the dst operand tag
				10773	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10774	beq.l smod_fpn # zero dst tag (presumably NORM) -> smod_fpn
				10775	cmpi.b %d1,&ZERO
				10776	beq.l smod_zro # dst ZERO -> signed zero result
				10777	cmpi.b %d1,&INF
				10778	beq.l t_operr # dst INF -> operand error routine
				10779	cmpi.b %d1,&DENORM
				10780	beq.l smod_fpn # dst DENORM -> smod_fpn (its denorm path)
				10781	cmpi.b %d1,&QNAN
				10782	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10783	bra.l dst_snan # any other tag -> return dst SNAN
				10784
				10785	smod_zro: # shared by fmod and frem: record quotient sign, return zero with dst's sign
				10786	srem_zro:
				10787	mov.b SRC_EX(%a0),%d1 # get src sign
				10788	mov.b DST_EX(%a1),%d0 # get dst sign
				10789	eor.b %d0,%d1 # get qbyte sign
				10790	andi.b &0x80,%d1 # keep only bit 7 (src sign XOR dst sign)
				10791	mov.b %d1,FPSR_QBYTE(%a6) # store it as the quotient byte
				10792	tst.b %d0 # test the dst sign byte
				10793	bpl.w ld_pzero # dst non-negative -> +0
				10794	bra.w ld_mzero # dst negative -> -0
				10795
				10796	smod_fpn: # shared by fmod and frem: return the dst operand itself
				10797	srem_fpn:
				10798	clr.b FPSR_QBYTE(%a6) # cleared here; overwritten below with the quotient sign
				10799	mov.l %d0,-(%sp) # save d0: it is used as scratch below
				10800	mov.b SRC_EX(%a0),%d1 # get src sign
				10801	mov.b DST_EX(%a1),%d0 # get dst sign
				10802	eor.b %d0,%d1 # get qbyte sign
				10803	andi.b &0x80,%d1 # keep only bit 7 (src sign XOR dst sign)
				10804	mov.b %d1,FPSR_QBYTE(%a6) # store it as the quotient byte
				10805	cmpi.b DTAG(%a6),&DENORM
				10806	bne.b smod_nrm # dst not DENORM -> load it directly
				10807	lea DST(%a1),%a0 # a0 = ptr to the dst operand
				10808	mov.l (%sp)+,%d0 # restore the saved d0
				10809	bra t_resdnrm # hand the denorm dst to t_resdnrm
				10810	smod_nrm:
				10811	fmov.l (%sp)+,%fpcr # pop the saved d0 straight into FPCR
				10812	fmov.x DST(%a1),%fp0 # result = dst operand
				10813	tst.b DST_EX(%a1) # check dst sign
				10814	bmi.b smod_nrm_neg
				10815	rts # non-negative dst: FPSR_CC is not written on this path
				10816
				10817	smod_nrm_neg:
				10818	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode
				10819	rts
				10820
				10821	#########################################################################
				10822	global srem_snorm
				10823	global srem_sdnrm
				10824	srem_sdnrm: # frem, src tagged DENORM: shares the code below
				10825	srem_snorm: # frem, src tag zero: dispatch on the dst operand tag
				10826	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10827	beq.l srem # zero dst tag (presumably NORM) -> srem
				10828	cmpi.b %d1,&ZERO
				10829	beq.w srem_zro # dst ZERO -> signed zero result, quotient sign recorded
				10830	cmpi.b %d1,&INF
				10831	beq.l t_operr # dst INF -> operand error routine
				10832	cmpi.b %d1,&DENORM
				10833	beq.l srem # dst DENORM -> srem
				10834	cmpi.b %d1,&QNAN
				10835	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10836	bra.l dst_snan # any other tag -> return dst SNAN
				10837
				10838	global srem_szero
				10839	srem_szero: # frem, src tagged ZERO: every non-NAN dst tag tested goes to t_operr
				10840	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10841	beq.l t_operr # zero dst tag (presumably NORM) -> operand error routine
				10842	cmpi.b %d1,&ZERO
				10843	beq.l t_operr # dst ZERO -> operand error routine
				10844	cmpi.b %d1,&INF
				10845	beq.l t_operr # dst INF -> operand error routine
				10846	cmpi.b %d1,&DENORM
				10847	beq.l t_operr # dst DENORM -> operand error routine
				10848	cmpi.b %d1,&QNAN
				10849	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10850	bra.l dst_snan # any other tag -> return dst SNAN
				10851
				10852	global srem_sinf
				10853	srem_sinf: # frem, src tagged INF: dispatch on the dst operand tag
				10854	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10855	beq.w srem_fpn # zero dst tag (presumably NORM) -> srem_fpn
				10856	cmpi.b %d1,&ZERO
				10857	beq.w srem_zro # dst ZERO -> signed zero result
				10858	cmpi.b %d1,&INF
				10859	beq.l t_operr # dst INF -> operand error routine
				10860	cmpi.b %d1,&DENORM
				10861	beq.l srem_fpn # dst DENORM -> srem_fpn (its denorm path)
				10862	cmpi.b %d1,&QNAN
				10863	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10864	bra.l dst_snan # any other tag -> return dst SNAN
				10865
				10866	#########################################################################
				10867	global sscale_snorm
				10868	global sscale_sdnrm
				10869	sscale_snorm: # fscale, src tag zero: dispatch on the dst operand tag
				10870	sscale_sdnrm: # fscale, src tagged DENORM: shares the same code
				10871	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10872	beq.l sscale # zero dst tag (presumably NORM) -> sscale
				10873	cmpi.b %d1,&ZERO
				10874	beq.l dst_zero # dst ZERO -> zero with sign of dst
				10875	cmpi.b %d1,&INF
				10876	beq.l dst_inf # dst INF -> inf with sign of dst
				10877	cmpi.b %d1,&DENORM
				10878	beq.l sscale # dst DENORM -> sscale
				10879	cmpi.b %d1,&QNAN
				10880	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10881	bra.l dst_snan # any other tag -> return dst SNAN
				10882
				10883	global sscale_szero
				10884	sscale_szero: # fscale, src tagged ZERO: same dst dispatch as sscale_snorm
				10885	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10886	beq.l sscale # zero dst tag (presumably NORM) -> sscale
				10887	cmpi.b %d1,&ZERO
				10888	beq.l dst_zero # dst ZERO -> zero with sign of dst
				10889	cmpi.b %d1,&INF
				10890	beq.l dst_inf # dst INF -> inf with sign of dst
				10891	cmpi.b %d1,&DENORM
				10892	beq.l sscale # dst DENORM -> sscale
				10893	cmpi.b %d1,&QNAN
				10894	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10895	bra.l dst_snan # any other tag -> return dst SNAN
				10896
				10897	global sscale_sinf
				10898	sscale_sinf: # fscale, src tagged INF: operand error unless the dst is a NAN
				10899	mov.b DTAG(%a6),%d1 # d1 = dst tag; sets Z if tag is zero
				10900	beq.l t_operr # zero dst tag (presumably NORM) -> operand error routine
				10901	cmpi.b %d1,&QNAN
				10902	beq.l dst_qnan # dst QNAN -> return dst QNAN
				10903	cmpi.b %d1,&SNAN
				10904	beq.l dst_snan # dst SNAN -> return dst SNAN
				10905	bra.l t_operr # every other dst tag -> operand error routine
				10906
				10907	########################################################################
				10908
				10909	#
				10910	# sop_sqnan(): The src op for frem/fmod/fscale was a QNAN.
				10911	# The dst tag is tested first, so a dst QNAN or SNAN is returned instead of the src QNAN.
				10912	global sop_sqnan
				10913	sop_sqnan:
				10914	mov.b DTAG(%a6),%d1 # d1 = dst tag
				10915	cmpi.b %d1,&QNAN
				10916	beq.b dst_qnan # dst QNAN -> return dst QNAN
				10917	cmpi.b %d1,&SNAN
				10918	beq.b dst_snan # dst SNAN -> return dst SNAN
				10919	bra.b src_qnan # dst is not a NAN -> return src QNAN
				10920
				10921	#
				10922	# sop_ssnan(): The src op for frem/fmod/fscale was an SNAN.
				10923	# The dst tag is tested first, so a dst NAN is the value returned when there is one.
				10924	global sop_ssnan
				10925	sop_ssnan:
				10926	mov.b DTAG(%a6),%d1 # d1 = dst tag
				10927	cmpi.b %d1,&QNAN
				10928	beq.b dst_qnan_src_snan # dst QNAN -> flag the src SNAN, then return dst QNAN
				10929	cmpi.b %d1,&SNAN
				10930	beq.b dst_snan # dst SNAN -> return dst SNAN
				10931	bra.b src_snan # dst is not a NAN -> return src SNAN
				10932
				10933	dst_qnan_src_snan: # dst QNAN is returned, but the src SNAN must still be recorded
				10934	ori.l &snaniop_mask,USER_FPSR(%a6) # set NAN/SNAN/AIOP
				10935	bra.b dst_qnan
				10936
				10937	#
				10938	# dst_snan(): Return the dst SNAN w/ the SNAN bit set.
				10939	# in: a1 = ptr to dst operand; out: fp0 = dst, USER_FPSR(%a6) updated; d0 modified
				10940	global dst_snan
				10941	dst_snan:
				10942	fmov.x DST(%a1),%fp0 # the fmove sets the SNAN bit
				10943	fmov.l %fpsr,%d0 # catch resulting status
				10944	or.l %d0,USER_FPSR(%a6) # store status
				10945	rts
				10946
				10947	#
				10948	# dst_qnan(): Return the dst QNAN.
				10949	# in: a1 = ptr to dst operand; out: fp0 = dst, FPSR_CC(%a6) = NAN (plus N if sign set)
				10950	global dst_qnan
				10951	dst_qnan:
				10952	fmov.x DST(%a1),%fp0 # return the non-signalling nan
				10953	tst.b DST_EX(%a1) # set ccodes according to QNAN sign
				10954	bmi.b dst_qnan_m
				10955	dst_qnan_p:
				10956	mov.b &nan_bmask,FPSR_CC(%a6) # set 'NAN' ccode bit
				10957	rts
				10958	dst_qnan_m: # also the branch target of src_qnan for a negative src QNAN
				10959	mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
				10960	rts
				10961
				10962	#
				10963	# src_snan(): Return the src SNAN w/ the SNAN bit set.
				10964	# in: a0 = ptr to src operand; out: fp0 = src, USER_FPSR(%a6) updated; d0 modified
				10965	global src_snan
				10966	src_snan:
				10967	fmov.x SRC(%a0),%fp0 # the fmove sets the SNAN bit
				10968	fmov.l %fpsr,%d0 # catch resulting status
				10969	or.l %d0,USER_FPSR(%a6) # store status
				10970	rts
				10971
				10972	#
				10973	# src_qnan(): Return the src QNAN.
				10974	# in: a0 = ptr to src operand; out: fp0 = src, FPSR_CC(%a6) = NAN (plus N if sign set)
				10975	global src_qnan
				10976	src_qnan:
				10977	fmov.x SRC(%a0),%fp0 # return the non-signalling nan
				10978	tst.b SRC_EX(%a0) # set ccodes according to QNAN sign
				10979	bmi.b src_qnan_m # negative QNAN: use this routine's own tail
				10980	src_qnan_p:
				10981	mov.b &nan_bmask,FPSR_CC(%a6) # set 'NAN' ccode bit
				10982	rts
				10983	src_qnan_m:
				10984	mov.b &neg_bmask+nan_bmask,FPSR_CC(%a6) # set 'N','NAN' ccode bits
				10985	rts
				10986
				10987	#
				10988	# fkern2.s:
				10989	# These entry points are used by the exception handler
				10990	# routines where an instruction is selected by an index into
				10991	# a large jump table corresponding to a given instruction which
				10992	# has been decoded. Flow continues here where we now decode
Andrea Gelmini	86a8280	2016-05-21 13:57:20 +0200	[diff] [blame]	10993	# further according to the source operand type.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	10994	#
				10995
				10996	global fsinh
				10997	fsinh: # dispatch fsinh on the src operand tag
				10998	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				10999	beq.l ssinh # zero tag (presumably NORM) -> ssinh
				11000	cmpi.b %d1,&ZERO
				11001	beq.l src_zero # ZERO -> zero with sign of src
				11002	cmpi.b %d1,&INF
				11003	beq.l src_inf # INF -> inf with sign of src
				11004	cmpi.b %d1,&DENORM
				11005	beq.l ssinhd # DENORM -> ssinhd
				11006	cmpi.b %d1,&QNAN
				11007	beq.l src_qnan # QNAN -> return src QNAN
				11008	bra.l src_snan # any other tag -> return src SNAN
				11009
				11010	global flognp1
				11011	flognp1: # dispatch flognp1 on the src operand tag
				11012	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11013	beq.l slognp1 # zero tag (presumably NORM) -> slognp1
				11014	cmpi.b %d1,&ZERO
				11015	beq.l src_zero # ZERO -> zero with sign of src
				11016	cmpi.b %d1,&INF
				11017	beq.l sopr_inf # INF -> +INF, or operand error if negative
				11018	cmpi.b %d1,&DENORM
				11019	beq.l slognp1d # DENORM -> slognp1d
				11020	cmpi.b %d1,&QNAN
				11021	beq.l src_qnan # QNAN -> return src QNAN
				11022	bra.l src_snan # any other tag -> return src SNAN
				11023
				11024	global fetoxm1
				11025	fetoxm1: # dispatch fetoxm1 on the src operand tag
				11026	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11027	beq.l setoxm1 # zero tag (presumably NORM) -> setoxm1
				11028	cmpi.b %d1,&ZERO
				11029	beq.l src_zero # ZERO -> zero with sign of src
				11030	cmpi.b %d1,&INF
				11031	beq.l setoxm1i # INF -> -1 if negative, else +INF
				11032	cmpi.b %d1,&DENORM
				11033	beq.l setoxm1d # DENORM -> setoxm1d
				11034	cmpi.b %d1,&QNAN
				11035	beq.l src_qnan # QNAN -> return src QNAN
				11036	bra.l src_snan # any other tag -> return src SNAN
				11037
				11038	global ftanh
				11039	ftanh: # dispatch ftanh on the src operand tag
				11040	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11041	beq.l stanh # zero tag (presumably NORM) -> stanh
				11042	cmpi.b %d1,&ZERO
				11043	beq.l src_zero # ZERO -> zero with sign of src
				11044	cmpi.b %d1,&INF
				11045	beq.l src_one # INF -> one with sign of src
				11046	cmpi.b %d1,&DENORM
				11047	beq.l stanhd # DENORM -> stanhd
				11048	cmpi.b %d1,&QNAN
				11049	beq.l src_qnan # QNAN -> return src QNAN
				11050	bra.l src_snan # any other tag -> return src SNAN
				11051
				11052	global fatan
				11053	fatan: # dispatch fatan on the src operand tag
				11054	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11055	beq.l satan # zero tag (presumably NORM) -> satan
				11056	cmpi.b %d1,&ZERO
				11057	beq.l src_zero # ZERO -> zero with sign of src
				11058	cmpi.b %d1,&INF
				11059	beq.l spi_2 # INF -> pi/2 with sign of src
				11060	cmpi.b %d1,&DENORM
				11061	beq.l satand # DENORM -> satand
				11062	cmpi.b %d1,&QNAN
				11063	beq.l src_qnan # QNAN -> return src QNAN
				11064	bra.l src_snan # any other tag -> return src SNAN
				11065
				11066	global fasin
				11067	fasin: # dispatch fasin on the src operand tag
				11068	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11069	beq.l sasin # zero tag (presumably NORM) -> sasin
				11070	cmpi.b %d1,&ZERO
				11071	beq.l src_zero # ZERO -> zero with sign of src
				11072	cmpi.b %d1,&INF
				11073	beq.l t_operr # INF -> operand error routine
				11074	cmpi.b %d1,&DENORM
				11075	beq.l sasind # DENORM -> sasind
				11076	cmpi.b %d1,&QNAN
				11077	beq.l src_qnan # QNAN -> return src QNAN
				11078	bra.l src_snan # any other tag -> return src SNAN
				11079
				11080	global fatanh
				11081	fatanh: # dispatch fatanh on the src operand tag
				11082	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11083	beq.l satanh # zero tag (presumably NORM) -> satanh
				11084	cmpi.b %d1,&ZERO
				11085	beq.l src_zero # ZERO -> zero with sign of src
				11086	cmpi.b %d1,&INF
				11087	beq.l t_operr # INF -> operand error routine
				11088	cmpi.b %d1,&DENORM
				11089	beq.l satanhd # DENORM -> satanhd
				11090	cmpi.b %d1,&QNAN
				11091	beq.l src_qnan # QNAN -> return src QNAN
				11092	bra.l src_snan # any other tag -> return src SNAN
				11093
				11094	global fsine
				11095	fsine: # dispatch the sine operation on the src operand tag (table slot 0e)
				11096	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11097	beq.l ssin # zero tag (presumably NORM) -> ssin
				11098	cmpi.b %d1,&ZERO
				11099	beq.l src_zero # ZERO -> zero with sign of src
				11100	cmpi.b %d1,&INF
				11101	beq.l t_operr # INF -> operand error routine
				11102	cmpi.b %d1,&DENORM
				11103	beq.l ssind # DENORM -> ssind
				11104	cmpi.b %d1,&QNAN
				11105	beq.l src_qnan # QNAN -> return src QNAN
				11106	bra.l src_snan # any other tag -> return src SNAN
				11107
				11108	global ftan
				11109	ftan: # dispatch ftan on the src operand tag
				11110	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11111	beq.l stan # zero tag (presumably NORM) -> stan
				11112	cmpi.b %d1,&ZERO
				11113	beq.l src_zero # ZERO -> zero with sign of src
				11114	cmpi.b %d1,&INF
				11115	beq.l t_operr # INF -> operand error routine
				11116	cmpi.b %d1,&DENORM
				11117	beq.l stand # DENORM -> stand
				11118	cmpi.b %d1,&QNAN
				11119	beq.l src_qnan # QNAN -> return src QNAN
				11120	bra.l src_snan # any other tag -> return src SNAN
				11121
				11122	global fetox
				11123	fetox: # dispatch fetox on the src operand tag
				11124	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11125	beq.l setox # zero tag (presumably NORM) -> setox
				11126	cmpi.b %d1,&ZERO
				11127	beq.l ld_pone # ZERO -> +1
				11128	cmpi.b %d1,&INF
				11129	beq.l szr_inf # INF -> +0 if negative, else +INF
				11130	cmpi.b %d1,&DENORM
				11131	beq.l setoxd # DENORM -> setoxd
				11132	cmpi.b %d1,&QNAN
				11133	beq.l src_qnan # QNAN -> return src QNAN
				11134	bra.l src_snan # any other tag -> return src SNAN
				11135
				11136	global ftwotox
				11137	ftwotox: # dispatch ftwotox on the src operand tag
				11138	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11139	beq.l stwotox # zero tag (presumably NORM) -> stwotox
				11140	cmpi.b %d1,&ZERO
				11141	beq.l ld_pone # ZERO -> +1
				11142	cmpi.b %d1,&INF
				11143	beq.l szr_inf # INF -> +0 if negative, else +INF
				11144	cmpi.b %d1,&DENORM
				11145	beq.l stwotoxd # DENORM -> stwotoxd
				11146	cmpi.b %d1,&QNAN
				11147	beq.l src_qnan # QNAN -> return src QNAN
				11148	bra.l src_snan # any other tag -> return src SNAN
				11149
				11150	global ftentox
				11151	ftentox: # dispatch ftentox on the src operand tag
				11152	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11153	beq.l stentox # zero tag (presumably NORM) -> stentox
				11154	cmpi.b %d1,&ZERO
				11155	beq.l ld_pone # ZERO -> +1
				11156	cmpi.b %d1,&INF
				11157	beq.l szr_inf # INF -> +0 if negative, else +INF
				11158	cmpi.b %d1,&DENORM
				11159	beq.l stentoxd # DENORM -> stentoxd
				11160	cmpi.b %d1,&QNAN
				11161	beq.l src_qnan # QNAN -> return src QNAN
				11162	bra.l src_snan # any other tag -> return src SNAN
				11163
				11164	global flogn
				11165	flogn: # dispatch flogn on the src operand tag
				11166	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11167	beq.l slogn # zero tag (presumably NORM) -> slogn
				11168	cmpi.b %d1,&ZERO
				11169	beq.l t_dz2 # ZERO -> t_dz2
				11170	cmpi.b %d1,&INF
				11171	beq.l sopr_inf # INF -> +INF, or operand error if negative
				11172	cmpi.b %d1,&DENORM
				11173	beq.l slognd # DENORM -> slognd
				11174	cmpi.b %d1,&QNAN
				11175	beq.l src_qnan # QNAN -> return src QNAN
				11176	bra.l src_snan # any other tag -> return src SNAN
				11177
				11178	global flog10
				11179	flog10: # dispatch flog10 on the src operand tag
				11180	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11181	beq.l slog10 # zero tag (presumably NORM) -> slog10
				11182	cmpi.b %d1,&ZERO
				11183	beq.l t_dz2 # ZERO -> t_dz2
				11184	cmpi.b %d1,&INF
				11185	beq.l sopr_inf # INF -> +INF, or operand error if negative
				11186	cmpi.b %d1,&DENORM
				11187	beq.l slog10d # DENORM -> slog10d
				11188	cmpi.b %d1,&QNAN
				11189	beq.l src_qnan # QNAN -> return src QNAN
				11190	bra.l src_snan # any other tag -> return src SNAN
				11191
				11192	global flog2
				11193	flog2: # dispatch flog2 on the src operand tag
				11194	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11195	beq.l slog2 # zero tag (presumably NORM) -> slog2
				11196	cmpi.b %d1,&ZERO
				11197	beq.l t_dz2 # ZERO -> t_dz2
				11198	cmpi.b %d1,&INF
				11199	beq.l sopr_inf # INF -> +INF, or operand error if negative
				11200	cmpi.b %d1,&DENORM
				11201	beq.l slog2d # DENORM -> slog2d
				11202	cmpi.b %d1,&QNAN
				11203	beq.l src_qnan # QNAN -> return src QNAN
				11204	bra.l src_snan # any other tag -> return src SNAN
				11205
				11206	global fcosh
				11207	fcosh: # dispatch fcosh on the src operand tag
				11208	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11209	beq.l scosh # zero tag (presumably NORM) -> scosh
				11210	cmpi.b %d1,&ZERO
				11211	beq.l ld_pone # ZERO -> +1
				11212	cmpi.b %d1,&INF
				11213	beq.l ld_pinf # INF -> +INF regardless of src sign
				11214	cmpi.b %d1,&DENORM
				11215	beq.l scoshd # DENORM -> scoshd
				11216	cmpi.b %d1,&QNAN
				11217	beq.l src_qnan # QNAN -> return src QNAN
				11218	bra.l src_snan # any other tag -> return src SNAN
				11219
				11220	global facos
				11221	facos: # dispatch facos on the src operand tag
				11222	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11223	beq.l sacos # zero tag (presumably NORM) -> sacos
				11224	cmpi.b %d1,&ZERO
				11225	beq.l ld_ppi2 # ZERO -> +pi/2
				11226	cmpi.b %d1,&INF
				11227	beq.l t_operr # INF -> operand error routine
				11228	cmpi.b %d1,&DENORM
				11229	beq.l sacosd # DENORM -> sacosd
				11230	cmpi.b %d1,&QNAN
				11231	beq.l src_qnan # QNAN -> return src QNAN
				11232	bra.l src_snan # any other tag -> return src SNAN
				11233
				11234	global fcos
				11235	fcos: # dispatch fcos on the src operand tag
				11236	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11237	beq.l scos # zero tag (presumably NORM) -> scos
				11238	cmpi.b %d1,&ZERO
				11239	beq.l ld_pone # ZERO -> +1
				11240	cmpi.b %d1,&INF
				11241	beq.l t_operr # INF -> operand error routine
				11242	cmpi.b %d1,&DENORM
				11243	beq.l scosd # DENORM -> scosd
				11244	cmpi.b %d1,&QNAN
				11245	beq.l src_qnan # QNAN -> return src QNAN
				11246	bra.l src_snan # any other tag -> return src SNAN
				11247
				11248	global fgetexp
				11249	fgetexp: # dispatch fgetexp on the src operand tag
				11250	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11251	beq.l sgetexp # zero tag (presumably NORM) -> sgetexp
				11252	cmpi.b %d1,&ZERO
				11253	beq.l src_zero # ZERO -> zero with sign of src
				11254	cmpi.b %d1,&INF
				11255	beq.l t_operr # INF -> operand error routine
				11256	cmpi.b %d1,&DENORM
				11257	beq.l sgetexpd # DENORM -> sgetexpd
				11258	cmpi.b %d1,&QNAN
				11259	beq.l src_qnan # QNAN -> return src QNAN
				11260	bra.l src_snan # any other tag -> return src SNAN
				11261
				11262	global fgetman
				11263	fgetman: # dispatch fgetman on the src operand tag
				11264	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11265	beq.l sgetman # zero tag (presumably NORM) -> sgetman
				11266	cmpi.b %d1,&ZERO
				11267	beq.l src_zero # ZERO -> zero with sign of src
				11268	cmpi.b %d1,&INF
				11269	beq.l t_operr # INF -> operand error routine
				11270	cmpi.b %d1,&DENORM
				11271	beq.l sgetmand # DENORM -> sgetmand
				11272	cmpi.b %d1,&QNAN
				11273	beq.l src_qnan # QNAN -> return src QNAN
				11274	bra.l src_snan # any other tag -> return src SNAN
				11275
				11276	global fsincos
				11277	fsincos: # dispatch fsincos on the src operand tag
				11278	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11279	beq.l ssincos # zero tag (presumably NORM) -> ssincos
				11280	cmpi.b %d1,&ZERO
				11281	beq.l ssincosz # ZERO -> cosine = +1, sine = signed zero
				11282	cmpi.b %d1,&INF
				11283	beq.l ssincosi # INF -> cosine = QNAN, then operand error
				11284	cmpi.b %d1,&DENORM
				11285	beq.l ssincosd # DENORM -> ssincosd
				11286	cmpi.b %d1,&QNAN
				11287	beq.l ssincosqnan # QNAN -> stored as cosine and returned as sine
				11288	bra.l ssincossnan # any other tag -> SNAN handling for both results
				11289
				11290	global fmod
				11291	fmod: # dispatch fmod on the src tag; each target then dispatches on the dst tag
				11292	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11293	beq.l smod_snorm # zero tag (presumably NORM) -> smod_snorm
				11294	cmpi.b %d1,&ZERO
				11295	beq.l smod_szero # src ZERO
				11296	cmpi.b %d1,&INF
				11297	beq.l smod_sinf # src INF
				11298	cmpi.b %d1,&DENORM
				11299	beq.l smod_sdnrm # src DENORM
				11300	cmpi.b %d1,&QNAN
				11301	beq.l sop_sqnan # src QNAN
				11302	bra.l sop_ssnan # any other tag -> src SNAN handling
				11303
				11304	global frem
				11305	frem: # dispatch frem on the src tag; each target then dispatches on the dst tag
				11306	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11307	beq.l srem_snorm # zero tag (presumably NORM) -> srem_snorm
				11308	cmpi.b %d1,&ZERO
				11309	beq.l srem_szero # src ZERO
				11310	cmpi.b %d1,&INF
				11311	beq.l srem_sinf # src INF
				11312	cmpi.b %d1,&DENORM
				11313	beq.l srem_sdnrm # src DENORM
				11314	cmpi.b %d1,&QNAN
				11315	beq.l sop_sqnan # src QNAN
				11316	bra.l sop_ssnan # any other tag -> src SNAN handling
				11317
				11318	global fscale
				11319	fscale: # dispatch fscale on the src tag; each target then dispatches on the dst tag
				11320	mov.b STAG(%a6),%d1 # d1 = src tag; sets Z if tag is zero
				11321	beq.l sscale_snorm # zero tag (presumably NORM) -> sscale_snorm
				11322	cmpi.b %d1,&ZERO
				11323	beq.l sscale_szero # src ZERO
				11324	cmpi.b %d1,&INF
				11325	beq.l sscale_sinf # src INF
				11326	cmpi.b %d1,&DENORM
				11327	beq.l sscale_sdnrm # src DENORM
				11328	cmpi.b %d1,&QNAN
				11329	beq.l sop_sqnan # src QNAN
				11330	bra.l sop_ssnan # any other tag -> src SNAN handling
				11331
				11332	#########################################################################
				11333	# XDEF **************************************************************** #
				11334	# fgen_except(): catch an exception during transcendental #
				11335	# emulation #
				11336	# #
				11337	# XREF **************************************************************** #
				11338	# fmul() - emulate a multiply instruction #
				11339	# fadd() - emulate an add instruction #
				11340	# fin() - emulate an fmove instruction #
				11341	# #
				11342	# INPUT *************************************************************** #
				11343	# fp0 = destination operand #
				11344	# d1 = type of instruction that took exception (this line originally said d0, but the code compares %d1 - NOTE(review): confirm against callers) #
				11345	# fsave frame = source operand #
				11346	# #
				11347	# OUTPUT ************************************************************** #
				11348	# fp0 = result #
				11349	# fp1 = EXOP #
				11350	# #
				11351	# ALGORITHM *********************************************************** #
				11352	# An exception occurred on the last instruction of the #
				11353	# transcendental emulation. hopefully, this won't be happening much #
				11354	# because it will be VERY slow. #
				11355	# The only exceptions capable of passing through here are #
				11356	# Overflow, Underflow, and Unsupported Data Type. #
				11357	# #
				11358	#########################################################################
				11359
				11360	global fgen_except
				11361	fgen_except:
				11362	cmpi.b 0x3(%sp),&0x7 # is exception UNSUPP?
				11363	beq.b fge_unsupp # yes
				11364
				11365	mov.b &NORM,STAG(%a6) # otherwise tag the src operand NORM
				11366
				11367	fge_cont:
				11368	mov.b &NORM,DTAG(%a6) # the dst operand is always tagged NORM here
				11369
				11370	# ok, I have a problem with putting the dst op at FP_DST. the emulation
				11371	# routines aren't supposed to alter the operands but we've just squashed
				11372	# FP_DST here...
				11373
				11374	# 8/17/93 - this turns out to be more of a "cleanliness" standpoint
				11375	# than a potential bug. to begin with, only the dyadic functions
				11376	# frem,fmod, and fscale would get the dst trashed here. But, for
				11377	# the 060SP, the FP_DST is never used again anyways.
				11378	fmovm.x &0x80,FP_DST(%a6) # dst op is in fp0
				11379
				11380	lea 0x4(%sp),%a0 # pass: ptr to src op
				11381	lea FP_DST(%a6),%a1 # pass: ptr to dst op
				11382
				11383	cmpi.b %d1,&FMOV_OP
				11384	beq.b fge_fin # it was an "fmov"
				11385	cmpi.b %d1,&FADD_OP
				11386	beq.b fge_fadd # it was an "fadd"
				11387	fge_fmul: # any other value in d1 is treated as a multiply
				11388	bsr.l fmul
				11389	rts
				11390	fge_fadd:
				11391	bsr.l fadd
				11392	rts
				11393	fge_fin:
				11394	bsr.l fin
				11395	rts
				11396
				11397	fge_unsupp:
				11398	mov.b &DENORM,STAG(%a6) # UNSUPP case: tag the src operand DENORM
				11399	bra.b fge_cont
				11400
				11401	#
				11402	# This table holds the offsets of the emulation routines for each individual
				11403	# math operation relative to the address of this table. Included are
				11404	# routines like fadd/fmul/fabs as well as the transcendentals.
				11405	# The location within the table is determined by the extension bits of the
				11406	# operation longword.
				11407	# Slots written as "tbl_unsupp - tbl_unsupp" hold a zero offset: no routine is listed for that value.
				11408
				11409	swbeg &109 # 109 long entries follow (indices 0x00-0x6c)
				11410	tbl_unsupp:
				11411	long fin - tbl_unsupp # 00: fmove
				11412	long fint - tbl_unsupp # 01: fint
				11413	long fsinh - tbl_unsupp # 02: fsinh
				11414	long fintrz - tbl_unsupp # 03: fintrz
				11415	long fsqrt - tbl_unsupp # 04: fsqrt
				11416	long tbl_unsupp - tbl_unsupp # 05: no routine
				11417	long flognp1 - tbl_unsupp # 06: flognp1
				11418	long tbl_unsupp - tbl_unsupp # 07: no routine
				11419	long fetoxm1 - tbl_unsupp # 08: fetoxm1
				11420	long ftanh - tbl_unsupp # 09: ftanh
				11421	long fatan - tbl_unsupp # 0a: fatan
				11422	long tbl_unsupp - tbl_unsupp # 0b: no routine
				11423	long fasin - tbl_unsupp # 0c: fasin
				11424	long fatanh - tbl_unsupp # 0d: fatanh
				11425	long fsine - tbl_unsupp # 0e: fsin (handled by label fsine)
				11426	long ftan - tbl_unsupp # 0f: ftan
				11427	long fetox - tbl_unsupp # 10: fetox
				11428	long ftwotox - tbl_unsupp # 11: ftwotox
				11429	long ftentox - tbl_unsupp # 12: ftentox
				11430	long tbl_unsupp - tbl_unsupp # 13: no routine
				11431	long flogn - tbl_unsupp # 14: flogn
				11432	long flog10 - tbl_unsupp # 15: flog10
				11433	long flog2 - tbl_unsupp # 16: flog2
				11434	long tbl_unsupp - tbl_unsupp # 17: no routine
				11435	long fabs - tbl_unsupp # 18: fabs
				11436	long fcosh - tbl_unsupp # 19: fcosh
				11437	long fneg - tbl_unsupp # 1a: fneg
				11438	long tbl_unsupp - tbl_unsupp # 1b: no routine
				11439	long facos - tbl_unsupp # 1c: facos
				11440	long fcos - tbl_unsupp # 1d: fcos
				11441	long fgetexp - tbl_unsupp # 1e: fgetexp
				11442	long fgetman - tbl_unsupp # 1f: fgetman
				11443	long fdiv - tbl_unsupp # 20: fdiv
				11444	long fmod - tbl_unsupp # 21: fmod
				11445	long fadd - tbl_unsupp # 22: fadd
				11446	long fmul - tbl_unsupp # 23: fmul
				11447	long fsgldiv - tbl_unsupp # 24: fsgldiv
				11448	long frem - tbl_unsupp # 25: frem
				11449	long fscale - tbl_unsupp # 26: fscale
				11450	long fsglmul - tbl_unsupp # 27: fsglmul
				11451	long fsub - tbl_unsupp # 28: fsub
				11452	long tbl_unsupp - tbl_unsupp # 29: no routine
				11453	long tbl_unsupp - tbl_unsupp # 2a: no routine
				11454	long tbl_unsupp - tbl_unsupp # 2b: no routine
				11455	long tbl_unsupp - tbl_unsupp # 2c: no routine
				11456	long tbl_unsupp - tbl_unsupp # 2d: no routine
				11457	long tbl_unsupp - tbl_unsupp # 2e: no routine
				11458	long tbl_unsupp - tbl_unsupp # 2f: no routine
				11459	long fsincos - tbl_unsupp # 30: fsincos
				11460	long fsincos - tbl_unsupp # 31: fsincos
				11461	long fsincos - tbl_unsupp # 32: fsincos
				11462	long fsincos - tbl_unsupp # 33: fsincos
				11463	long fsincos - tbl_unsupp # 34: fsincos
				11464	long fsincos - tbl_unsupp # 35: fsincos
				11465	long fsincos - tbl_unsupp # 36: fsincos
				11466	long fsincos - tbl_unsupp # 37: fsincos
				11467	long fcmp - tbl_unsupp # 38: fcmp
				11468	long tbl_unsupp - tbl_unsupp # 39: no routine
				11469	long ftst - tbl_unsupp # 3a: ftst
				11470	long tbl_unsupp - tbl_unsupp # 3b: no routine
				11471	long tbl_unsupp - tbl_unsupp # 3c: no routine
				11472	long tbl_unsupp - tbl_unsupp # 3d: no routine
				11473	long tbl_unsupp - tbl_unsupp # 3e: no routine
				11474	long tbl_unsupp - tbl_unsupp # 3f: no routine
				11475	long fsin - tbl_unsupp # 40: fsmove (handled by label fsin)
				11476	long fssqrt - tbl_unsupp # 41: fssqrt
				11477	long tbl_unsupp - tbl_unsupp # 42: no routine
				11478	long tbl_unsupp - tbl_unsupp # 43: no routine
				11479	long fdin - tbl_unsupp # 44: fdmove
				11480	long fdsqrt - tbl_unsupp # 45: fdsqrt
				11481	long tbl_unsupp - tbl_unsupp # 46: no routine
				11482	long tbl_unsupp - tbl_unsupp # 47: no routine
				11483	long tbl_unsupp - tbl_unsupp # 48: no routine
				11484	long tbl_unsupp - tbl_unsupp # 49: no routine
				11485	long tbl_unsupp - tbl_unsupp # 4a: no routine
				11486	long tbl_unsupp - tbl_unsupp # 4b: no routine
				11487	long tbl_unsupp - tbl_unsupp # 4c: no routine
				11488	long tbl_unsupp - tbl_unsupp # 4d: no routine
				11489	long tbl_unsupp - tbl_unsupp # 4e: no routine
				11490	long tbl_unsupp - tbl_unsupp # 4f: no routine
				11491	long tbl_unsupp - tbl_unsupp # 50: no routine
				11492	long tbl_unsupp - tbl_unsupp # 51: no routine
				11493	long tbl_unsupp - tbl_unsupp # 52: no routine
				11494	long tbl_unsupp - tbl_unsupp # 53: no routine
				11495	long tbl_unsupp - tbl_unsupp # 54: no routine
				11496	long tbl_unsupp - tbl_unsupp # 55: no routine
				11497	long tbl_unsupp - tbl_unsupp # 56: no routine
				11498	long tbl_unsupp - tbl_unsupp # 57: no routine
				11499	long fsabs - tbl_unsupp # 58: fsabs
				11500	long tbl_unsupp - tbl_unsupp # 59: no routine
				11501	long fsneg - tbl_unsupp # 5a: fsneg
				11502	long tbl_unsupp - tbl_unsupp # 5b: no routine
				11503	long fdabs - tbl_unsupp # 5c: fdabs
				11504	long tbl_unsupp - tbl_unsupp # 5d: no routine
				11505	long fdneg - tbl_unsupp # 5e: fdneg
				11506	long tbl_unsupp - tbl_unsupp # 5f: no routine
				11507	long fsdiv - tbl_unsupp # 60: fsdiv
				11508	long tbl_unsupp - tbl_unsupp # 61: no routine
				11509	long fsadd - tbl_unsupp # 62: fsadd
				11510	long fsmul - tbl_unsupp # 63: fsmul
				11511	long fddiv - tbl_unsupp # 64: fddiv
				11512	long tbl_unsupp - tbl_unsupp # 65: no routine
				11513	long fdadd - tbl_unsupp # 66: fdadd
				11514	long fdmul - tbl_unsupp # 67: fdmul
				11515	long fssub - tbl_unsupp # 68: fssub
				11516	long tbl_unsupp - tbl_unsupp # 69: no routine
				11517	long tbl_unsupp - tbl_unsupp # 6a: no routine
				11518	long tbl_unsupp - tbl_unsupp # 6b: no routine
				11519	long fdsub - tbl_unsupp # 6c: fdsub
				11520
				11521	#########################################################################
				11522	# XDEF **************************************************************** #
				11523	# fmul(): emulates the fmul instruction #
				11524	# fsmul(): emulates the fsmul instruction #
				11525	# fdmul(): emulates the fdmul instruction #
				11526	# #
				11527	# XREF **************************************************************** #
				11528	# scale_to_zero_src() - scale src exponent to zero #
				11529	# scale_to_zero_dst() - scale dst exponent to zero #
				11530	# unf_res() - return default underflow result #
				11531	# ovf_res() - return default overflow result #
				11532	# res_qnan() - return QNAN result #
				11533	# res_snan() - return SNAN result #
				11534	# #
				11535	# INPUT *************************************************************** #
				11536	# a0 = pointer to extended precision source operand #
				11537	# a1 = pointer to extended precision destination operand #
				11538	# d0 rnd prec,mode #
				11539	# #
				11540	# OUTPUT ************************************************************** #
				11541	# fp0 = result #
				11542	# fp1 = EXOP (if exception occurred) #
				11543	# #
				11544	# ALGORITHM *********************************************************** #
				11545	# Handle NANs, infinities, and zeroes as special cases. Divide #
				11546	# norms/denorms into ext/sgl/dbl precision. #
				11547	# For norms/denorms, scale the exponents such that a multiply #
				11548	# instruction won't cause an exception. Use the regular fmul to #
				11549	# compute a result. Check if the regular operands would have taken #
				11550	# an exception. If so, return the default overflow/underflow result #
				11551	# and return the EXOP if exceptions are enabled. Else, scale the #
				11552	# result operand to the proper exponent. #
				11553	# #
				11554	#########################################################################
				11555
				11556	align 0x10
				11557	tbl_fmul_ovfl: # overflow thresholds for the combined scale factor; fmul_norm indexes this by rnd prec
				11558	long 0x3fff - 0x7ffe # ext_max
				11559	long 0x3fff - 0x407e # sgl_max
				11560	long 0x3fff - 0x43fe # dbl_max
				11561	tbl_fmul_unfl: # underflow thresholds for the combined scale factor; same indexing as tbl_fmul_ovfl
				11562	long 0x3fff + 0x0001 # ext_unfl
				11563	long 0x3fff - 0x3f80 # sgl_unfl
				11564	long 0x3fff - 0x3c00 # dbl_unfl
				11565
				11566	global fsmul
				11567	fsmul: # entry for fsmul: force single precision, then join fmul
				11568	andi.b &0x30,%d0 # clear rnd prec
				11569	ori.b &s_mode*0x10,%d0 # insert sgl prec
				11570	bra.b fmul
				11571
				11572	global fdmul
				11573	fdmul: # entry for fdmul: force double precision, then fall through into fmul
				11574	andi.b &0x30,%d0 # clear rnd prec (keep rnd mode bits only)
				11575	ori.b &d_mode*0x10,%d0 # insert dbl prec
				11576
				11577	global fmul
				11578	fmul: # common entry: d0 = rnd prec,mode; a0 = src operand; a1 = dst operand
				11579	mov.l %d0,L_SCR3(%a6) # store rnd info
				11580
				11581	clr.w %d1
				11582	mov.b DTAG(%a6),%d1 # fetch dst optype tag
				11583	lsl.b &0x3,%d1 # d1 = DTAG * 8
				11584	or.b STAG(%a6),%d1 # combine dst and src tags: d1 = (DTAG << 3) | STAG
				11585	bne.w fmul_not_norm # optimize on non-norm input; zero index falls through to fmul_norm
				11586
				11587	fmul_norm: # also reached via tbl_fmul_op when an operand is a DENORM
				11588	mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand into FP_SCR1
				11589	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				11590	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				11591
				11592	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand into FP_SCR0
				11593	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				11594	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				11595
				11596	bsr.l scale_to_zero_src # scale src exponent
				11597	mov.l %d0,-(%sp) # save scale factor 1
				11598
				11599	bsr.l scale_to_zero_dst # scale dst exponent
				11600
				11601	add.l %d0,(%sp) # SCALE_FACTOR = scale1 + scale2
				11602
				11603	mov.w 2+L_SCR3(%a6),%d1 # fetch precision
				11604	lsr.b &0x6,%d1 # shift to lo bits; d1 now indexes the threshold tables
				11605	mov.l (%sp)+,%d0 # load S.F.
				11606	cmp.l %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
				11607	beq.w fmul_may_ovfl # result may rnd to overflow
				11608	blt.w fmul_ovfl # result will overflow
				11609
				11610	cmp.l %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
				11611	beq.w fmul_may_unfl # result may rnd to no unfl
				11612	bgt.w fmul_unfl # result will underflow
				11613
				11614	#
				11615	# NORMAL:
				11616	# - the result of the multiply operation will neither overflow nor underflow.
				11617	# - do the multiply to the proper precision and rounding mode.
				11618	# - scale the result exponent using the scale factor. if both operands were
				11619	# normalized then we really don't need to go through this scaling. but for now,
				11620	# this will do.
				11621	#
				11622	fmul_normal: # falls through from fmul_norm when neither threshold test branched
				11623	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				11624
				11625	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				11626	fmov.l &0x0,%fpsr # clear FPSR
				11627
				11628	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11629
				11630	fmov.l %fpsr,%d1 # save status
				11631	fmov.l &0x0,%fpcr # clear FPCR
				11632
				11633	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				11634
				11635	fmul_normal_exit: # in: fp0 = scaled result, d0 = scale factor (also entered from fmul_may_ovfl/fmul_may_unfl)
				11636	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				11637	mov.l %d2,-(%sp) # save d2
				11638	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
				11639	mov.l %d1,%d2 # make a copy
				11640	andi.l &0x7fff,%d1 # strip sign
				11641	andi.w &0x8000,%d2 # keep old sign
				11642	sub.l %d0,%d1 # subtract scale factor (d1 = exp - S.F.)
				11643	or.w %d2,%d1 # concat old sign,new exp
				11644	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				11645	mov.l (%sp)+,%d2 # restore d2
				11646	fmovm.x FP_SCR0(%a6),&0x80 # return rescaled result in fp0
				11647	rts
				11648
				11649	#
				11650	# OVERFLOW:
				11651	# - the result of the multiply operation is an overflow.
				11652	# - do the multiply to the proper precision and rounding mode in order to
				11653	# set the inexact bits.
				11654	# - calculate the default result and return it in fp0.
				11655	# - if overflow or inexact is enabled, we need a multiply result rounded to
				11656	# extended precision. if the original operation was extended, then we have this
				11657	# result. if the original operation was single or double, we have to do another
				11658	# multiply using extended precision and the correct rounding mode. the result
				11659	# of this operation then has its exponent scaled by -0x6000 to create the
				11660	# exceptional operand.
				11661	#
				11662	fmul_ovfl:
				11663	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				11664
				11665	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				11666	fmov.l &0x0,%fpsr # clear FPSR
				11667
				11668	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11669
				11670	fmov.l %fpsr,%d1 # save status
				11671	fmov.l &0x0,%fpcr # clear FPCR
				11672
				11673	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				11674
				11675	# save setting this until now because this is where fmul_may_ovfl may jump in
				11676	fmul_ovfl_tst:
				11677	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				11678
				11679	mov.b FPCR_ENABLE(%a6),%d1
				11680	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				11681	bne.b fmul_ovfl_ena # yes
				11682
				11683	# calculate the default result (fmul_ovfl_ena_cont also ends up here after building the EXOP)
				11684	fmul_ovfl_dis:
				11685	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				11686	sne %d1 # set sign param accordingly
				11687	mov.l L_SCR3(%a6),%d0 # pass rnd prec,mode
				11688	bsr.l ovf_res # calculate default result
				11689	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				11690	fmovm.x (%a0),&0x80 # return default result in fp0; NOTE(review): presumably ovf_res leaves a0 -> result, confirm
				11691	rts
				11692
				11693	#
				11694	# OVFL or INEX is enabled; Create EXOP:
				11695	# - if precision is extended, then we have the EXOP. simply bias the exponent
				11696	# with an extra -0x6000. if the precision is single or double, we need to
				11697	# calculate a result rounded to extended precision.
				11698	#
				11699	fmul_ovfl_ena:
				11700	mov.l L_SCR3(%a6),%d1
				11701	andi.b &0xc0,%d1 # test the rnd prec
				11702	bne.b fmul_ovfl_ena_sd # it's sgl or dbl
				11703
				11704	fmul_ovfl_ena_cont: # in: fp0 = result rounded to extended, d0 = scale factor
				11705	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
				11706
				11707	mov.l %d2,-(%sp) # save d2
				11708	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				11709	mov.w %d1,%d2 # make a copy
				11710	andi.l &0x7fff,%d1 # strip sign
				11711	sub.l %d0,%d1 # subtract scale factor (d1 = exp - S.F.)
				11712	subi.l &0x6000,%d1 # subtract bias
				11713	andi.w &0x7fff,%d1 # clear sign bit
				11714	andi.w &0x8000,%d2 # keep old sign
				11715	or.w %d2,%d1 # concat old sign,new exp
				11716	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				11717	mov.l (%sp)+,%d2 # restore d2
				11718	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				11719	bra.b fmul_ovfl_dis # still need the default result in fp0
				11720
				11721	fmul_ovfl_ena_sd: # redo the multiply with the rnd prec bits cleared (extended)
				11722	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				11723
				11724	mov.l L_SCR3(%a6),%d1
				11725	andi.b &0x30,%d1 # keep rnd mode only
				11726	fmov.l %d1,%fpcr # set FPCR
				11727
				11728	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11729
				11730	fmov.l &0x0,%fpcr # clear FPCR
				11731	bra.b fmul_ovfl_ena_cont
				11732
				11733	#
				11734	# may OVERFLOW:
				11735	# - the result of the multiply operation MAY overflow.
				11736	# - do the multiply to the proper precision and rounding mode in order to
				11737	# set the inexact bits.
				11738	# - if the scaled result rounded up to 2 or more, treat it as an overflow;
				11739	# otherwise rescale the result and return it in fp0.
				11740	fmul_may_ovfl:
				11741	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				11742
				11743	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				11744	fmov.l &0x0,%fpsr # clear FPSR
				11745
				11746	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11747
				11748	fmov.l %fpsr,%d1 # save status
				11749	fmov.l &0x0,%fpcr # clear FPCR
				11750
				11751	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				11752
				11753	fabs.x %fp0,%fp1 # make a copy of result
				11754	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
				11755	fbge.w fmul_ovfl_tst # yes; overflow has occurred
				11756
				11757	# no, it didn't overflow; we have correct result
				11758	bra.w fmul_normal_exit
				11759
				11760	#
				11761	# UNDERFLOW:
				11762	# - the result of the multiply operation is an underflow.
				11763	# - do the multiply in extended precision, round-to-zero, in order to
				11764	# set the inexact bits; unf_res() then applies the requested prec,mode.
				11765	# - calculate the default result and return it in fp0.
				11766	# - if underflow or inexact is enabled, we need a multiply result rounded to
				11767	# extended precision. if the original operation was extended, then we have this
				11768	# result. if the original operation was single or double, we have to do another
				11769	# multiply using extended precision and the correct rounding mode. the result
				11770	# of this operation then has its exponent scaled by +0x6000 to create the
				11771	# exceptional operand.
				11772	#
				11773	fmul_unfl:
				11774	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				11775
				11776	# for fun, let's use only extended precision, round to zero. then, let
				11777	# the unf_res() routine figure out all the rest.
				11778	# will we get the correct answer?
				11779	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				11780
				11781	fmov.l &rz_mode*0x10,%fpcr # set FPCR
				11782	fmov.l &0x0,%fpsr # clear FPSR
				11783
				11784	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11785
				11786	fmov.l %fpsr,%d1 # save status
				11787	fmov.l &0x0,%fpcr # clear FPCR
				11788
				11789	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				11790
				11791	mov.b FPCR_ENABLE(%a6),%d1
				11792	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				11793	bne.b fmul_unfl_ena # yes
				11794
				11795	fmul_unfl_dis: # in: fp0 = scaled ext/RZ result (fmul_unfl_ena_cont also ends up here)
				11796	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				11797
				11798	lea FP_SCR0(%a6),%a0 # pass: result addr
				11799	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				11800	bsr.l unf_res # calculate default result
				11801	or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
				11802	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				11803	rts
				11804
				11805	#
				11806	# UNFL or INEX is enabled; create the EXOP in fp1.
				11807	#
				11808	fmul_unfl_ena:
				11809	fmovm.x FP_SCR1(%a6),&0x40 # load dst op
				11810
				11811	mov.l L_SCR3(%a6),%d1
				11812	andi.b &0xc0,%d1 # is precision extended?
				11813	bne.b fmul_unfl_ena_sd # no, sgl or dbl
				11814
				11815	# if the rnd mode is anything but RZ, then we have to re-do the above
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	11816	# multiplication because we used RZ for all.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	11817	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				11818
				11819	fmul_unfl_ena_cont: # in: fp1 = dst operand, FPCR already set, d0 = scale factor
				11820	fmov.l &0x0,%fpsr # clear FPSR
				11821
				11822	fmul.x FP_SCR0(%a6),%fp1 # execute multiply
				11823
				11824	fmov.l &0x0,%fpcr # clear FPCR
				11825
				11826	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
				11827	mov.l %d2,-(%sp) # save d2
				11828	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				11829	mov.l %d1,%d2 # make a copy
				11830	andi.l &0x7fff,%d1 # strip sign
				11831	andi.w &0x8000,%d2 # keep old sign
				11832	sub.l %d0,%d1 # subtract scale factor (d1 = exp - S.F.)
				11833	addi.l &0x6000,%d1 # add bias
				11834	andi.w &0x7fff,%d1 # clear sign position
				11835	or.w %d2,%d1 # concat old sign,new exp
				11836	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				11837	mov.l (%sp)+,%d2 # restore d2
				11838	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				11839	bra.w fmul_unfl_dis # fp0 still holds the ext/RZ result for the default-result path
				11840
				11841	fmul_unfl_ena_sd: # sgl/dbl: compute the EXOP with the rnd prec bits cleared (extended)
				11842	mov.l L_SCR3(%a6),%d1
				11843	andi.b &0x30,%d1 # use only rnd mode
				11844	fmov.l %d1,%fpcr # set FPCR
				11845
				11846	bra.b fmul_unfl_ena_cont
				11847
				11848	# MAY UNDERFLOW:
				11849	# -use the correct rounding mode and precision. this code favors operations
				11850	# that do not underflow.
				11851	fmul_may_unfl:
				11852	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				11853
				11854	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				11855	fmov.l &0x0,%fpsr # clear FPSR
				11856
				11857	fmul.x FP_SCR0(%a6),%fp0 # execute multiply
				11858
				11859	fmov.l %fpsr,%d1 # save status
				11860	fmov.l &0x0,%fpcr # clear FPCR
				11861
				11862	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				11863
				11864	fabs.x %fp0,%fp1 # make a copy of result
				11865	fcmp.b %fp1,&0x2 # compare |result| against 2.b
				11866	fbgt.w fmul_normal_exit # |result| > 2; no underflow occurred
				11867	fblt.w fmul_unfl # |result| < 2; underflow occurred
				11868
				11869	#
				11870	# we still don't know if underflow occurred. result is ~ equal to 2. but,
				11871	# we don't know if the result was an underflow that rounded up to a 2 or
				11872	# a normalized number that rounded down to a 2. so, redo the entire operation
				11873	# using RZ as the rounding mode to see what the pre-rounded result is.
				11874	# this case should be relatively rare.
				11875	#
				11876	fmovm.x FP_SCR1(%a6),&0x40 # load dst operand
				11877
				11878	mov.l L_SCR3(%a6),%d1
				11879	andi.b &0xc0,%d1 # keep rnd prec
				11880	ori.b &rz_mode*0x10,%d1 # insert RZ
				11881
				11882	fmov.l %d1,%fpcr # set FPCR
				11883	fmov.l &0x0,%fpsr # clear FPSR
				11884
				11885	fmul.x FP_SCR0(%a6),%fp1 # execute multiply
				11886
				11887	fmov.l &0x0,%fpcr # clear FPCR
				11888	fabs.x %fp1 # make absolute value
				11889	fcmp.b %fp1,&0x2 # is |result| < 2.b?
				11890	fbge.w fmul_normal_exit # no; no underflow occurred (fp0 still holds the first result)
				11891	bra.w fmul_unfl # yes, underflow occurred
				11892
				11893	################################################################################
				11894
				11895	#
				11896	# Multiply: inputs are not both normalized; what are they?
				11897	# d1 = (DTAG << 3) | STAG on entry; rows below are "dst x src".
				11898	fmul_not_norm:
				11899	mov.w (tbl_fmul_op.b,%pc,%d1.w*2),%d1 # fetch handler offset for this tag pair
				11900	jmp (tbl_fmul_op.b,%pc,%d1.w) # dispatch to the handler
				11901
				11902	swbeg &48
				11903	tbl_fmul_op:
				11904	short fmul_norm - tbl_fmul_op # NORM x NORM
				11905	short fmul_zero - tbl_fmul_op # NORM x ZERO
				11906	short fmul_inf_src - tbl_fmul_op # NORM x INF
				11907	short fmul_res_qnan - tbl_fmul_op # NORM x QNAN
				11908	short fmul_norm - tbl_fmul_op # NORM x DENORM
				11909	short fmul_res_snan - tbl_fmul_op # NORM x SNAN
				11910	short tbl_fmul_op - tbl_fmul_op #
				11911	short tbl_fmul_op - tbl_fmul_op #
				11912
				11913	short fmul_zero - tbl_fmul_op # ZERO x NORM
				11914	short fmul_zero - tbl_fmul_op # ZERO x ZERO
				11915	short fmul_res_operr - tbl_fmul_op # ZERO x INF
				11916	short fmul_res_qnan - tbl_fmul_op # ZERO x QNAN
				11917	short fmul_zero - tbl_fmul_op # ZERO x DENORM
				11918	short fmul_res_snan - tbl_fmul_op # ZERO x SNAN
				11919	short tbl_fmul_op - tbl_fmul_op #
				11920	short tbl_fmul_op - tbl_fmul_op #
				11921
				11922	short fmul_inf_dst - tbl_fmul_op # INF x NORM
				11923	short fmul_res_operr - tbl_fmul_op # INF x ZERO
				11924	short fmul_inf_dst - tbl_fmul_op # INF x INF
				11925	short fmul_res_qnan - tbl_fmul_op # INF x QNAN
				11926	short fmul_inf_dst - tbl_fmul_op # INF x DENORM
				11927	short fmul_res_snan - tbl_fmul_op # INF x SNAN
				11928	short tbl_fmul_op - tbl_fmul_op #
				11929	short tbl_fmul_op - tbl_fmul_op #
				11930
				11931	short fmul_res_qnan - tbl_fmul_op # QNAN x NORM
				11932	short fmul_res_qnan - tbl_fmul_op # QNAN x ZERO
				11933	short fmul_res_qnan - tbl_fmul_op # QNAN x INF
				11934	short fmul_res_qnan - tbl_fmul_op # QNAN x QNAN
				11935	short fmul_res_qnan - tbl_fmul_op # QNAN x DENORM
				11936	short fmul_res_snan - tbl_fmul_op # QNAN x SNAN
				11937	short tbl_fmul_op - tbl_fmul_op #
				11938	short tbl_fmul_op - tbl_fmul_op #
				11939
				11940	short fmul_norm - tbl_fmul_op # DENORM x NORM
				11941	short fmul_zero - tbl_fmul_op # DENORM x ZERO
				11942	short fmul_inf_src - tbl_fmul_op # DENORM x INF
				11943	short fmul_res_qnan - tbl_fmul_op # DENORM x QNAN
				11944	short fmul_norm - tbl_fmul_op # DENORM x DENORM
				11945	short fmul_res_snan - tbl_fmul_op # DENORM x SNAN
				11946	short tbl_fmul_op - tbl_fmul_op #
				11947	short tbl_fmul_op - tbl_fmul_op #
				11948
				11949	short fmul_res_snan - tbl_fmul_op # SNAN x NORM
				11950	short fmul_res_snan - tbl_fmul_op # SNAN x ZERO
				11951	short fmul_res_snan - tbl_fmul_op # SNAN x INF
				11952	short fmul_res_snan - tbl_fmul_op # SNAN x QNAN
				11953	short fmul_res_snan - tbl_fmul_op # SNAN x DENORM
				11954	short fmul_res_snan - tbl_fmul_op # SNAN x SNAN
				11955	short tbl_fmul_op - tbl_fmul_op #
				11956	short tbl_fmul_op - tbl_fmul_op #
				11957
				11958	fmul_res_operr: # ZERO x INF and INF x ZERO land here
				11959	bra.l res_operr
				11960	fmul_res_snan: # any pairing that involves an SNAN
				11961	bra.l res_snan
				11962	fmul_res_qnan: # a QNAN operand with no SNAN present
				11963	bra.l res_qnan
				11964
				11965	#
				11966	# Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
				11967	#
				11968	global fmul_zero # global for fsglmul
				11969	fmul_zero: # returns a ZERO whose sign is the XOR of the operand signs
				11970	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
				11971	mov.b DST_EX(%a1),%d1
				11972	eor.b %d0,%d1
				11973	bpl.b fmul_zero_p # result ZERO is pos.
				11974	fmul_zero_n:
				11975	fmov.s &0x80000000,%fp0 # load -ZERO
				11976	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
				11977	rts
				11978	fmul_zero_p:
				11979	fmov.s &0x00000000,%fp0 # load +ZERO
				11980	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				11981	rts
				11982
				11983	#
				11984	# Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
				11985	#
				11986	# Note: The j-bit for an infinity is a don't-care. However, to be
				11987	# strictly compatible w/ the 68881/882, we make sure to return an
				11988	# INF w/ the j-bit set if the input INF j-bit was set. Destination
				11989	# INFs take priority.
				11990	#
				11991	global fmul_inf_dst # global for fsglmul
				11992	fmul_inf_dst: # dst is the INF: return it with sign = XOR of the operand signs
				11993	fmovm.x DST(%a1),&0x80 # return INF result in fp0
				11994	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
				11995	mov.b DST_EX(%a1),%d1
				11996	eor.b %d0,%d1
				11997	bpl.b fmul_inf_dst_p # result INF is pos.
				11998	fmul_inf_dst_n: # shared with fmul_inf_src
				11999	fabs.x %fp0 # clear result sign
				12000	fneg.x %fp0 # set result sign
				12001	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
				12002	rts
				12003	fmul_inf_dst_p: # shared with fmul_inf_src
				12004	fabs.x %fp0 # clear result sign
				12005	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
				12006	rts
				12007
				12008	global fmul_inf_src # global for fsglmul
				12009	fmul_inf_src: # only src is an INF: return the src INF, then reuse the dst sign tails
				12010	fmovm.x SRC(%a0),&0x80 # return INF result in fp0
				12011	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
				12012	mov.b DST_EX(%a1),%d1
				12013	eor.b %d0,%d1
				12014	bpl.b fmul_inf_dst_p # result INF is pos.
				12015	bra.b fmul_inf_dst_n # result INF is neg.
				12016
				12017	#########################################################################
				12018	# XDEF **************************************************************** #
				12019	# fin(): emulates the fmove instruction #
				12020	# fsin(): emulates the fsmove instruction #
				12021	# fdin(): emulates the fdmove instruction #
				12022	# #
				12023	# XREF **************************************************************** #
				12024	# norm() - normalize mantissa for EXOP on denorm #
				12025	# scale_to_zero_src() - scale src exponent to zero #
				12026	# ovf_res() - return default overflow result #
				12027	# unf_res() - return default underflow result #
				12028	# res_qnan_1op() - return QNAN result #
				12029	# res_snan_1op() - return SNAN result #
				12030	# #
				12031	# INPUT *************************************************************** #
				12032	# a0 = pointer to extended precision source operand #
				12033	# d0 = round prec/mode #
				12034	# #
				12035	# OUTPUT ************************************************************** #
				12036	# fp0 = result #
				12037	# fp1 = EXOP (if exception occurred) #
				12038	# #
				12039	# ALGORITHM *********************************************************** #
				12040	# Handle NANs, infinities, and zeroes as special cases. Divide #
				12041	# norms into extended, single, and double precision. #
				12042	# Norms can be emulated w/ a regular fmove instruction. For #
				12043	# sgl/dbl, must scale exponent and perform an "fmove". Check to see #
				12044	# if the result would have overflowed/underflowed. If so, use unf_res() #
				12045	# or ovf_res() to return the default result. Also return EXOP if #
				12046	# exception is enabled. If no exception, return the default result. #
				12047	# Unnorms don't pass through here. #
				12048	# #
				12049	#########################################################################
				12050
				12051	global fsin
				12052	fsin: # entry for fsmove: force single precision, then join fin
				12053	andi.b &0x30,%d0 # clear rnd prec
				12054	ori.b &s_mode*0x10,%d0 # insert sgl precision
				12055	bra.b fin
				12056
				12057	global fdin
				12058	fdin: # entry for fdmove: force double precision, then fall through into fin
				12059	andi.b &0x30,%d0 # clear rnd prec
				12060	ori.b &d_mode*0x10,%d0 # insert dbl precision
				12061
				12062	global fin
				12063	fin: # common entry: d0 = rnd prec,mode; a0 = src operand
				12064	mov.l %d0,L_SCR3(%a6) # store rnd info
				12065
				12066	mov.b STAG(%a6),%d1 # fetch src optype tag
				12067	bne.w fin_not_norm # optimize on non-norm input; zero tag falls through to fin_norm
				12068
				12069	#
				12070	# FP MOVE IN: NORMs and DENORMs ONLY!
				12071	#
				12072	fin_norm:
				12073	andi.b &0xc0,%d0 # is precision extended?
				12074	bne.w fin_not_ext # no, so go handle dbl or sgl
				12075
				12076	#
				12077	# precision selected is extended. so...we cannot get an underflow
				12078	# or overflow because of rounding to the correct precision. so...
				12079	# skip the scaling and unscaling...
				12080	#
				12081	tst.b SRC_EX(%a0) # is the operand negative?
				12082	bpl.b fin_norm_done # no
				12083	bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
				12084	fin_norm_done:
				12085	fmovm.x SRC(%a0),&0x80 # return result in fp0 (operand is loaded unchanged)
				12086	rts
				12087
				12088	#
				12089	# for an extended precision DENORM, the UNFL exception bit is set
				12090	# the accrued bit is NOT set in this instance(no inexactness!)
				12091	#
				12092	fin_denorm: # reached from fin_not_norm when the src tag is DENORM
				12093	andi.b &0xc0,%d0 # is precision extended?
				12094	bne.w fin_not_ext # no, so go handle dbl or sgl
				12095
				12096	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				12097	tst.b SRC_EX(%a0) # is the operand negative?
				12098	bpl.b fin_denorm_done # no
				12099	bset &neg_bit,FPSR_CC(%a6) # yes, so set 'N' ccode bit
				12100	fin_denorm_done:
				12101	fmovm.x SRC(%a0),&0x80 # return result in fp0
				12102	btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
				12103	bne.b fin_denorm_unfl_ena # yes
				12104	rts
				12105
				12106	#
				12107	# the input is an extended DENORM and underflow is enabled in the FPCR.
				12108	# normalize the mantissa and add the bias of 0x6000 to the resulting negative
				12109	# exponent and insert back into the operand.
				12110	#
				12111	fin_denorm_unfl_ena:
				12112	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # work on a copy of the operand in FP_SCR0
				12113	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12114	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12115	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				12116	bsr.l norm # normalize result
				12117	neg.w %d0 # new exponent = -(shft val)
				12118	addi.w &0x6000,%d0 # add new bias to exponent
				12119	mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
				12120	andi.w &0x8000,%d1 # keep old sign
				12121	andi.w &0x7fff,%d0 # clear sign position
				12122	or.w %d1,%d0 # concat new exp,old sign
				12123	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				12124	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				12125	rts
				12126
				12127	#
				12128	# operand is to be rounded to single or double precision
				12129	#
				12130	fin_not_ext: # in: d0 low byte = rnd prec bits only (masked with 0xc0 by the caller)
				12131	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
				12132	bne.b fin_dbl # not sgl, so dbl
				12133
				12134	#
				12135	# operand is to be rounded to single precision
				12136	#
				12137	fin_sgl:
				12138	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0 for scaling
				12139	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12140	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12141	bsr.l scale_to_zero_src # calculate scale factor
				12142
				12143	cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
				12144	bge.w fin_sd_unfl # yes; go handle underflow
				12145	cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
				12146	beq.w fin_sd_may_ovfl # maybe; go check
				12147	blt.w fin_sd_ovfl # yes; go handle overflow
				12148
				12149	#
				12150	# operand will NOT overflow or underflow when moved into the fp reg file
				12151	#
				12152	fin_sd_normal: # shared by the sgl and dbl paths (fin_dbl branches here)
				12153	fmov.l &0x0,%fpsr # clear FPSR
				12154	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12155
				12156	fmov.x FP_SCR0(%a6),%fp0 # perform move
				12157
				12158	fmov.l %fpsr,%d1 # save FPSR
				12159	fmov.l &0x0,%fpcr # clear FPCR
				12160
				12161	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12162
				12163	fin_sd_normal_exit: # in: fp0 = scaled result, d0 = scale factor (also entered from fin_sd_may_ovfl)
				12164	mov.l %d2,-(%sp) # save d2
				12165	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				12166	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
				12167	mov.w %d1,%d2 # make a copy
				12168	andi.l &0x7fff,%d1 # strip sign
				12169	sub.l %d0,%d1 # subtract scale factor (d1 = exp - S.F.)
				12170	andi.w &0x8000,%d2 # keep old sign
				12171	or.w %d1,%d2 # concat old sign,new exponent
				12172	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
				12173	mov.l (%sp)+,%d2 # restore d2
				12174	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				12175	rts
				12176
				12177	#
				12178	# operand is to be rounded to double precision
				12179	#
				12180	fin_dbl:
				12181	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0 for scaling
				12182	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12183	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12184	bsr.l scale_to_zero_src # calculate scale factor
				12185
				12186	cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
				12187	bge.w fin_sd_unfl # yes; go handle underflow
				12188	cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
				12189	beq.w fin_sd_may_ovfl # maybe; go check
				12190	blt.w fin_sd_ovfl # yes; go handle overflow
				12191	bra.w fin_sd_normal # no; go handle normalized op
				12192
				12193	#
				12194	# operand WILL underflow when moved in to the fp register file
				12195	#
				12196	fin_sd_unfl: # in: FP_SCR0 = scaled operand, d0 = scale factor
				12197	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				12198
				12199	tst.b FP_SCR0_EX(%a6) # is operand negative?
				12200	bpl.b fin_sd_unfl_tst
				12201	bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
				12202
				12203	# if underflow or inexact is enabled, then go calculate the EXOP first.
				12204	fin_sd_unfl_tst:
				12205	mov.b FPCR_ENABLE(%a6),%d1
				12206	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				12207	bne.b fin_sd_unfl_ena # yes
				12208
				12209	fin_sd_unfl_dis: # default-result path (fin_sd_unfl_ena also ends up here)
				12210	lea FP_SCR0(%a6),%a0 # pass: result addr
				12211	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				12212	bsr.l unf_res # calculate default result
				12213	or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
				12214	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				12215	rts
				12216
				12217	#
				12218	# operand will underflow AND underflow or inexact is enabled.
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	12219	# Therefore, we must return the result rounded to extended precision.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	12220	#
				12221	fin_sd_unfl_ena: # build the EXOP in FP_SCR1 so FP_SCR0 stays intact for unf_res
				12222	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
				12223	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
				12224	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
				12225
				12226	mov.l %d2,-(%sp) # save d2
				12227	mov.w %d1,%d2 # make a copy
				12228	andi.l &0x7fff,%d1 # strip sign
				12229	sub.l %d0,%d1 # subtract scale factor
				12230	andi.w &0x8000,%d2 # extract old sign
				12231	addi.l &0x6000,%d1 # add new bias
				12232	andi.w &0x7fff,%d1 # clear sign position
				12233	or.w %d1,%d2 # concat old sign,new exp
				12234	mov.w %d2,FP_SCR1_EX(%a6) # insert new exponent
				12235	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
				12236	mov.l (%sp)+,%d2 # restore d2
				12237	bra.b fin_sd_unfl_dis # now compute the default result for fp0
				12238
				12239	#
				12240	# operand WILL overflow.
				12241	#
				12242	fin_sd_ovfl: # in: FP_SCR0 = scaled operand, d0 = scale factor
				12243	fmov.l &0x0,%fpsr # clear FPSR
				12244	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12245
				12246	fmov.x FP_SCR0(%a6),%fp0 # perform move
				12247
				12248	fmov.l &0x0,%fpcr # clear FPCR
				12249	fmov.l %fpsr,%d1 # save FPSR
				12250
				12251	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12252
				12253	fin_sd_ovfl_tst: # fin_sd_may_ovfl joins here once overflow is confirmed
				12254	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				12255
				12256	mov.b FPCR_ENABLE(%a6),%d1
				12257	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				12258	bne.b fin_sd_ovfl_ena # yes
				12259
				12260	#
				12261	# create the default result by calling ovf_res(). this is reached directly
				12262	# when neither OVFL nor INEX is enabled, and from fin_sd_ovfl_ena otherwise.
				12263	#
				12264	fin_sd_ovfl_dis:
				12265	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				12266	sne %d1 # set sign param accordingly
				12267	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
				12268	bsr.l ovf_res # calculate default result
				12269	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				12270	fmovm.x (%a0),&0x80 # return default result in fp0; NOTE(review): presumably ovf_res leaves a0 -> result, confirm
				12271	rts
				12272
				12273	#
				12274	# OVFL or INEX is enabled.
				12275	# the INEX2 bit has already been updated by the round to the correct precision.
				12276	# now, build the EXOP from the operand in FP_SCR0 (and don't alter the FPSR).
				12277	#
				12278	fin_sd_ovfl_ena:
				12279	mov.l %d2,-(%sp) # save d2
				12280	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				12281	mov.l %d1,%d2 # make a copy
				12282	andi.l &0x7fff,%d1 # strip sign
				12283	andi.w &0x8000,%d2 # keep old sign
				12284	sub.l %d0,%d1 # subtract scale factor (d1 = exp - S.F.)
				12285	sub.l &0x6000,%d1 # subtract bias
				12286	andi.w &0x7fff,%d1 # clear sign position
				12287	or.w %d2,%d1 # concat old sign,new exp
				12288	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				12289	mov.l (%sp)+,%d2 # restore d2
				12290	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				12291	bra.b fin_sd_ovfl_dis # now compute the default result for fp0
				12292
				12293	#
				12294	# the move in MAY overflow. so... do the rounded move and inspect the result.
				12295	#
				12296	fin_sd_may_ovfl:
				12297	fmov.l &0x0,%fpsr # clear FPSR
				12298	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12299
				12300	fmov.x FP_SCR0(%a6),%fp0 # perform the move
				12301
				12302	fmov.l %fpsr,%d1 # save status
				12303	fmov.l &0x0,%fpcr # clear FPCR
				12304
				12305	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12306
				12307	fabs.x %fp0,%fp1 # make a copy of result
				12308	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
				12309	fbge.w fin_sd_ovfl_tst # yes; overflow has occurred
				12310
				12311	# no, it didn't overflow; we have correct result
				12312	bra.w fin_sd_normal_exit
				12313
				12314	##########################################################################
				12315
				12316	#
				12317	# operand is not a NORM: check its optype and branch accordingly
				12318	# (d1 = src optype tag on entry)
				12319	fin_not_norm:
				12320	cmpi.b %d1,&DENORM # weed out DENORM
				12321	beq.w fin_denorm
				12322	cmpi.b %d1,&SNAN # weed out SNANs
				12323	beq.l res_snan_1op
				12324	cmpi.b %d1,&QNAN # weed out QNANs
				12325	beq.l res_qnan_1op
				12326
				12327	#
				12328	# do the fmove in; at this point, only possible ops are ZERO and INF.
				12329	# use fmov to determine ccodes.
				12330	# prec:mode should be zero at this point but it won't affect answer anyways.
				12331	#
				12332	fmov.x SRC(%a0),%fp0 # do fmove in
				12333	fmov.l %fpsr,%d0 # no exceptions possible
				12334	rol.l &0x8,%d0 # put ccodes in lo byte
				12335	mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
				12336	rts
				12337
				12338	#########################################################################
				12339	# XDEF **************************************************************** #
				12340	# fdiv(): emulates the fdiv instruction #
				12341	# fsdiv(): emulates the fsdiv instruction #
				12342	# fddiv(): emulates the fddiv instruction #
				12343	# #
				12344	# XREF **************************************************************** #
				12345	# scale_to_zero_src() - scale src exponent to zero #
				12346	# scale_to_zero_dst() - scale dst exponent to zero #
				12347	# unf_res() - return default underflow result #
				12348	# ovf_res() - return default overflow result #
				12349	# res_qnan() - return QNAN result #
				12350	# res_snan() - return SNAN result #
				12351	# #
				12352	# INPUT *************************************************************** #
				12353	# a0 = pointer to extended precision source operand #
				12354	# a1 = pointer to extended precision destination operand #
				12355	# d0 = rnd prec,mode #
				12356	# #
				12357	# OUTPUT ************************************************************** #
				12358	# fp0 = result #
				12359	# fp1 = EXOP (if exception occurred) #
				12360	# #
				12361	# ALGORITHM *********************************************************** #
				12362	# Handle NANs, infinities, and zeroes as special cases. Divide #
				12363	# norms/denorms into ext/sgl/dbl precision. #
				12364	# For norms/denorms, scale the exponents such that a divide #
				12365	# instruction won't cause an exception. Use the regular fdiv to #
				12366	# compute a result. Check if the regular operands would have taken #
				12367	# an exception. If so, return the default overflow/underflow result #
				12368	# and return the EXOP if exceptions are enabled. Else, scale the #
				12369	# result operand to the proper exponent. #
				12370	# #
				12371	#########################################################################
				12372
				12373	align 0x10
					# Scale-factor thresholds, one long per rounding precision in the
					# order ext, sgl, dbl. fdiv_norm indexes both tables with the
					# precision field taken from L_SCR3 and compares the entry against
					# the combined scale factor in d0.
				12374	tbl_fdiv_unfl:
				12375	long 0x3fff - 0x0000 # ext_unfl
				12376	long 0x3fff - 0x3f81 # sgl_unfl
				12377	long 0x3fff - 0x3c01 # dbl_unfl
				12378
				12379	tbl_fdiv_ovfl:
				12380	long 0x3fff - 0x7ffe # ext overflow exponent
				12381	long 0x3fff - 0x407e # sgl overflow exponent
				12382	long 0x3fff - 0x43fe # dbl overflow exponent
				12383
				12384	global fsdiv
					# fsdiv entry: keep only bits 4-5 of d0's low byte, OR in the
					# single-precision selector, then join the common fdiv path.
				12385	fsdiv:
				12386	andi.b &0x30,%d0 # clear rnd prec
				12387	ori.b &s_mode*0x10,%d0 # insert sgl prec
				12388	bra.b fdiv
				12389
				12390	global fddiv
					# fddiv entry: same as fsdiv but inserts the double-precision
					# selector and falls through into fdiv.
				12391	fddiv:
				12392	andi.b &0x30,%d0 # clear rnd prec
				12393	ori.b &d_mode*0x10,%d0 # insert dbl prec
				12394
				12395	global fdiv
					# fdiv entry: a0 = src operand, a1 = dst operand, d0 = rnd prec,mode.
					# The combined operand tags are zero only on the NORM/NORM fast path;
					# any other value is dispatched through fdiv_not_norm.
				12396	fdiv:
				12397	mov.l %d0,L_SCR3(%a6) # store rnd info
				12398
				12399	clr.w %d1
				12400	mov.b DTAG(%a6),%d1
				12401	lsl.b &0x3,%d1
				12402	or.b STAG(%a6),%d1 # d1 = (DTAG << 3) | STAG
				12403
				12404	bne.w fdiv_not_norm # optimize on non-norm input
				12405
				12406	#
				12407	# DIVIDE: NORMs and DENORMs ONLY!
				12408	#
					# Copy both operands to scratch (dst -> FP_SCR1, src -> FP_SCR0),
					# scale each one, and combine the two scale factors. The combined
					# value decides which of the overflow/underflow/normal paths runs.
				12409	fdiv_norm:
				12410	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
				12411	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				12412	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				12413
				12414	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				12415	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12416	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12417
				12418	bsr.l scale_to_zero_src # scale src exponent
				12419	mov.l %d0,-(%sp) # save src scale factor
				12420
				12421	bsr.l scale_to_zero_dst # scale dst exponent
				12422
				12423	neg.l (%sp) # (%sp) = -(src scale factor)
				12424	add.l %d0,(%sp) # (%sp) = dst scale factor - src scale factor
				12425
				12426	mov.w 2+L_SCR3(%a6),%d1 # fetch precision
				12427	lsr.b &0x6,%d1 # shift to lo bits
				12428	mov.l (%sp)+,%d0 # load S.F.
				12429	cmp.l %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
				12430	ble.w fdiv_may_ovfl # maybe; go check after the divide
				12431
				12432	cmp.l %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
				12433	beq.w fdiv_may_unfl # maybe
				12434	bgt.w fdiv_unfl # yes; go handle underflow
				12435
				12436	fdiv_normal:
					# No overflow or underflow is possible: divide the scaled operands
					# under the caller's rounding info, merge the resulting FPSR into
					# USER_FPSR, then fall into the exponent fix-up below.
				12437	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				12438
				12439	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12440	fmov.l &0x0,%fpsr # clear FPSR
				12441
				12442	fdiv.x FP_SCR0(%a6),%fp0 # perform divide
				12443
				12444	fmov.l %fpsr,%d1 # save FPSR
				12445	fmov.l &0x0,%fpcr # clear FPCR
				12446
				12447	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12448
					# Common exit: fp0 = scaled result, d0 = scale factor. The result is
					# stored to FP_SCR0, its exponent is adjusted by the scale factor
					# with the sign preserved, and it is reloaded into fp0.
					# d2 is used as scratch and preserved across the sequence.
				12449	fdiv_normal_exit:
				12450	fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
				12451	mov.l %d2,-(%sp) # store d2
				12452	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
				12453	mov.l %d1,%d2 # make a copy
				12454	andi.l &0x7fff,%d1 # strip sign
				12455	andi.w &0x8000,%d2 # keep old sign
				12456	sub.l %d0,%d1 # exp -= scale factor
				12457	or.w %d2,%d1 # concat old sign,new exp
				12458	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				12459	mov.l (%sp)+,%d2 # restore d2
				12460	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				12461	rts
				12462
				12463	tbl_fdiv_ovfl2:
					# Exponent limits used by fdiv_may_ovfl, indexed by the precision
					# value left in d1 by fdiv_norm (three entries, same indexing as
					# the other fdiv tables).
				12464	long 0x7fff
				12465	long 0x407f
				12466	long 0x43ff
				12467
					# fdiv_may_ovfl decided the result fits: pop the scale factor it
					# pushed and finish through the normal exit.
				12468	fdiv_no_ovfl:
				12469	mov.l (%sp)+,%d0 # restore scale factor
				12470	bra.b fdiv_normal_exit
				12471
					# The scale factor is at or below the overflow threshold, so do the
					# divide and inspect the unscaled exponent of the rounded result.
					# On entry d0 = scale factor and d1 = precision index; d0 is reused
					# as scratch, so the scale factor is kept on the stack.
				12472	fdiv_may_ovfl:
				12473	mov.l %d0,-(%sp) # save scale factor
				12474
				12475	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				12476
				12477	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12478	fmov.l &0x0,%fpsr # clear FPSR
				12479
				12480	fdiv.x FP_SCR0(%a6),%fp0 # execute divide
				12481
				12482	fmov.l %fpsr,%d0 # fetch status
				12483	fmov.l &0x0,%fpcr # clear FPCR
				12484
				12485	or.l %d0,USER_FPSR(%a6) # save INEX,N
				12486
				12487	fmovm.x &0x01,-(%sp) # save result to stack
				12488	mov.w (%sp),%d0 # fetch new exponent
				12489	add.l &0xc,%sp # clear result from stack
				12490	andi.l &0x7fff,%d0 # strip sign
				12491	sub.l (%sp),%d0 # exp -= scale factor (still on stack)
				12492	cmp.l %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # compare with limit for this precision
				12493	blt.b fdiv_no_ovfl # below the limit; no overflow
				12494	mov.l (%sp)+,%d0 # restore scale factor; fall into fdiv_ovfl_tst
				12495
				12496	fdiv_ovfl_tst:
					# Overflow has been established. Record it in USER_FPSR, then pick
					# the enabled path (which also builds an EXOP) or go straight to the
					# default result. Entered by fall-through with d0 = scale factor and
					# fp0 = rounded, still-scaled quotient.
				12497	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				12498
				12499	mov.b FPCR_ENABLE(%a6),%d1
				12500	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				12501	bne.b fdiv_ovfl_ena # yes
				12502
					# Build the default overflow result with ovf_res(). The sign is
					# passed in d1 (taken from the N ccode bit) and prec:rnd in d0; the
					# result is loaded from the address left in a0 and the byte returned
					# in d0 is ORed into FPSR_CC.
				12503	fdiv_ovfl_dis:
				12504	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				12505	sne %d1 # set sign param accordingly
				12506	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
				12507	bsr.l ovf_res # calculate default result
				12508	or.b %d0,FPSR_CC(%a6) # set INF if applicable
				12509	fmovm.x (%a0),&0x80 # return default result in fp0
				12510	rts
				12511
					# OVFL or INEX is enabled: an EXOP must be returned in fp1. For sgl/dbl
					# the divide is redone first with the precision bits cleared.
				12512	fdiv_ovfl_ena:
				12513	mov.l L_SCR3(%a6),%d1
				12514	andi.b &0xc0,%d1 # is precision extended?
				12515	bne.b fdiv_ovfl_ena_sd # no, do sgl or dbl
				12516
					# Form the EXOP from fp0: exponent = exp - scale factor - 0x6000,
					# masked to 15 bits, with the original sign re-attached. The result
					# is loaded into fp1 and control continues in fdiv_ovfl_dis.
				12517	fdiv_ovfl_ena_cont:
				12518	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
				12519
				12520	mov.l %d2,-(%sp) # save d2
				12521	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				12522	mov.w %d1,%d2 # make a copy
				12523	andi.l &0x7fff,%d1 # strip sign
				12524	sub.l %d0,%d1 # exp -= scale factor
				12525	subi.l &0x6000,%d1 # subtract bias
				12526	andi.w &0x7fff,%d1 # clear sign bit
				12527	andi.w &0x8000,%d2 # keep old sign
				12528	or.w %d2,%d1 # concat old sign,new exp
				12529	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				12530	mov.l (%sp)+,%d2 # restore d2
				12531	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				12532	bra.b fdiv_ovfl_dis
				12533
					# sgl/dbl: repeat the divide using only the rounding-mode bits of
					# L_SCR3 as the FPCR, then build the EXOP from that quotient.
				12534	fdiv_ovfl_ena_sd:
				12535	fmovm.x FP_SCR1(%a6),&0x80 # load dst operand
				12536
				12537	mov.l L_SCR3(%a6),%d1
				12538	andi.b &0x30,%d1 # keep rnd mode
				12539	fmov.l %d1,%fpcr # set FPCR
				12540
				12541	fdiv.x FP_SCR0(%a6),%fp0 # execute divide
				12542
				12543	fmov.l &0x0,%fpcr # clear FPCR
				12544	bra.b fdiv_ovfl_ena_cont
				12545
				12546	fdiv_unfl:
					# Underflow has been established. Set the UNFL exception bit, do the
					# divide with the FPCR set to rz_mode*0x10, and merge the status
					# into USER_FPSR. d0 = scale factor on entry and is left untouched
					# until unf_res() returns its ccode byte in it.
				12547	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				12548
				12549	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				12550
				12551	fmov.l &rz_mode*0x10,%fpcr # set FPCR
				12552	fmov.l &0x0,%fpsr # clear FPSR
				12553
				12554	fdiv.x FP_SCR0(%a6),%fp0 # execute divide
				12555
				12556	fmov.l %fpsr,%d1 # save status
				12557	fmov.l &0x0,%fpcr # clear FPCR
				12558
				12559	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12560
				12561	mov.b FPCR_ENABLE(%a6),%d1
				12562	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				12563	bne.b fdiv_unfl_ena # yes
				12564
					# Store fp0 to FP_SCR0 and let unf_res() turn it into the default
					# underflow result in place (a0 = operand address, d1 = rnd prec,mode).
				12565	fdiv_unfl_dis:
				12566	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				12567
				12568	lea FP_SCR0(%a6),%a0 # pass: result addr
				12569	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				12570	bsr.l unf_res # calculate default result
				12571	or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
				12572	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				12573	rts
				12574
				12575	#
				12576	# UNFL is enabled.
				12577	#
					# Build the EXOP in fp1 by redoing the divide there. Extended
					# precision uses L_SCR3 as the FPCR unchanged; sgl/dbl use only its
					# rounding-mode bits (see fdiv_unfl_ena_sd).
				12578	fdiv_unfl_ena:
				12579	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				12580
				12581	mov.l L_SCR3(%a6),%d1
				12582	andi.b &0xc0,%d1 # is precision extended?
				12583	bne.b fdiv_unfl_ena_sd # no, sgl or dbl
				12584
				12585	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12586
					# Divide into fp1, then rebias the exponent: exp - scale factor +
					# 0x6000, masked to 15 bits, sign preserved. FP_SCR0 is used as
					# the staging area; fdiv_unfl_dis overwrites it afterwards from fp0.
				12587	fdiv_unfl_ena_cont:
				12588	fmov.l &0x0,%fpsr # clear FPSR
				12589
				12590	fdiv.x FP_SCR0(%a6),%fp1 # execute divide
				12591
				12592	fmov.l &0x0,%fpcr # clear FPCR
				12593
				12594	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
				12595	mov.l %d2,-(%sp) # save d2
				12596	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				12597	mov.l %d1,%d2 # make a copy
				12598	andi.l &0x7fff,%d1 # strip sign
				12599	andi.w &0x8000,%d2 # keep old sign
				12600	sub.l %d0,%d1 # exp -= scale factor
				12601	addi.l &0x6000,%d1 # add bias
				12602	andi.w &0x7fff,%d1 # clear sign position
				12603	or.w %d2,%d1 # concat old sign,new exp
				12604	mov.w %d1,FP_SCR0_EX(%a6) # insert new exp
				12605	mov.l (%sp)+,%d2 # restore d2
				12606	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				12607	bra.w fdiv_unfl_dis
				12608
					# sgl/dbl: load the FPCR with only the rounding-mode bits of L_SCR3
					# before rejoining the common EXOP path.
				12609	fdiv_unfl_ena_sd:
				12610	mov.l L_SCR3(%a6),%d1
				12611	andi.b &0x30,%d1 # use only rnd mode
				12612	fmov.l %d1,%fpcr # set FPCR
				12613
				12614	bra.b fdiv_unfl_ena_cont
				12615
				12616	#
				12617	# the divide operation MAY underflow:
				12618	#
				12619	fdiv_may_unfl:
					# The scale factor equals the underflow threshold, so the outcome
					# depends on the magnitude of the scaled quotient relative to 1.
					# d0 = scale factor on entry and is not modified here.
				12620	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				12621
				12622	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12623	fmov.l &0x0,%fpsr # clear FPSR
				12624
				12625	fdiv.x FP_SCR0(%a6),%fp0 # execute divide
				12626
				12627	fmov.l %fpsr,%d1 # save status
				12628	fmov.l &0x0,%fpcr # clear FPCR
				12629
				12630	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12631
				12632	fabs.x %fp0,%fp1 # make a copy of result
				12633	fcmp.b %fp1,&0x1 # compare |result| with 1
				12634	fbgt.w fdiv_normal_exit # |result| > 1; no underflow occurred
				12635	fblt.w fdiv_unfl # |result| < 1; underflow occurred
				12636
				12637	#
				12638	# we still don't know if underflow occurred. result is ~ equal to 1. but,
				12639	# we don't know if the result was an underflow that rounded up to a 1
				12640	# or a normalized number that rounded down to a 1. so, redo the entire
				12641	# operation using RZ as the rounding mode to see what the pre-rounded
				12642	# result is. this case should be relatively rare.
				12643	#
				12644	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				12645
				12646	mov.l L_SCR3(%a6),%d1
				12647	andi.b &0xc0,%d1 # keep rnd prec
				12648	ori.b &rz_mode*0x10,%d1 # insert RZ
				12649
				12650	fmov.l %d1,%fpcr # set FPCR
				12651	fmov.l &0x0,%fpsr # clear FPSR
				12652
				12653	fdiv.x FP_SCR0(%a6),%fp1 # execute divide
				12654
				12655	fmov.l &0x0,%fpcr # clear FPCR
				12656	fabs.x %fp1 # make absolute value
				12657	fcmp.b %fp1,&0x1 # is |result| < 1.b?
				12658	fbge.w fdiv_normal_exit # no; no underflow occurred
				12659	bra.w fdiv_unfl # yes; underflow occurred
				12660
				12661	############################################################################
				12662
				12663	#
				12664	# Divide: inputs are not both normalized; what are they?
				12665	#
				12666	fdiv_not_norm:
					# Dispatch on d1 = (DTAG << 3) | STAG as built in fdiv. Each table
					# entry is a 16-bit offset from tbl_fdiv_op; the entry is fetched
					# and then used as the jump displacement from the same base.
					# Rows are the destination type and columns the source type, so the
					# row comments below read "DST / SRC". Every row has eight slots;
					# the last two of each row hold a zero offset.
				12667	mov.w (tbl_fdiv_op.b,%pc,%d1.w*2),%d1
				12668	jmp (tbl_fdiv_op.b,%pc,%d1.w*1)
				12669
				12670	swbeg &48
				12671	tbl_fdiv_op:
				12672	short fdiv_norm - tbl_fdiv_op # NORM / NORM
				12673	short fdiv_inf_load - tbl_fdiv_op # NORM / ZERO
				12674	short fdiv_zero_load - tbl_fdiv_op # NORM / INF
				12675	short fdiv_res_qnan - tbl_fdiv_op # NORM / QNAN
				12676	short fdiv_norm - tbl_fdiv_op # NORM / DENORM
				12677	short fdiv_res_snan - tbl_fdiv_op # NORM / SNAN
				12678	short tbl_fdiv_op - tbl_fdiv_op #
				12679	short tbl_fdiv_op - tbl_fdiv_op #
				12680
				12681	short fdiv_zero_load - tbl_fdiv_op # ZERO / NORM
				12682	short fdiv_res_operr - tbl_fdiv_op # ZERO / ZERO
				12683	short fdiv_zero_load - tbl_fdiv_op # ZERO / INF
				12684	short fdiv_res_qnan - tbl_fdiv_op # ZERO / QNAN
				12685	short fdiv_zero_load - tbl_fdiv_op # ZERO / DENORM
				12686	short fdiv_res_snan - tbl_fdiv_op # ZERO / SNAN
				12687	short tbl_fdiv_op - tbl_fdiv_op #
				12688	short tbl_fdiv_op - tbl_fdiv_op #
				12689
				12690	short fdiv_inf_dst - tbl_fdiv_op # INF / NORM
				12691	short fdiv_inf_dst - tbl_fdiv_op # INF / ZERO
				12692	short fdiv_res_operr - tbl_fdiv_op # INF / INF
				12693	short fdiv_res_qnan - tbl_fdiv_op # INF / QNAN
				12694	short fdiv_inf_dst - tbl_fdiv_op # INF / DENORM
				12695	short fdiv_res_snan - tbl_fdiv_op # INF / SNAN
				12696	short tbl_fdiv_op - tbl_fdiv_op #
				12697	short tbl_fdiv_op - tbl_fdiv_op #
				12698
				12699	short fdiv_res_qnan - tbl_fdiv_op # QNAN / NORM
				12700	short fdiv_res_qnan - tbl_fdiv_op # QNAN / ZERO
				12701	short fdiv_res_qnan - tbl_fdiv_op # QNAN / INF
				12702	short fdiv_res_qnan - tbl_fdiv_op # QNAN / QNAN
				12703	short fdiv_res_qnan - tbl_fdiv_op # QNAN / DENORM
				12704	short fdiv_res_snan - tbl_fdiv_op # QNAN / SNAN
				12705	short tbl_fdiv_op - tbl_fdiv_op #
				12706	short tbl_fdiv_op - tbl_fdiv_op #
				12707
				12708	short fdiv_norm - tbl_fdiv_op # DENORM / NORM
				12709	short fdiv_inf_load - tbl_fdiv_op # DENORM / ZERO
				12710	short fdiv_zero_load - tbl_fdiv_op # DENORM / INF
				12711	short fdiv_res_qnan - tbl_fdiv_op # DENORM / QNAN
				12712	short fdiv_norm - tbl_fdiv_op # DENORM / DENORM
				12713	short fdiv_res_snan - tbl_fdiv_op # DENORM / SNAN
				12714	short tbl_fdiv_op - tbl_fdiv_op #
				12715	short tbl_fdiv_op - tbl_fdiv_op #
				12716
				12717	short fdiv_res_snan - tbl_fdiv_op # SNAN / NORM
				12718	short fdiv_res_snan - tbl_fdiv_op # SNAN / ZERO
				12719	short fdiv_res_snan - tbl_fdiv_op # SNAN / INF
				12720	short fdiv_res_snan - tbl_fdiv_op # SNAN / QNAN
				12721	short fdiv_res_snan - tbl_fdiv_op # SNAN / DENORM
				12722	short fdiv_res_snan - tbl_fdiv_op # SNAN / SNAN
				12723	short tbl_fdiv_op - tbl_fdiv_op #
				12724	short tbl_fdiv_op - tbl_fdiv_op #
				12725
				12726	fdiv_res_qnan:
					# Local trampolines: tbl_fdiv_op holds 16-bit offsets, so the table
					# targets these nearby labels, which long-branch to the shared
					# result routines.
				12727	bra.l res_qnan # QNAN operand: shared QNAN result
				12728	fdiv_res_snan:
				12729	bra.l res_snan # SNAN operand: shared SNAN result
				12730	fdiv_res_operr:
				12731	bra.l res_operr # ZERO/ZERO or INF/INF: operand error
				12732
				12733	global fdiv_zero_load # global for fsgldiv
					# Return a signed ZERO in fp0. The sign is the XOR of the top bits
					# of the first byte of the src and dst exponent words. FPSR_CC is
					# overwritten with Z (and N when the result is negative).
				12734	fdiv_zero_load:
				12735	mov.b SRC_EX(%a0),%d0 # result sign is exclusive
				12736	mov.b DST_EX(%a1),%d1 # or of input signs.
				12737	eor.b %d0,%d1 # bit 7 of d1 = result sign
				12738	bpl.b fdiv_zero_load_p # result is positive
				12739	fmov.s &0x80000000,%fp0 # load a -ZERO
				12740	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
				12741	rts
				12742	fdiv_zero_load_p:
				12743	fmov.s &0x00000000,%fp0 # load a +ZERO
				12744	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				12745	rts
				12746
				12747	#
				12748	# The destination was In Range and the source was a ZERO. The result,
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	12749	# Therefore, is an INF w/ the proper sign.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	12750	# So, determine the sign and return a new INF (w/ the j-bit cleared).
				12751	#
				12752	global fdiv_inf_load # global for fsgldiv
					# Divide by zero: flag DZ/ADZ in USER_FPSR and return a freshly
					# loaded INF in fp0 whose sign is the XOR of the operand signs.
					# FPSR_CC is overwritten with INF (and N when negative).
				12753	fdiv_inf_load:
				12754	ori.w &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ
				12755	mov.b SRC_EX(%a0),%d0 # load both signs
				12756	mov.b DST_EX(%a1),%d1
				12757	eor.b %d0,%d1 # bit 7 of d1 = result sign
				12758	bpl.b fdiv_inf_load_p # result is positive
				12759	fmov.s &0xff800000,%fp0 # make result -INF
				12760	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
				12761	rts
				12762	fdiv_inf_load_p:
				12763	fmov.s &0x7f800000,%fp0 # make result +INF
				12764	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
				12765	rts
				12766
				12767	#
				12768	# The destination was an INF w/ an In Range or ZERO source, the result is
				12769	# an INF w/ the proper sign.
				12770	# The 68881/882 returns the destination INF w/ the new sign(if the j-bit of the
				12771	# dst INF is set, then the j-bit of the result INF is also set).
				12772	#
				12773	global fdiv_inf_dst # global for fsgldiv
					# Return the destination INF itself (loaded from DST) with its sign
					# replaced by the XOR of the operand signs. FPSR_CC is overwritten
					# with INF (and N when negative).
				12774	fdiv_inf_dst:
				12775	mov.b DST_EX(%a1),%d0 # load both signs
				12776	mov.b SRC_EX(%a0),%d1
				12777	eor.b %d0,%d1 # bit 7 of d1 = result sign
				12778	bpl.b fdiv_inf_dst_p # result is positive
				12779
				12780	fmovm.x DST(%a1),&0x80 # return result in fp0
				12781	fabs.x %fp0 # clear sign bit
				12782	fneg.x %fp0 # set sign bit
				12783	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
				12784	rts
				12785
				12786	fdiv_inf_dst_p:
				12787	fmovm.x DST(%a1),&0x80 # return result in fp0
				12788	fabs.x %fp0 # return positive INF
				12789	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
				12790	rts
				12791
				12792	#########################################################################
				12793	# XDEF **************************************************************** #
				12794	# fneg(): emulates the fneg instruction #
				12795	# fsneg(): emulates the fsneg instruction #
				12796	# fdneg(): emulates the fdneg instruction #
				12797	# #
				12798	# XREF **************************************************************** #
				12799	# norm() - normalize a denorm to provide EXOP #
				12800	# scale_to_zero_src() - scale sgl/dbl source exponent #
				12801	# ovf_res() - return default overflow result #
				12802	# unf_res() - return default underflow result #
				12803	# res_qnan_1op() - return QNAN result #
				12804	# res_snan_1op() - return SNAN result #
				12805	# #
				12806	# INPUT *************************************************************** #
				12807	# a0 = pointer to extended precision source operand #
				12808	# d0 = rnd prec,mode #
				12809	# #
				12810	# OUTPUT ************************************************************** #
				12811	# fp0 = result #
				12812	# fp1 = EXOP (if exception occurred) #
				12813	# #
				12814	# ALGORITHM *********************************************************** #
				12815	# Handle NANs, zeroes, and infinities as special cases. Separate #
				12816	# norms/denorms into ext/sgl/dbl precisions. Extended precision can be #
				12817	# emulated by simply setting sign bit. Sgl/dbl operands must be scaled #
				12818	# and an actual fneg performed to see if overflow/underflow would have #
				12819	# occurred. If so, return default underflow/overflow result. Else, #
				12820	# scale the result exponent and return result. FPSR gets set based on #
				12821	# the result value. #
				12822	# #
				12823	#########################################################################
				12824
				12825	global fsneg
					# fsneg entry: keep only bits 4-5 of d0's low byte, OR in the
					# single-precision selector, then join the common fneg path.
				12826	fsneg:
				12827	andi.b &0x30,%d0 # clear rnd prec
				12828	ori.b &s_mode*0x10,%d0 # insert sgl precision
				12829	bra.b fneg
				12830
				12831	global fdneg
					# fdneg entry: same as fsneg but inserts the double-precision
					# selector and falls through into fneg.
				12832	fdneg:
				12833	andi.b &0x30,%d0 # clear rnd prec
				12834	ori.b &d_mode*0x10,%d0 # insert dbl prec
				12835
				12836	global fneg
					# fneg entry: a0 = src operand, d0 = rnd prec,mode. A zero STAG
					# takes the NORM path below; anything else goes to fneg_not_norm
					# with the tag in d1.
				12837	fneg:
				12838	mov.l %d0,L_SCR3(%a6) # store rnd info
				12839	mov.b STAG(%a6),%d1
				12840	bne.w fneg_not_norm # optimize on non-norm input
				12841
				12842	#
				12843	# NEGATE SIGN : norms and denorms ONLY!
				12844	#
				12845	fneg_norm:
				12846	andi.b &0xc0,%d0 # is precision extended?
				12847	bne.w fneg_not_ext # no; go handle sgl or dbl
				12848
				12849	#
				12850	# precision selected is extended. so...we can not get an underflow
				12851	# or overflow because of rounding to the correct precision. so...
				12852	# skip the scaling and unscaling...
				12853	#
					# The operand is copied to FP_SCR0 with the sign bit of the exponent
					# word toggled; the 'N' ccode is written only when the new sign is
					# negative.
				12854	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12855	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12856	mov.w SRC_EX(%a0),%d0
				12857	eori.w &0x8000,%d0 # negate sign
				12858	bpl.b fneg_norm_load # sign is positive
				12859	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				12860	fneg_norm_load:
				12861	mov.w %d0,FP_SCR0_EX(%a6)
				12862	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				12863	rts
				12864
				12865	#
				12866	# for an extended precision DENORM, the UNFL exception bit is set
				12867	# the accrued bit is NOT set in this instance(no inexactness!)
				12868	#
				12869	fneg_denorm:
					# DENORM source. For sgl/dbl precision the shared scaled path is
					# used. For extended precision the UNFL exception bit is set, the
					# sign is toggled, and the result is returned in fp0; if UNFL is
					# enabled an EXOP is also built in fneg_ext_unfl_ena.
				12870	andi.b &0xc0,%d0 # is precision extended?
				12871	bne.b fneg_not_ext # no; go handle sgl or dbl
				12872
				12873	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				12874
				12875	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12876	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12877	mov.w SRC_EX(%a0),%d0
				12878	eori.w &0x8000,%d0 # negate sign
				12879	bpl.b fneg_denorm_done # new sign is positive
				12880	mov.b &neg_bmask,FPSR_CC(%a6) # new sign is negative; set 'N' ccode bit
				12881	fneg_denorm_done:
				12882	mov.w %d0,FP_SCR0_EX(%a6)
				12883	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				12884
				12885	btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
				12886	bne.b fneg_ext_unfl_ena # yes
				12887	rts
				12888
				12889	#
				12890	# the input is an extended DENORM and underflow is enabled in the FPCR.
				12891	# normalize the mantissa and add the bias of 0x6000 to the resulting negative
				12892	# exponent and insert back into the operand.
				12893	#
				12894	fneg_ext_unfl_ena:
					# Build the EXOP for an extended-precision DENORM in fp1. norm() is
					# called on FP_SCR0; its d0 return value is treated as the shift
					# count (NOTE(review): norm() is defined elsewhere - confirm).
					# New exponent = 0x6000 - d0, masked to 15 bits, sign preserved.
				12895	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				12896	bsr.l norm # normalize result
				12897	neg.w %d0 # new exponent = -(shft val)
				12898	addi.w &0x6000,%d0 # add new bias to exponent
				12899	mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
				12900	andi.w &0x8000,%d1 # keep old sign
				12901	andi.w &0x7fff,%d0 # clear sign position
				12902	or.w %d1,%d0 # concat old sign, new exponent
				12903	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				12904	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				12905	rts
				12906
				12907	#
				12908	# operand is either single or double
				12909	#
				12910	fneg_not_ext:
					# d0 holds the precision bits (already masked with 0xc0 by the
					# caller). Single precision falls through; anything else is
					# handled as double.
				12911	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
				12912	bne.b fneg_dbl
				12913
				12914	#
				12915	# operand is to be rounded to single precision
				12916	#
					# Copy the source to FP_SCR0, scale it, and classify the returned
					# scale factor against the single-precision limits.
				12917	fneg_sgl:
				12918	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				12919	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12920	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12921	bsr.l scale_to_zero_src # calculate scale factor
				12922
				12923	cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
				12924	bge.w fneg_sd_unfl # yes; go handle underflow
				12925	cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
				12926	beq.w fneg_sd_may_ovfl # maybe; go check
				12927	blt.w fneg_sd_ovfl # yes; go handle overflow
				12928
				12929	#
				12930	# operand will NOT overflow or underflow when moved in to the fp reg file
				12931	#
					# Negate the scaled operand under the caller's rounding info and
					# merge the resulting FPSR into USER_FPSR.
				12932	fneg_sd_normal:
				12933	fmov.l &0x0,%fpsr # clear FPSR
				12934	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				12935
				12936	fneg.x FP_SCR0(%a6),%fp0 # perform negation
				12937
				12938	fmov.l %fpsr,%d1 # save FPSR
				12939	fmov.l &0x0,%fpcr # clear FPCR
				12940
				12941	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				12942
					# Common exit: fp0 = scaled result, d0 = scale factor. The exponent
					# is adjusted by the scale factor with the sign preserved, and the
					# result is reloaded into fp0. d2 is scratch and is preserved.
				12943	fneg_sd_normal_exit:
				12944	mov.l %d2,-(%sp) # save d2
				12945	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				12946	mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
				12947	mov.w %d1,%d2 # make a copy
				12948	andi.l &0x7fff,%d1 # strip sign
				12949	sub.l %d0,%d1 # exp -= scale factor
				12950	andi.w &0x8000,%d2 # keep old sign
				12951	or.w %d1,%d2 # concat old sign,new exp
				12952	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
				12953	mov.l (%sp)+,%d2 # restore d2
				12954	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				12955	rts
				12956
				12957	#
				12958	# operand is to be rounded to double precision
				12959	#
				12960	fneg_dbl:
					# Double-precision counterpart of fneg_sgl: copy the source to
					# FP_SCR0, scale it, and classify the scale factor against the
					# double-precision limits.
				12961	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				12962	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				12963	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				12964	bsr.l scale_to_zero_src # calculate scale factor
				12965
				12966	cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
				12967	bge.b fneg_sd_unfl # yes; go handle underflow
				12968	cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
				12969	beq.w fneg_sd_may_ovfl # maybe; go check
				12970	blt.w fneg_sd_ovfl # yes; go handle overflow
				12971	bra.w fneg_sd_normal # no; go handle normalized op
				12972
				12973	#
				12974	# operand WILL underflow when moved in to the fp register file
				12975	#
				12976	fneg_sd_unfl:
					# Underflow path for sgl/dbl. The negation is done by toggling the
					# sign bit directly in FP_SCR0; the 'N' ccode is set when the new
					# sign is negative. d0 = scale factor on entry.
				12977	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				12978
				12979	eori.b &0x80,FP_SCR0_EX(%a6) # negate sign
				12980	bpl.b fneg_sd_unfl_tst # new sign is positive
				12981	bset &neg_bit,FPSR_CC(%a6) # set 'N' ccode bit
				12982
				12983	# if underflow or inexact is enabled, go calculate EXOP first.
				12984	fneg_sd_unfl_tst:
				12985	mov.b FPCR_ENABLE(%a6),%d1
				12986	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				12987	bne.b fneg_sd_unfl_ena # yes
				12988
					# Let unf_res() turn FP_SCR0 into the default underflow result in
					# place (a0 = operand address, d1 = rnd prec,mode), then load it.
				12989	fneg_sd_unfl_dis:
				12990	lea FP_SCR0(%a6),%a0 # pass: result addr
				12991	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				12992	bsr.l unf_res # calculate default result
				12993	or.b %d0,FPSR_CC(%a6) # unf_res may have set 'Z'
				12994	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				12995	rts
				12996
				12997	#
				12998	# operand will underflow AND underflow is enabled.
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	12999	# Therefore, we must return the result rounded to extended precision.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13000	#
				13001	fneg_sd_unfl_ena:
					# Build the EXOP in fp1 from a copy of FP_SCR0 placed in FP_SCR1:
					# exponent = exp - scale factor (d0) + 0x6000, masked to 15 bits,
					# sign preserved. FP_SCR0 itself is left for fneg_sd_unfl_dis.
				13002	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
				13003	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
				13004	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
				13005
				13006	mov.l %d2,-(%sp) # save d2
				13007	mov.l %d1,%d2 # make a copy
				13008	andi.l &0x7fff,%d1 # strip sign
				13009	andi.w &0x8000,%d2 # keep old sign
				13010	sub.l %d0,%d1 # subtract scale factor
				13011	addi.l &0x6000,%d1 # add new bias
				13012	andi.w &0x7fff,%d1 # clear sign position
				13013	or.w %d2,%d1 # concat new sign,new exp
				13014	mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
				13015	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
				13016	mov.l (%sp)+,%d2 # restore d2
				13017	bra.b fneg_sd_unfl_dis
				13018
				13019	#
				13020	# operand WILL overflow.
				13021	#
				13022	fneg_sd_ovfl:
					# Overflow path for sgl/dbl. The negation is performed under the
					# caller's rounding info so INEX2/N are captured in USER_FPSR.
					# d0 = scale factor on entry.
				13023	fmov.l &0x0,%fpsr # clear FPSR
				13024	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				13025
				13026	fneg.x FP_SCR0(%a6),%fp0 # perform negation
				13027
				13028	fmov.l &0x0,%fpcr # clear FPCR
				13029	fmov.l %fpsr,%d1 # save FPSR
				13030
				13031	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				13032
					# Also entered from fneg_sd_may_ovfl once overflow is confirmed.
				13033	fneg_sd_ovfl_tst:
				13034	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				13035
				13036	mov.b FPCR_ENABLE(%a6),%d1
				13037	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				13038	bne.b fneg_sd_ovfl_ena # yes
				13039
				13040	#
				13041	# OVFL is not enabled; therefore, we must create the default result by
				13042	# calling ovf_res().
				13043	#
					# The sign is passed in d1 (from the N ccode bit) and prec,mode in
					# d0; the result is loaded from the address left in a0 and the byte
					# returned in d0 is ORed into FPSR_CC.
				13044	fneg_sd_ovfl_dis:
				13045	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				13046	sne %d1 # set sign param accordingly
				13047	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
				13048	bsr.l ovf_res # calculate default result
				13049	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				13050	fmovm.x (%a0),&0x80 # return default result in fp0
				13051	rts
				13052
				13053	#
				13054	# OVFL is enabled.
				13055	# the INEX2 bit has already been updated by the round to the correct precision.
				13056	# now, round to extended(and don't alter the FPSR).
				13057	#
				13058	fneg_sd_ovfl_ena:
					# Build the EXOP in fp1 from FP_SCR0: exponent = exp - scale factor
					# (d0) - 0x6000, masked to 15 bits, sign preserved. Control then
					# continues in fneg_sd_ovfl_dis for the default result.
				13059	mov.l %d2,-(%sp) # save d2
				13060	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				13061	mov.l %d1,%d2 # make a copy
				13062	andi.l &0x7fff,%d1 # strip sign
				13063	andi.w &0x8000,%d2 # keep old sign
				13064	sub.l %d0,%d1 # exp -= scale factor
				13065	subi.l &0x6000,%d1 # subtract bias
				13066	andi.w &0x7fff,%d1 # clear sign position
				13067	or.w %d2,%d1 # concat sign,exp
				13068	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				13069	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				13070	mov.l (%sp)+,%d2 # restore d2
				13071	bra.b fneg_sd_ovfl_dis
				13072
				13073	#
				13074	# the move in MAY overflow. so...
				13075	#
				13076	fneg_sd_may_ovfl:
					# The scale factor equals the overflow threshold: perform the
					# negation under the caller's rounding info and check whether the
					# scaled result reached a magnitude of 2.
				13077	fmov.l &0x0,%fpsr # clear FPSR
				13078	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				13079
				13080	fneg.x FP_SCR0(%a6),%fp0 # perform negation
				13081
				13082	fmov.l %fpsr,%d1 # save status
				13083	fmov.l &0x0,%fpcr # clear FPCR
				13084
				13085	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				13086
				13087	fabs.x %fp0,%fp1 # make a copy of result
				13088	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
				13089	fbge.w fneg_sd_ovfl_tst # yes; overflow has occurred
				13090
				13091	# no, it didn't overflow; we have correct result
				13092	bra.w fneg_sd_normal_exit
				13093
				13094	##########################################################################
				13095
				13096	#
				13097	# input is not normalized; what is it?
				13098	#
				13099	fneg_not_norm:
					# d1 = STAG (non-zero). DENORMs and NANs are routed to their
					# handlers; whatever remains is negated directly.
				13100	cmpi.b %d1,&DENORM # weed out DENORM
				13101	beq.w fneg_denorm
				13102	cmpi.b %d1,&SNAN # weed out SNAN
				13103	beq.l res_snan_1op
				13104	cmpi.b %d1,&QNAN # weed out QNAN
				13105	beq.l res_qnan_1op
				13106
				13107	#
				13108	# do the fneg; at this point, only possible ops are ZERO and INF.
				13109	# use fneg to determine ccodes.
				13110	# prec:mode should be zero at this point but it won't affect answer anyways.
				13111	#
				13112	fneg.x SRC_EX(%a0),%fp0 # do fneg
				13113	fmov.l %fpsr,%d0 # fetch resulting FPSR
				13114	rol.l &0x8,%d0 # put ccodes in lo byte
				13115	mov.b %d0,FPSR_CC(%a6) # insert correct ccodes
				13116	rts
				13117
				13118	#########################################################################
				13119	# XDEF **************************************************************** #
				13120	# ftst(): emulates the ftst instruction #
				13121	# #
				13122	# XREF **************************************************************** #
				13123	# res{s,q}nan_1op() - set NAN result for monadic instruction #
				13124	# #
				13125	# INPUT *************************************************************** #
				13126	# a0 = pointer to extended precision source operand #
				13127	# #
				13128	# OUTPUT ************************************************************** #
				13129	# none #
				13130	# #
				13131	# ALGORITHM *********************************************************** #
				13132	# Check the source operand tag (STAG) and set the FPSR condition #
				13133	# codes according to the operand type and sign. #
				13134	# #
				13135	#########################################################################
				13136
				13137	global ftst
					# ftst entry: a0 = src operand. Only FPSR_CC is written; no fp
					# register is loaded on the paths in this routine. NANs are handed
					# to res_snan_1op / res_qnan_1op.
				13138	ftst:
				13139	mov.b STAG(%a6),%d1
				13140	bne.b ftst_not_norm # optimize on non-norm input
				13141
				13142	#
				13143	# Norm:
				13144	#
					# A non-negative NORM returns without writing FPSR_CC.
				13145	ftst_norm:
				13146	tst.b SRC_EX(%a0) # is operand negative?
				13147	bmi.b ftst_norm_m # yes
				13148	rts
				13149	ftst_norm_m:
				13150	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				13151	rts
				13152
				13153	#
				13154	# input is not normalized; what is it?
				13155	#
				13156	ftst_not_norm:
				13157	cmpi.b %d1,&ZERO # weed out ZERO
				13158	beq.b ftst_zero
				13159	cmpi.b %d1,&INF # weed out INF
				13160	beq.b ftst_inf
				13161	cmpi.b %d1,&SNAN # weed out SNAN
				13162	beq.l res_snan_1op
				13163	cmpi.b %d1,&QNAN # weed out QNAN
				13164	beq.l res_qnan_1op
				13165
				13166	#
				13167	# Denorm:
				13168	#
					# Handled exactly like a NORM: only the sign is reported.
				13169	ftst_denorm:
				13170	tst.b SRC_EX(%a0) # is operand negative?
				13171	bmi.b ftst_denorm_m # yes
				13172	rts
				13173	ftst_denorm_m:
				13174	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				13175	rts
				13176
				13177	#
				13178	# Infinity:
				13179	#
				13180	ftst_inf:
				13181	tst.b SRC_EX(%a0) # is operand negative?
				13182	bmi.b ftst_inf_m # yes
				13183	ftst_inf_p:
				13184	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
				13185	rts
				13186	ftst_inf_m:
				13187	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
				13188	rts
				13189
				13190	#
				13191	# Zero:
				13192	#
				13193	ftst_zero:
				13194	tst.b SRC_EX(%a0) # is operand negative?
				13195	bmi.b ftst_zero_m # yes
				13196	ftst_zero_p:
				13197	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				13198	rts
				13199	ftst_zero_m:
				13200	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
				13201	rts
				13202
				13203	#########################################################################
				13204	# XDEF **************************************************************** #
				13205	# fint(): emulates the fint instruction #
				13206	# #
				13207	# XREF **************************************************************** #
				13208	# res_{s,q}nan_1op() - set NAN result for monadic operation #
				13209	# #
				13210	# INPUT *************************************************************** #
				13211	# a0 = pointer to extended precision source operand #
				13212	# d0 = round precision/mode #
				13213	# #
				13214	# OUTPUT ************************************************************** #
				13215	# fp0 = result #
				13216	# #
				13217	# ALGORITHM *********************************************************** #
				13218	# Separate according to operand type. Unnorms don't pass through #
				13219	# here. For norms, load the rounding mode/prec, execute a "fint", then #
				13220	# store the resulting FPSR bits. #
				13221	# For denorms, force the j-bit to a one and do the same as for #
				13222	# norms. Denorms are so low that the answer will either be a zero or a #
				13223	# one. #
				13224	# For zeroes/infs/NANs, return the same while setting the FPSR #
				13225	# as appropriate. #
				13226	# #
				13227	#########################################################################
				13228
				13229	global fint
				13230	fint:
				13231	mov.b STAG(%a6),%d1 # fetch src operand tag
				13232	bne.b fint_not_norm # optimize on non-norm input
				13233
				13234	#
				13235	# Norm: (also entered from fint_denorm with a0 -> FP_SCR0)
				13236	#
				13237	fint_norm:
				13238	andi.b &0x30,%d0 # set prec = ext
				13239
				13240	fmov.l %d0,%fpcr # set FPCR
				13241	fmov.l &0x0,%fpsr # clear FPSR
				13242
				13243	fint.x SRC(%a0),%fp0 # execute fint
				13244
				13245	fmov.l &0x0,%fpcr # clear FPCR
				13246	fmov.l %fpsr,%d0 # save FPSR
				13247	or.l %d0,USER_FPSR(%a6) # set exception bits
				13248
				13249	rts
				13250
				13251	#
				13252	# input is not normalized; what is it?
				13253	#
				13254	fint_not_norm:
				13255	cmpi.b %d1,&ZERO # weed out ZERO
				13256	beq.b fint_zero
				13257	cmpi.b %d1,&INF # weed out INF
				13258	beq.b fint_inf
				13259	cmpi.b %d1,&DENORM # weed out DENORM
				13260	beq.b fint_denorm
				13261	cmpi.b %d1,&SNAN # weed out SNAN
				13262	beq.l res_snan_1op
				13263	bra.l res_qnan_1op # any remaining tag is handled as QNAN
				13264
				13265	#
				13266	# Denorm:
				13267	#
				13268	# for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
				13269	# also, the INEX2 and AINEX exception bits will be set.
				13270	# so, we could either set these manually or force the DENORM
				13271	# to a very small NORM and ship it to the NORM routine.
				13272	# I do the latter.
				13273	#
				13274	fint_denorm:
				13275	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
				13276	mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM (only top mantissa byte is written)
				13277	lea FP_SCR0(%a6),%a0 # operand is now the scratch copy
				13278	bra.b fint_norm
				13279
				13280	#
				13281	# Zero:
				13282	#
				13283	fint_zero:
				13284	tst.b SRC_EX(%a0) # is ZERO negative?
				13285	bmi.b fint_zero_m # yes
				13286	fint_zero_p:
				13287	fmov.s &0x00000000,%fp0 # return +ZERO in fp0
				13288	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				13289	rts
				13290	fint_zero_m:
				13291	fmov.s &0x80000000,%fp0 # return -ZERO in fp0
				13292	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
				13293	rts
				13294
				13295	#
				13296	# Infinity:
				13297	#
				13298	fint_inf:
				13299	fmovm.x SRC(%a0),&0x80 # return result in fp0
				13300	tst.b SRC_EX(%a0) # is INF negative?
				13301	bmi.b fint_inf_m # yes
				13302	fint_inf_p:
				13303	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
				13304	rts
				13305	fint_inf_m:
				13306	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
				13307	rts
				13308
				13309	#########################################################################
				13310	# XDEF **************************************************************** #
				13311	# fintrz(): emulates the fintrz instruction #
				13312	# #
				13313	# XREF **************************************************************** #
				13314	# res_{s,q}nan_1op() - set NAN result for monadic operation #
				13315	# #
				13316	# INPUT *************************************************************** #
				13317	# a0 = pointer to extended precision source operand #
				13318	# d0 = round precision/mode #
				13319	# #
				13320	# OUTPUT ************************************************************** #
				13321	# fp0 = result #
				13322	# #
				13323	# ALGORITHM *********************************************************** #
				13324	# Separate according to operand type. Unnorms don't pass through #
				13325	# here. For norms, clear the FPSR, execute a "fintrz" (no rounding #
				13326	# mode is loaded), then store the resulting FPSR bits. #
				13327	# For denorms, force the j-bit to a one and do the same as for #
				13328	# norms. Denorms are so low that the answer will always be a #
				13329	# (+/-)zero. #
				13330	# For zeroes/infs/NANs, return the same while setting the FPSR #
				13331	# as appropriate. #
				13332	# #
				13333	#########################################################################
				13334
				13335	global fintrz
				13336	fintrz:
				13337	mov.b STAG(%a6),%d1 # fetch src operand tag
				13338	bne.b fintrz_not_norm # optimize on non-norm input
				13339
				13340	#
				13341	# Norm: (also entered from fintrz_denorm with a0 -> FP_SCR0)
				13342	#
				13343	fintrz_norm:
				13344	fmov.l &0x0,%fpsr # clear FPSR
				13345
				13346	fintrz.x SRC(%a0),%fp0 # execute fintrz
				13347
				13348	fmov.l %fpsr,%d0 # save FPSR
				13349	or.l %d0,USER_FPSR(%a6) # set exception bits
				13350
				13351	rts
				13352
				13353	#
				13354	# input is not normalized; what is it?
				13355	#
				13356	fintrz_not_norm:
				13357	cmpi.b %d1,&ZERO # weed out ZERO
				13358	beq.b fintrz_zero
				13359	cmpi.b %d1,&INF # weed out INF
				13360	beq.b fintrz_inf
				13361	cmpi.b %d1,&DENORM # weed out DENORM
				13362	beq.b fintrz_denorm
				13363	cmpi.b %d1,&SNAN # weed out SNAN
				13364	beq.l res_snan_1op
				13365	bra.l res_qnan_1op # any remaining tag is handled as QNAN
				13366
				13367	#
				13368	# Denorm:
				13369	#
				13370	# for DENORMs, the result will be (+/-)ZERO.
				13371	# also, the INEX2 and AINEX exception bits will be set.
				13372	# so, we could either set these manually or force the DENORM
				13373	# to a very small NORM and ship it to the NORM routine.
				13374	# I do the latter.
				13375	#
				13376	fintrz_denorm:
				13377	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
				13378	mov.b &0x80,FP_SCR0_HI(%a6) # force DENORM ==> small NORM (only top mantissa byte is written)
				13379	lea FP_SCR0(%a6),%a0 # operand is now the scratch copy
				13380	bra.b fintrz_norm
				13381
				13382	#
				13383	# Zero:
				13384	#
				13385	fintrz_zero:
				13386	tst.b SRC_EX(%a0) # is ZERO negative?
				13387	bmi.b fintrz_zero_m # yes
				13388	fintrz_zero_p:
				13389	fmov.s &0x00000000,%fp0 # return +ZERO in fp0
				13390	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				13391	rts
				13392	fintrz_zero_m:
				13393	fmov.s &0x80000000,%fp0 # return -ZERO in fp0
				13394	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
				13395	rts
				13396
				13397	#
				13398	# Infinity:
				13399	#
				13400	fintrz_inf:
				13401	fmovm.x SRC(%a0),&0x80 # return result in fp0
				13402	tst.b SRC_EX(%a0) # is INF negative?
				13403	bmi.b fintrz_inf_m # yes
				13404	fintrz_inf_p:
				13405	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
				13406	rts
				13407	fintrz_inf_m:
				13408	mov.b &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
				13409	rts
				13410
				13411	#########################################################################
				13412	# XDEF **************************************************************** #
				13413	# fabs(): emulates the fabs instruction #
				13414	# fsabs(): emulates the fsabs instruction #
				13415	# fdabs(): emulates the fdabs instruction #
				13416	# #
				13417	# XREF **************************************************************** #
				13418	# norm() - normalize denorm mantissa to provide EXOP #
				13419	# scale_to_zero_src() - make exponent. = 0; get scale factor #
				13420	# unf_res() - calculate underflow result #
				13421	# ovf_res() - calculate overflow result #
				13422	# res_{s,q}nan_1op() - set NAN result for monadic operation #
				13423	# #
				13424	# INPUT *************************************************************** #
				13425	# a0 = pointer to extended precision source operand #
				13426	# d0 = rnd precision/mode #
				13427	# #
				13428	# OUTPUT ************************************************************** #
				13429	# fp0 = result #
				13430	# fp1 = EXOP (if exception occurred) #
				13431	# #
				13432	# ALGORITHM *********************************************************** #
				13433	# Handle NANs, infinities, and zeroes as special cases. Divide #
				13434	# norms into extended, single, and double precision. #
				13435	# Simply clear sign for extended precision norm. Ext prec denorm #
				13436	# gets an EXOP created for it since it's an underflow. #
				13437	# Double and single precision can overflow and underflow. First, #
				13438	# scale the operand such that the exponent is zero. Perform an "fabs" #
				13439	# using the correct rnd mode/prec. Check to see if the original #
				13440	# exponent would take an exception. If so, use unf_res() or ovf_res() #
				13441	# to calculate the default result. Also, create the EXOP for the #
				13442	# exceptional case. If no exception should occur, insert the correct #
				13443	# result exponent and return. #
				13444	# Unnorms don't pass through here. #
				13445	# #
				13446	#########################################################################
				13447
				13448	global fsabs
				13449	fsabs:
				13450	andi.b &0x30,%d0 # clear rnd prec (keep only bits 4-5 of d0)
				13451	ori.b &s_mode*0x10,%d0 # insert sgl precision
				13452	bra.b fabs # continue in the common fabs code
				13453
				13454	global fdabs
				13455	fdabs:
				13456	andi.b &0x30,%d0 # clear rnd prec (keep only bits 4-5 of d0)
				13457	ori.b &d_mode*0x10,%d0 # insert dbl precision; falls through into fabs
				13458
				13459	global fabs
				13460	fabs:
				13461	mov.l %d0,L_SCR3(%a6) # store rnd info
				13462	mov.b STAG(%a6),%d1 # fetch src operand tag
				13463	bne.w fabs_not_norm # optimize on non-norm input
				13464
				13465	#
				13466	# ABSOLUTE VALUE: norms and denorms ONLY!
				13467	#
				13468	fabs_norm:
				13469	andi.b &0xc0,%d0 # is precision extended?
				13470	bne.b fabs_not_ext # no; go handle sgl or dbl
				13471
				13472	#
				13473	# precision selected is extended. so...we can not get an underflow
				13474	# or overflow because of rounding to the correct precision. so...
				13475	# skip the scaling and unscaling...
				13476	#
				13477	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi) to scratch
				13478	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo) to scratch
				13479	mov.w SRC_EX(%a0),%d1 # fetch {sgn,exp}
				13480	bclr &15,%d1 # force absolute value
				13481	mov.w %d1,FP_SCR0_EX(%a6) # insert exponent
				13482	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				13483	rts
				13484
				13485	#
				13486	# for an extended precision DENORM, the UNFL exception bit is set
				13487	# the accrued bit is NOT set in this instance(no inexactness!)
				13488	#
				13489	fabs_denorm:
				13490	andi.b &0xc0,%d0 # is precision extended?
				13491	bne.b fabs_not_ext # no
				13492
				13493	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				13494
				13495	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa (hi) to scratch
				13496	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy mantissa (lo) to scratch
				13497	mov.w SRC_EX(%a0),%d0 # fetch {sgn,exp}
				13498	bclr &15,%d0 # clear sign
				13499	mov.w %d0,FP_SCR0_EX(%a6) # insert exponent
				13500
				13501	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				13502
				13503	btst &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
				13504	bne.b fabs_ext_unfl_ena
				13505	rts
				13506
				13507	#
				13508	# the input is an extended DENORM and underflow is enabled in the FPCR.
				13509	# normalize the mantissa and add the bias of 0x6000 to the resulting negative
				13510	# exponent and insert back into the operand.
				13511	#
				13512	fabs_ext_unfl_ena:
				13513	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				13514	bsr.l norm # normalize result
				13515	neg.w %d0 # new exponent = -(shft val)
				13516	addi.w &0x6000,%d0 # add new bias to exponent
				13517	mov.w FP_SCR0_EX(%a6),%d1 # fetch old sign,exp
				13518	andi.w &0x8000,%d1 # keep old sign
				13519	andi.w &0x7fff,%d0 # clear sign position
				13520	or.w %d1,%d0 # concat old sign, new exponent
				13521	mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				13522	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				13523	rts
				13524
				13525	#
				13526	# operand is either single or double
				13527	#
				13528	fabs_not_ext:
				13529	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
				13530	bne.b fabs_dbl
				13531
				13532	#
				13533	# operand is to be rounded to single precision
				13534	#
				13535	fabs_sgl:
				13536	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
				13537	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				13538	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				13539	bsr.l scale_to_zero_src # calculate scale factor
				13540
				13541	cmpi.l %d0,&0x3fff-0x3f80 # will move in underflow?
				13542	bge.w fabs_sd_unfl # yes; go handle underflow
				13543	cmpi.l %d0,&0x3fff-0x407e # will move in overflow?
				13544	beq.w fabs_sd_may_ovfl # maybe; go check
				13545	blt.w fabs_sd_ovfl # yes; go handle overflow
				13546
				13547	#
				13548	# operand will NOT overflow or underflow when moved in to the fp reg file
				13549	#
				13550	fabs_sd_normal:
				13551	fmov.l &0x0,%fpsr # clear FPSR
				13552	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				13553
				13554	fabs.x FP_SCR0(%a6),%fp0 # perform absolute
				13555
				13556	fmov.l %fpsr,%d1 # save FPSR
				13557	fmov.l &0x0,%fpcr # clear FPCR
				13558
				13559	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				13560
				13561	fabs_sd_normal_exit:
				13562	mov.l %d2,-(%sp) # save d2
				13563	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				13564	mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
				13565	mov.l %d1,%d2 # make a copy
				13566	andi.l &0x7fff,%d1 # strip sign
				13567	sub.l %d0,%d1 # subtract scale factor
				13568	andi.w &0x8000,%d2 # keep old sign
				13569	or.w %d1,%d2 # concat old sign,new exp
				13570	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
				13571	mov.l (%sp)+,%d2 # restore d2
				13572	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				13573	rts
				13574
				13575	#
				13576	# operand is to be rounded to double precision
				13577	#
				13578	fabs_dbl:
				13579	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand to FP_SCR0
				13580	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				13581	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				13582	bsr.l scale_to_zero_src # calculate scale factor
				13583
				13584	cmpi.l %d0,&0x3fff-0x3c00 # will move in underflow?
				13585	bge.b fabs_sd_unfl # yes; go handle underflow
				13586	cmpi.l %d0,&0x3fff-0x43fe # will move in overflow?
				13587	beq.w fabs_sd_may_ovfl # maybe; go check
				13588	blt.w fabs_sd_ovfl # yes; go handle overflow
				13589	bra.w fabs_sd_normal # no; go handle normalized op
				13590
				13591	#
				13592	# operand WILL underflow when moved in to the fp register file
				13593	#
				13594	fabs_sd_unfl:
				13595	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				13596
				13597	bclr &0x7,FP_SCR0_EX(%a6) # force absolute value
				13598
				13599	# if underflow or inexact is enabled, go calculate EXOP first.
				13600	mov.b FPCR_ENABLE(%a6),%d1
				13601	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				13602	bne.b fabs_sd_unfl_ena # yes
				13603
				13604	fabs_sd_unfl_dis:
				13605	lea FP_SCR0(%a6),%a0 # pass: result addr
				13606	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				13607	bsr.l unf_res # calculate default result
				13608	or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
				13609	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				13610	rts
				13611
				13612	#
				13613	# operand will underflow AND underflow (or inexact) is enabled.
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	13614	# Therefore, we must return the result rounded to extended precision.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	13615	#
				13616	fabs_sd_unfl_ena:
				13617	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # build EXOP in FP_SCR1
				13618	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
				13619	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
				13620
				13621	mov.l %d2,-(%sp) # save d2
				13622	mov.l %d1,%d2 # make a copy
				13623	andi.l &0x7fff,%d1 # strip sign
				13624	andi.w &0x8000,%d2 # keep old sign
				13625	sub.l %d0,%d1 # subtract scale factor
				13626	addi.l &0x6000,%d1 # add new bias
				13627	andi.w &0x7fff,%d1
				13628	or.w %d2,%d1 # concat new sign,new exp
				13629	mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
				13630	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
				13631	mov.l (%sp)+,%d2 # restore d2
				13632	bra.b fabs_sd_unfl_dis # then produce the default result too
				13633
				13634	#
				13635	# operand WILL overflow.
				13636	#
				13637	fabs_sd_ovfl:
				13638	fmov.l &0x0,%fpsr # clear FPSR
				13639	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				13640
				13641	fabs.x FP_SCR0(%a6),%fp0 # perform absolute
				13642
				13643	fmov.l &0x0,%fpcr # clear FPCR
				13644	fmov.l %fpsr,%d1 # save FPSR
				13645
				13646	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				13647
				13648	fabs_sd_ovfl_tst:
				13649	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				13650
				13651	mov.b FPCR_ENABLE(%a6),%d1
				13652	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				13653	bne.b fabs_sd_ovfl_ena # yes
				13654
				13655	#
				13656	# OVFL is not enabled; therefore, we must create the default result by
				13657	# calling ovf_res(). (fabs_sd_ovfl_ena also branches here after the EXOP.)
				13658	#
				13659	fabs_sd_ovfl_dis:
				13660	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				13661	sne %d1 # set sign param accordingly
				13662	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
				13663	bsr.l ovf_res # calculate default result
				13664	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				13665	fmovm.x (%a0),&0x80 # return default result in fp0
				13666	rts
				13667
				13668	#
				13669	# OVFL (or INEX) is enabled.
				13670	# the INEX2 bit has already been updated by the round to the correct precision.
				13671	# now, round to extended(and don't alter the FPSR).
				13672	#
				13673	fabs_sd_ovfl_ena:
				13674	mov.l %d2,-(%sp) # save d2
				13675	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				13676	mov.l %d1,%d2 # make a copy
				13677	andi.l &0x7fff,%d1 # strip sign
				13678	andi.w &0x8000,%d2 # keep old sign
				13679	sub.l %d0,%d1 # subtract scale factor
				13680	subi.l &0x6000,%d1 # subtract bias
				13681	andi.w &0x7fff,%d1
				13682	or.w %d2,%d1 # concat sign,exp
				13683	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				13684	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				13685	mov.l (%sp)+,%d2 # restore d2
				13686	bra.b fabs_sd_ovfl_dis # then produce the default result too
				13687
				13688	#
				13689	# the move in MAY overflow. so...
				13690	#
				13691	fabs_sd_may_ovfl:
				13692	fmov.l &0x0,%fpsr # clear FPSR
				13693	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				13694
				13695	fabs.x FP_SCR0(%a6),%fp0 # perform absolute
				13696
				13697	fmov.l %fpsr,%d1 # save status
				13698	fmov.l &0x0,%fpcr # clear FPCR
				13699
				13700	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				13701
				13702	fabs.x %fp0,%fp1 # make a copy of result
				13703	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
				13704	fbge.w fabs_sd_ovfl_tst # yes; overflow has occurred
				13705
				13706	# no, it didn't overflow; we have correct result
				13707	bra.w fabs_sd_normal_exit
				13708
				13709	##########################################################################
				13710
				13711	#
				13712	# input is not normalized; what is it?
				13713	#
				13714	fabs_not_norm:
				13715	cmpi.b %d1,&DENORM # weed out DENORM
				13716	beq.w fabs_denorm
				13717	cmpi.b %d1,&SNAN # weed out SNAN
				13718	beq.l res_snan_1op
				13719	cmpi.b %d1,&QNAN # weed out QNAN
				13720	beq.l res_qnan_1op
				13721
				13722	fabs.x SRC(%a0),%fp0 # force absolute value
				13723
				13724	cmpi.b %d1,&INF # weed out INF
				13725	beq.b fabs_inf
				13726	fabs_zero:
				13727	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				13728	rts
				13729	fabs_inf:
				13730	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
				13731	rts
				13732
				13733	#########################################################################
				13734	# XDEF **************************************************************** #
				13735	# fcmp(): fp compare op routine #
				13736	# #
				13737	# XREF **************************************************************** #
				13738	# res_qnan() - return QNAN result #
				13739	# res_snan() - return SNAN result #
				13740	# #
				13741	# INPUT *************************************************************** #
				13742	# a0 = pointer to extended precision source operand #
				13743	# a1 = pointer to extended precision destination operand #
				13744	# d0 = round prec/mode #
				13745	# #
				13746	# OUTPUT ************************************************************** #
				13747	# None #
				13748	# #
				13749	# ALGORITHM *********************************************************** #
				13750	# Handle NANs and denorms as special cases. For everything else, #
				13751	# just use the actual fcmp instruction to produce the correct condition #
				13752	# codes. #
				13753	# #
				13754	#########################################################################
				13755
				13756	global fcmp
				13757	fcmp:
				13758	clr.w %d1 # d1.w is used as the table index below
				13759	mov.b DTAG(%a6),%d1 # d1 = dst tag
				13760	lsl.b &0x3,%d1 # d1 = dst tag * 8
				13761	or.b STAG(%a6),%d1 # d1 = (dst tag * 8) | src tag
				13762	bne.b fcmp_not_norm # optimize on non-norm input
				13763
				13764	#
				13765	# COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
				13766	#
				13767	fcmp_norm:
				13768	fmovm.x DST(%a1),&0x80 # load dst op
				13769
				13770	fcmp.x %fp0,SRC(%a0) # do compare
				13771
				13772	fmov.l %fpsr,%d0 # save FPSR
				13773	rol.l &0x8,%d0 # extract ccode bits
				13774	mov.b %d0,FPSR_CC(%a6) # set ccode bits(no exc bits are set)
				13775
				13776	rts
				13777
				13778	#
				13779	# fcmp: inputs are not both normalized; what are they?
				13780	#
				13781	fcmp_not_norm:
				13782	mov.w (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch handler offset for this tag pair
				13783	jmp (tbl_fcmp_op.b,%pc,%d1.w*1) # jump to tbl_fcmp_op + offset
				13784
				13785	swbeg &48
				13786	tbl_fcmp_op:
				13787	short fcmp_norm - tbl_fcmp_op # NORM - NORM
				13788	short fcmp_norm - tbl_fcmp_op # NORM - ZERO
				13789	short fcmp_norm - tbl_fcmp_op # NORM - INF
				13790	short fcmp_res_qnan - tbl_fcmp_op # NORM - QNAN
				13791	short fcmp_nrm_dnrm - tbl_fcmp_op # NORM - DENORM
				13792	short fcmp_res_snan - tbl_fcmp_op # NORM - SNAN
				13793	short tbl_fcmp_op - tbl_fcmp_op #
				13794	short tbl_fcmp_op - tbl_fcmp_op #
				13795
				13796	short fcmp_norm - tbl_fcmp_op # ZERO - NORM
				13797	short fcmp_norm - tbl_fcmp_op # ZERO - ZERO
				13798	short fcmp_norm - tbl_fcmp_op # ZERO - INF
				13799	short fcmp_res_qnan - tbl_fcmp_op # ZERO - QNAN
				13800	short fcmp_dnrm_s - tbl_fcmp_op # ZERO - DENORM
				13801	short fcmp_res_snan - tbl_fcmp_op # ZERO - SNAN
				13802	short tbl_fcmp_op - tbl_fcmp_op #
				13803	short tbl_fcmp_op - tbl_fcmp_op #
				13804
				13805	short fcmp_norm - tbl_fcmp_op # INF - NORM
				13806	short fcmp_norm - tbl_fcmp_op # INF - ZERO
				13807	short fcmp_norm - tbl_fcmp_op # INF - INF
				13808	short fcmp_res_qnan - tbl_fcmp_op # INF - QNAN
				13809	short fcmp_dnrm_s - tbl_fcmp_op # INF - DENORM
				13810	short fcmp_res_snan - tbl_fcmp_op # INF - SNAN
				13811	short tbl_fcmp_op - tbl_fcmp_op #
				13812	short tbl_fcmp_op - tbl_fcmp_op #
				13813
				13814	short fcmp_res_qnan - tbl_fcmp_op # QNAN - NORM
				13815	short fcmp_res_qnan - tbl_fcmp_op # QNAN - ZERO
				13816	short fcmp_res_qnan - tbl_fcmp_op # QNAN - INF
				13817	short fcmp_res_qnan - tbl_fcmp_op # QNAN - QNAN
				13818	short fcmp_res_qnan - tbl_fcmp_op # QNAN - DENORM
				13819	short fcmp_res_snan - tbl_fcmp_op # QNAN - SNAN
				13820	short tbl_fcmp_op - tbl_fcmp_op #
				13821	short tbl_fcmp_op - tbl_fcmp_op #
				13822
				13823	short fcmp_dnrm_nrm - tbl_fcmp_op # DENORM - NORM
				13824	short fcmp_dnrm_d - tbl_fcmp_op # DENORM - ZERO
				13825	short fcmp_dnrm_d - tbl_fcmp_op # DENORM - INF
				13826	short fcmp_res_qnan - tbl_fcmp_op # DENORM - QNAN
				13827	short fcmp_dnrm_sd - tbl_fcmp_op # DENORM - DENORM
				13828	short fcmp_res_snan - tbl_fcmp_op # DENORM - SNAN
				13829	short tbl_fcmp_op - tbl_fcmp_op #
				13830	short tbl_fcmp_op - tbl_fcmp_op #
				13831
				13832	short fcmp_res_snan - tbl_fcmp_op # SNAN - NORM
				13833	short fcmp_res_snan - tbl_fcmp_op # SNAN - ZERO
				13834	short fcmp_res_snan - tbl_fcmp_op # SNAN - INF
				13835	short fcmp_res_snan - tbl_fcmp_op # SNAN - QNAN
				13836	short fcmp_res_snan - tbl_fcmp_op # SNAN - DENORM
				13837	short fcmp_res_snan - tbl_fcmp_op # SNAN - SNAN
				13838	short tbl_fcmp_op - tbl_fcmp_op #
				13839	short tbl_fcmp_op - tbl_fcmp_op #
				13840
				13841	# unlike all other functions for QNAN and SNAN, fcmp does NOT set the
				13842	# 'N' bit for a negative QNAN or SNAN input so we must squelch it here.
				13843	fcmp_res_qnan:
				13844	bsr.l res_qnan
				13845	andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 of the ccode byte
				13846	rts
				13847	fcmp_res_snan:
				13848	bsr.l res_snan
				13849	andi.b &0xf7,FPSR_CC(%a6) # clear bit 3 of the ccode byte
				13850	rts
				13851
				13852	#
				13853	# DENORMs are a little more difficult.
				13854	# If you have 2 DENORMs, then you can just force the j-bit to a one
				13855	# and use the fcmp_norm routine.
				13856	# If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
				13857	# and use the fcmp_norm routine.
				13858	# If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
				13859	# But with a DENORM and a NORM of the same sign, the neg bit is set if the
				13860	# (1) signs are (+) and the DENORM is the dst or
				13861	# (2) signs are (-) and the DENORM is the src
				13862	#
				13863
				13864	fcmp_dnrm_s:
				13865	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				13866	mov.l SRC_HI(%a0),%d0
				13867	bset &31,%d0 # DENORM src; make into small norm
				13868	mov.l %d0,FP_SCR0_HI(%a6)
				13869	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				13870	lea FP_SCR0(%a6),%a0 # src now points at the scratch copy
				13871	bra.w fcmp_norm
				13872
				13873	fcmp_dnrm_d:
				13874	mov.l DST_EX(%a1),FP_SCR0_EX(%a6) # NOTE(review): .l here but .w in fcmp_dnrm_s/_sd - confirm intended
				13875	mov.l DST_HI(%a1),%d0
				13876	bset &31,%d0 # DENORM dst; make into small norm
				13877	mov.l %d0,FP_SCR0_HI(%a6)
				13878	mov.l DST_LO(%a1),FP_SCR0_LO(%a6)
				13879	lea FP_SCR0(%a6),%a1 # dst now points at the scratch copy
				13880	bra.w fcmp_norm
				13881
				13882	fcmp_dnrm_sd:
				13883	mov.w DST_EX(%a1),FP_SCR1_EX(%a6)
				13884	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				13885	mov.l DST_HI(%a1),%d0
				13886	bset &31,%d0 # DENORM dst; make into small norm
				13887	mov.l %d0,FP_SCR1_HI(%a6)
				13888	mov.l SRC_HI(%a0),%d0
				13889	bset &31,%d0 # DENORM src; make into small norm
				13890	mov.l %d0,FP_SCR0_HI(%a6)
				13891	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				13892	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				13893	lea FP_SCR1(%a6),%a1 # dst now points at its scratch copy
				13894	lea FP_SCR0(%a6),%a0 # src now points at its scratch copy
				13895	bra.w fcmp_norm
				13896
				13897	fcmp_nrm_dnrm:
				13898	mov.b SRC_EX(%a0),%d0 # determine if like signs
				13899	mov.b DST_EX(%a1),%d1
				13900	eor.b %d0,%d1
				13901	bmi.w fcmp_dnrm_s # signs differ; fix up src and use fcmp_norm
				13902
				13903	# signs are the same, so must determine the answer ourselves.
				13904	tst.b %d0 # is src op negative?
				13905	bmi.b fcmp_nrm_dnrm_m # yes
				13906	rts
				13907	fcmp_nrm_dnrm_m:
				13908	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				13909	rts
				13910
				13911	fcmp_dnrm_nrm:
				13912	mov.b SRC_EX(%a0),%d0 # determine if like signs
				13913	mov.b DST_EX(%a1),%d1
				13914	eor.b %d0,%d1
				13915	bmi.w fcmp_dnrm_d # signs differ; fix up dst and use fcmp_norm
				13916
				13917	# signs are the same, so must determine the answer ourselves.
				13918	tst.b %d0 # is src op negative?
				13919	bpl.b fcmp_dnrm_nrm_m # no
				13920	rts
				13921	fcmp_dnrm_nrm_m:
				13922	mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
				13923	rts
				13924
				13925	#########################################################################
				13926	# XDEF **************************************************************** #
				13927	# fsglmul(): emulates the fsglmul instruction #
				13928	# #
				13929	# XREF **************************************************************** #
				13930	# scale_to_zero_src() - scale src exponent to zero #
				13931	# scale_to_zero_dst() - scale dst exponent to zero #
				13932	# unf_res4() - return default underflow result for sglop #
				13933	# ovf_res() - return default overflow result #
				13934	# res_qnan() - return QNAN result #
				13935	# res_snan() - return SNAN result #
				13936	# #
				13937	# INPUT *************************************************************** #
				13938	# a0 = pointer to extended precision source operand #
				13939	# a1 = pointer to extended precision destination operand #
				13940	# d0 rnd prec,mode #
				13941	# #
				13942	# OUTPUT ************************************************************** #
				13943	# fp0 = result #
				13944	# fp1 = EXOP (if exception occurred) #
				13945	# #
				13946	# ALGORITHM *********************************************************** #
				13947	# Handle NANs, infinities, and zeroes as special cases. Divide #
				13948	# norms/denorms into ext/sgl/dbl precision. #
				13949	# For norms/denorms, scale the exponents such that a multiply #
				13950	# instruction won't cause an exception. Use the regular fsglmul to #
				13951	# compute a result. Check if the regular operands would have taken #
				13952	# an exception. If so, return the default overflow/underflow result #
				13953	# and return the EXOP if exceptions are enabled. Else, scale the #
				13954	# result operand to the proper exponent. #
				13955	# #
				13956	#########################################################################
				13957
				13958	global fsglmul
					# fsglmul entry: save the rounding info passed in d0, then build a
					# dispatch index from the operand tags. An index of zero is the
					# NORM x NORM entry of tbl_fsglmul_op and falls straight through.
				13959	fsglmul:
				13960	mov.l %d0,L_SCR3(%a6) # store rnd info (reloaded into FPCR later)
				13961
				13962	clr.w %d1 # clear d1.w; it becomes the table index
				13963	mov.b DTAG(%a6),%d1 # d1 = dst tag
				13964	lsl.b &0x3,%d1 # dst tag selects a group of 8 table entries
				13965	or.b STAG(%a6),%d1 # d1 = (dst tag << 3) | src tag
				13966
				13967	bne.w fsglmul_not_norm # nonzero index: dispatch through the table
				13968
					# Multiply path shared by the NORM and DENORM table entries.
					# Copies both operands into scratch space, scales their exponents,
					# and classifies the combined scale factor in d0.
				13969	fsglmul_norm:
				13970	mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
				13971	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				13972	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				13973
				13974	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
				13975	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				13976	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				13977
				13978	bsr.l scale_to_zero_src # scale src exponent; d0 = scale factor 1
				13979	mov.l %d0,-(%sp) # save scale factor 1
				13980
				13981	bsr.l scale_to_zero_dst # scale dst exponent; d0 = scale factor 2
				13982
				13983	add.l (%sp)+,%d0 # SCALE_FACTOR = scale1 + scale2 (also pops scale1)
				13984
				13985	cmpi.l %d0,&0x3fff-0x7ffe # would result ovfl?
				13986	beq.w fsglmul_may_ovfl # result may rnd to overflow
				13987	blt.w fsglmul_ovfl # result will overflow
				13988
				13989	cmpi.l %d0,&0x3fff+0x0001 # would result unfl?
				13990	beq.w fsglmul_may_unfl # result may rnd to no unfl
				13991	bgt.w fsglmul_unfl # result will underflow
				13992
				13993	fsglmul_normal:
					# No overflow or underflow expected: multiply the scaled operands
					# under the caller's FPCR and merge the resulting status bits into
					# USER_FPSR. d0 still holds SCALE_FACTOR.
				13994	fmovm.x FP_SCR1(%a6),&0x80 # load scaled dst op into fp0
				13995
				13996	fmov.l L_SCR3(%a6),%fpcr # set FPCR from saved rnd info
				13997	fmov.l &0x0,%fpsr # clear FPSR
				13998
				13999	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply: fp0 = fp0 * scaled src
				14000
				14001	fmov.l %fpsr,%d1 # save status
				14002	fmov.l &0x0,%fpcr # clear FPCR
				14003
				14004	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14005
					# Shared exit (also reached from the may_ovfl/may_unfl checks):
					# rewrite the exponent of fp0 as (exp - SCALE_FACTOR), keeping the
					# sign bit, and return the result in fp0. d2 is preserved.
				14006	fsglmul_normal_exit:
				14007	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				14008	mov.l %d2,-(%sp) # save d2
				14009	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
				14010	mov.l %d1,%d2 # make a copy
				14011	andi.l &0x7fff,%d1 # strip sign
				14012	andi.w &0x8000,%d2 # keep old sign
				14013	sub.l %d0,%d1 # apply scale factor: exp = exp - SCALE_FACTOR
				14014	or.w %d2,%d1 # concat old sign,new exp
				14015	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14016	mov.l (%sp)+,%d2 # restore d2
				14017	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				14018	rts
				14019
				14020	fsglmul_ovfl:
					# The scale factor says the result will overflow. The multiply is
					# still executed on the scaled operands so that INEX2/N are
					# captured and fp0 holds the mantissa needed for the EXOP.
				14021	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14022
				14023	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14024	fmov.l &0x0,%fpsr # clear FPSR
				14025
				14026	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
				14027
				14028	fmov.l %fpsr,%d1 # save status
				14029	fmov.l &0x0,%fpcr # clear FPCR
				14030
				14031	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14032
				14033	fsglmul_ovfl_tst:
				14034
				14035	# save setting this until now because this is where fsglmul_may_ovfl may jump in
				14036	or.l &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
				14037
				14038	mov.b FPCR_ENABLE(%a6),%d1
				14039	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				14040	bne.b fsglmul_ovfl_ena # yes
				14041
					# Build the default overflow result. The enabled path below also
					# branches back here after placing the EXOP in fp1.
				14042	fsglmul_ovfl_dis:
				14043	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				14044	sne %d1 # set sign param accordingly
				14045	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
				14046	andi.b &0x30,%d0 # force prec = ext
				14047	bsr.l ovf_res # calculate default result
				14048	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				14049	fmovm.x (%a0),&0x80 # return default result in fp0
				14050	rts
				14051
					# OVFL or INEX is enabled: build the EXOP from the multiply result
					# with exponent (exp - SCALE_FACTOR - 0x6000) masked to 15 bits,
					# return it in fp1, then produce the default result as above.
					# d0 must still hold SCALE_FACTOR on entry.
				14052	fsglmul_ovfl_ena:
				14053	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
				14054
				14055	mov.l %d2,-(%sp) # save d2
				14056	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				14057	mov.l %d1,%d2 # make a copy
				14058	andi.l &0x7fff,%d1 # strip sign
				14059	sub.l %d0,%d1 # apply scale factor: exp = exp - SCALE_FACTOR
				14060	subi.l &0x6000,%d1 # subtract bias
				14061	andi.w &0x7fff,%d1 # keep the 15-bit exponent field only
				14062	andi.w &0x8000,%d2 # keep old sign
				14063	or.w %d2,%d1 # concat old sign,new exp
				14064	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14065	mov.l (%sp)+,%d2 # restore d2
				14066	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				14067	bra.b fsglmul_ovfl_dis # go build the default result in fp0
				14068
				14069	fsglmul_may_ovfl:
					# Boundary scale factor: whether the result overflows depends on
					# the rounded product. Multiply the scaled operands and compare
					# the magnitude of the product against 2.
				14070	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14071
				14072	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14073	fmov.l &0x0,%fpsr # clear FPSR
				14074
				14075	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
				14076
				14077	fmov.l %fpsr,%d1 # save status
				14078	fmov.l &0x0,%fpcr # clear FPCR
				14079
				14080	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14081
				14082	fabs.x %fp0,%fp1 # fp1 = |result|; fp0 is left untouched
				14083	fcmp.b %fp1,&0x2 # is |result| >= 2.b?
				14084	fbge.w fsglmul_ovfl_tst # yes; overflow has occurred
				14085
				14086	# no, it didn't overflow; we have correct result
				14087	bra.w fsglmul_normal_exit
				14088
				14089	fsglmul_unfl:
					# The result will underflow. Flag UNFL, then multiply the scaled
					# operands with FPCR set to RZ only; that product in fp0 is what
					# unf_res4 turns into the default result.
				14090	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				14091
				14092	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14093
				14094	fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ rounding mode only
				14095	fmov.l &0x0,%fpsr # clear FPSR
				14096
				14097	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
				14098
				14099	fmov.l %fpsr,%d1 # save status
				14100	fmov.l &0x0,%fpcr # clear FPCR
				14101
				14102	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14103
				14104	mov.b FPCR_ENABLE(%a6),%d1
				14105	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				14106	bne.b fsglmul_unfl_ena # yes
				14107
					# Default underflow result. Also reached from fsglmul_unfl_ena,
					# in which case fp0 still holds the RZ product computed above.
				14108	fsglmul_unfl_dis:
				14109	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				14110
				14111	lea FP_SCR0(%a6),%a0 # pass: result addr
				14112	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				14113	bsr.l unf_res4 # calculate default result
				14114	or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
				14115	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				14116	rts
				14117
				14118	#
				14119	# UNFL or INEX is enabled: redo the multiply in fp1 under the user's FPCR
				14120	# and return it as the EXOP with exponent (exp - SCALE_FACTOR + 0x6000).
				14121	fsglmul_unfl_ena:
				14122	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				14123
				14124	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14125	fmov.l &0x0,%fpsr # clear FPSR
				14126
				14127	fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply
				14128
				14129	fmov.l &0x0,%fpcr # clear FPCR
				14130
				14131	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
				14132	mov.l %d2,-(%sp) # save d2
				14133	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				14134	mov.l %d1,%d2 # make a copy
				14135	andi.l &0x7fff,%d1 # strip sign
				14136	andi.w &0x8000,%d2 # keep old sign
				14137	sub.l %d0,%d1 # apply scale factor: exp = exp - SCALE_FACTOR
				14138	addi.l &0x6000,%d1 # add bias
				14139	andi.w &0x7fff,%d1 # keep the 15-bit exponent field only
				14140	or.w %d2,%d1 # concat old sign,new exp
				14141	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14142	mov.l (%sp)+,%d2 # restore d2
				14143	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				14144	bra.w fsglmul_unfl_dis # go build the default result in fp0
				14145
				14146	fsglmul_may_unfl:
					# Boundary scale factor: underflow depends on the product itself.
					# |result| > 2 means no underflow, |result| < 2 means underflow,
					# and |result| == 2 needs the RZ re-check further down.
				14147	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14148
				14149	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14150	fmov.l &0x0,%fpsr # clear FPSR
				14151
				14152	fsglmul.x FP_SCR0(%a6),%fp0 # execute sgl multiply
				14153
				14154	fmov.l %fpsr,%d1 # save status
				14155	fmov.l &0x0,%fpcr # clear FPCR
				14156
				14157	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14158
				14159	fabs.x %fp0,%fp1 # fp1 = |result|; fp0 is left untouched
				14160	fcmp.b %fp1,&0x2 # compare |result| with 2.b
				14161	fbgt.w fsglmul_normal_exit # |result| > 2: no underflow occurred
				14162	fblt.w fsglmul_unfl # |result| < 2: underflow occurred
				14163
				14164	#
				14165	# we still don't know if underflow occurred. result is ~ equal to 2. but,
				14166	# we don't know if the result was an underflow that rounded up to a 2 or
				14167	# a normalized number that rounded down to a 2. so, redo the entire operation
				14168	# using RZ as the rounding mode to see what the pre-rounded result is.
				14169	# this case should be relatively rare.
				14170	#
				14171	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				14172
				14173	mov.l L_SCR3(%a6),%d1
				14174	andi.b &0xc0,%d1 # keep rnd prec
				14175	ori.b &rz_mode*0x10,%d1 # insert RZ
				14176
				14177	fmov.l %d1,%fpcr # set FPCR
				14178	fmov.l &0x0,%fpsr # clear FPSR
				14179
				14180	fsglmul.x FP_SCR0(%a6),%fp1 # execute sgl multiply (fp0 keeps first result)
				14181
				14182	fmov.l &0x0,%fpcr # clear FPCR
				14183	fabs.x %fp1 # make absolute value
				14184	fcmp.b %fp1,&0x2 # is |RZ result| < 2.b?
				14185	fbge.w fsglmul_normal_exit # no; no underflow occurred
				14186	bra.w fsglmul_unfl # yes, underflow occurred
				14187
				14188	##############################################################################
				14189
				14190	#
				14191	# Single Precision Multiply: inputs are not both normalized; what are they?
				14192	#
				14193	fsglmul_not_norm:
					# d1 = (dst tag << 3) | src tag. Fetch the 16-bit offset stored at
					# that index and jump to tbl_fsglmul_op + offset. Table comments
					# read "DST x SRC".
				14194	mov.w (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from table
				14195	jmp (tbl_fsglmul_op.b,%pc,%d1.w*1) # jump to tbl_fsglmul_op + offset
				14196
				14197	swbeg &48
				14198	tbl_fsglmul_op:
				14199	short fsglmul_norm - tbl_fsglmul_op # NORM x NORM
				14200	short fsglmul_zero - tbl_fsglmul_op # NORM x ZERO
				14201	short fsglmul_inf_src - tbl_fsglmul_op # NORM x INF
				14202	short fsglmul_res_qnan - tbl_fsglmul_op # NORM x QNAN
				14203	short fsglmul_norm - tbl_fsglmul_op # NORM x DENORM
				14204	short fsglmul_res_snan - tbl_fsglmul_op # NORM x SNAN
				14205	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14206	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14207
				14208	short fsglmul_zero - tbl_fsglmul_op # ZERO x NORM
				14209	short fsglmul_zero - tbl_fsglmul_op # ZERO x ZERO
				14210	short fsglmul_res_operr - tbl_fsglmul_op # ZERO x INF
				14211	short fsglmul_res_qnan - tbl_fsglmul_op # ZERO x QNAN
				14212	short fsglmul_zero - tbl_fsglmul_op # ZERO x DENORM
				14213	short fsglmul_res_snan - tbl_fsglmul_op # ZERO x SNAN
				14214	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14215	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14216
				14217	short fsglmul_inf_dst - tbl_fsglmul_op # INF x NORM
				14218	short fsglmul_res_operr - tbl_fsglmul_op # INF x ZERO
				14219	short fsglmul_inf_dst - tbl_fsglmul_op # INF x INF
				14220	short fsglmul_res_qnan - tbl_fsglmul_op # INF x QNAN
				14221	short fsglmul_inf_dst - tbl_fsglmul_op # INF x DENORM
				14222	short fsglmul_res_snan - tbl_fsglmul_op # INF x SNAN
				14223	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14224	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14225
				14226	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x NORM
				14227	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x ZERO
				14228	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x INF
				14229	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x QNAN
				14230	short fsglmul_res_qnan - tbl_fsglmul_op # QNAN x DENORM
				14231	short fsglmul_res_snan - tbl_fsglmul_op # QNAN x SNAN
				14232	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14233	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14234
				14235	short fsglmul_norm - tbl_fsglmul_op # DENORM x NORM
				14236	short fsglmul_zero - tbl_fsglmul_op # DENORM x ZERO
				14237	short fsglmul_inf_src - tbl_fsglmul_op # DENORM x INF
				14238	short fsglmul_res_qnan - tbl_fsglmul_op # DENORM x QNAN
				14239	short fsglmul_norm - tbl_fsglmul_op # DENORM x DENORM
				14240	short fsglmul_res_snan - tbl_fsglmul_op # DENORM x SNAN
				14241	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14242	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14243
				14244	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x NORM
				14245	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x ZERO
				14246	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x INF
				14247	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x QNAN
				14248	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x DENORM
				14249	short fsglmul_res_snan - tbl_fsglmul_op # SNAN x SNAN
				14250	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14251	short tbl_fsglmul_op - tbl_fsglmul_op # offset 0: no handler for this index
				14252
					# Local stubs: the table holds 16-bit offsets, so each entry points
					# at a nearby label that long-branches to the shared handler.
				14253	fsglmul_res_operr:
				14254	bra.l res_operr
				14255	fsglmul_res_snan:
				14256	bra.l res_snan
				14257	fsglmul_res_qnan:
				14258	bra.l res_qnan
				14259	fsglmul_zero:
				14260	bra.l fmul_zero
				14261	fsglmul_inf_src:
				14262	bra.l fmul_inf_src
				14263	fsglmul_inf_dst:
				14264	bra.l fmul_inf_dst
				14265
				14266	#########################################################################
				14267	# XDEF **************************************************************** #
				14268	# fsgldiv(): emulates the fsgldiv instruction #
				14269	# #
				14270	# XREF **************************************************************** #
				14271	# scale_to_zero_src() - scale src exponent to zero #
				14272	# scale_to_zero_dst() - scale dst exponent to zero #
				14273	# unf_res4() - return default underflow result for sglop #
				14274	# ovf_res() - return default overflow result #
				14275	# res_qnan() - return QNAN result #
				14276	# res_snan() - return SNAN result #
				14277	# #
				14278	# INPUT *************************************************************** #
				14279	# a0 = pointer to extended precision source operand #
				14280	# a1 = pointer to extended precision destination operand #
				14281	# d0 rnd prec,mode #
				14282	# #
				14283	# OUTPUT ************************************************************** #
				14284	# fp0 = result #
				14285	# fp1 = EXOP (if exception occurred) #
				14286	# #
				14287	# ALGORITHM *********************************************************** #
				14288	# Handle NANs, infinities, and zeroes as special cases. Divide #
				14289	# norms/denorms into ext/sgl/dbl precision. #
				14290	# For norms/denorms, scale the exponents such that a divide #
				14291	# instruction won't cause an exception. Use the regular fsgldiv to #
				14292	# compute a result. Check if the regular operands would have taken #
				14293	# an exception. If so, return the default overflow/underflow result #
				14294	# and return the EXOP if exceptions are enabled. Else, scale the #
				14295	# result operand to the proper exponent. #
				14296	# #
				14297	#########################################################################
				14298
				14299	global fsgldiv
					# fsgldiv entry: save the rounding info passed in d0, then build a
					# dispatch index from the operand tags. An index of zero is the
					# NORM / NORM entry of tbl_fsgldiv_op and falls straight through.
				14300	fsgldiv:
				14301	mov.l %d0,L_SCR3(%a6) # store rnd info (reloaded into FPCR later)
				14302
				14303	clr.w %d1 # clear d1.w; it becomes the table index
				14304	mov.b DTAG(%a6),%d1 # d1 = dst tag
				14305	lsl.b &0x3,%d1 # dst tag selects a group of 8 table entries
				14306	or.b STAG(%a6),%d1 # d1 = (dst tag << 3) | src tag
				14307
				14308	bne.w fsgldiv_not_norm # nonzero index: dispatch through the table
				14309
				14310	#
				14311	# DIVIDE: NORMs and DENORMs ONLY!
				14312	#
				14313	fsgldiv_norm:
				14314	mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
				14315	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				14316	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				14317
				14318	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
				14319	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				14320	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				14321
				14322	bsr.l scale_to_zero_src # calculate scale factor 1 (src)
				14323	mov.l %d0,-(%sp) # save scale factor 1
				14324
				14325	bsr.l scale_to_zero_dst # calculate scale factor 2 (dst)
				14326
				14327	neg.l (%sp) # (%sp) = -scale1
				14328	add.l %d0,(%sp) # (%sp) = scale2 - scale1 = S.F.
				14329
					# NOTE(review): the rnd prec placed in d1 by the next two lines
					# does not appear to be read; every path from here reloads d1
					# before using it. Confirm before removing.
				14330	mov.w 2+L_SCR3(%a6),%d1 # fetch precision,mode
				14331	lsr.b &0x6,%d1 # d1.b = rnd prec field
				14332	mov.l (%sp)+,%d0 # d0 = S.F. (popped)
				14333	cmpi.l %d0,&0x3fff-0x7ffe # could result overflow?
				14334	ble.w fsgldiv_may_ovfl # maybe; decided after the divide
				14335
				14336	cmpi.l %d0,&0x3fff-0x0000 # will result underflow?
				14337	beq.w fsgldiv_may_unfl # maybe
				14338	bgt.w fsgldiv_unfl # yes; go handle underflow
				14339
				14340	fsgldiv_normal:
					# No overflow or underflow expected: divide the scaled operands
					# under the caller's FPCR and merge the resulting status bits into
					# USER_FPSR. d0 still holds the scale factor.
				14341	fmovm.x FP_SCR1(%a6),&0x80 # load scaled dst op into fp0
				14342
				14343	fmov.l L_SCR3(%a6),%fpcr # set FPCR from saved rnd info
				14344	fmov.l &0x0,%fpsr # clear FPSR
				14345
				14346	fsgldiv.x FP_SCR0(%a6),%fp0 # perform sgl divide: fp0 = fp0 / scaled src
				14347
				14348	fmov.l %fpsr,%d1 # save FPSR
				14349	fmov.l &0x0,%fpcr # clear FPCR
				14350
				14351	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14352
					# Shared exit (also reached from the may_ovfl/may_unfl checks):
					# rewrite the exponent of fp0 as (exp - scale factor), keeping the
					# sign bit, and return the result in fp0. d2 is preserved.
				14353	fsgldiv_normal_exit:
				14354	fmovm.x &0x80,FP_SCR0(%a6) # store result on stack
				14355	mov.l %d2,-(%sp) # save d2
				14356	mov.w FP_SCR0_EX(%a6),%d1 # load {sgn,exp}
				14357	mov.l %d1,%d2 # make a copy
				14358	andi.l &0x7fff,%d1 # strip sign
				14359	andi.w &0x8000,%d2 # keep old sign
				14360	sub.l %d0,%d1 # apply scale factor: exp = exp - S.F.
				14361	or.w %d2,%d1 # concat old sign,new exp
				14362	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14363	mov.l (%sp)+,%d2 # restore d2
				14364	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				14365	rts
				14366
				14367	fsgldiv_may_ovfl:
					# The scale factor says overflow is possible. Divide the scaled
					# operands, then test the unscaled exponent (exp - S.F.) of the
					# quotient against 0x7fff to decide.
				14368	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14369
				14370	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14371	fmov.l &0x0,%fpsr # clear FPSR
				14372
				14373	fsgldiv.x FP_SCR0(%a6),%fp0 # execute divide
				14374
				14375	fmov.l %fpsr,%d1 # save status
				14376	fmov.l &0x0,%fpcr # clear FPCR
				14377
				14378	or.l %d1,USER_FPSR(%a6) # save INEX,N
				14379
				14380	fmovm.x &0x01,-(%sp) # save result to stack
				14381	mov.w (%sp),%d1 # fetch new exponent
				14382	add.l &0xc,%sp # clear result (pop the 12 bytes just pushed)
				14383	andi.l &0x7fff,%d1 # strip sign
				14384	sub.l %d0,%d1 # apply scale factor: exp = exp - S.F.
				14385	cmp.l %d1,&0x7fff # did divide overflow?
				14386	blt.b fsgldiv_normal_exit # no; finish through the normal exit
				14387
				14388	fsgldiv_ovfl_tst:
				14389	or.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
				14390
				14391	mov.b FPCR_ENABLE(%a6),%d1
				14392	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				14393	bne.b fsgldiv_ovfl_ena # yes
				14394
					# Build the default overflow result. The enabled path below also
					# branches back here after placing the EXOP in fp1.
				14395	fsgldiv_ovfl_dis:
				14396	btst &neg_bit,FPSR_CC(%a6) # is result negative
				14397	sne %d1 # set sign param accordingly
				14398	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
				14399	andi.b &0x30,%d0 # kill precision
				14400	bsr.l ovf_res # calculate default result
				14401	or.b %d0,FPSR_CC(%a6) # set INF if applicable
				14402	fmovm.x (%a0),&0x80 # return default result in fp0
				14403	rts
				14404
					# OVFL or INEX is enabled: build the EXOP from the quotient with
					# exponent (exp - S.F. - 0x6000) masked to 15 bits, return it in
					# fp1, then produce the default result as above.
				14405	fsgldiv_ovfl_ena:
				14406	fmovm.x &0x80,FP_SCR0(%a6) # move result to stack
				14407
				14408	mov.l %d2,-(%sp) # save d2
				14409	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				14410	mov.l %d1,%d2 # make a copy
				14411	andi.l &0x7fff,%d1 # strip sign
				14412	andi.w &0x8000,%d2 # keep old sign
				14413	sub.l %d0,%d1 # apply scale factor: exp = exp - S.F.
				14414	subi.l &0x6000,%d1 # subtract new bias
				14415	andi.w &0x7fff,%d1 # clear ms bit
				14416	or.w %d2,%d1 # concat old sign,new exp
				14417	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14418	mov.l (%sp)+,%d2 # restore d2
				14419	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				14420	bra.b fsgldiv_ovfl_dis # go build the default result in fp0
				14421
				14422	fsgldiv_unfl:
					# The result will underflow. Flag UNFL, then divide the scaled
					# operands with FPCR set to RZ only; that quotient in fp0 is what
					# unf_res4 turns into the default result.
				14423	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				14424
				14425	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14426
				14427	fmov.l &rz_mode*0x10,%fpcr # set FPCR: RZ rounding mode only
				14428	fmov.l &0x0,%fpsr # clear FPSR
				14429
				14430	fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
				14431
				14432	fmov.l %fpsr,%d1 # save status
				14433	fmov.l &0x0,%fpcr # clear FPCR
				14434
				14435	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14436
				14437	mov.b FPCR_ENABLE(%a6),%d1
				14438	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				14439	bne.b fsgldiv_unfl_ena # yes
				14440
					# Default underflow result. Also reached from fsgldiv_unfl_ena,
					# in which case fp0 still holds the RZ quotient computed above.
				14441	fsgldiv_unfl_dis:
				14442	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				14443
				14444	lea FP_SCR0(%a6),%a0 # pass: result addr
				14445	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				14446	bsr.l unf_res4 # calculate default result
				14447	or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
				14448	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				14449	rts
				14450
				14451	#
				14452	# UNFL or INEX is enabled: redo the divide in fp1 under the user's FPCR
				14453	# and return it as the EXOP with exponent (exp - S.F. + 0x6000).
				14454	fsgldiv_unfl_ena:
				14455	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				14456
				14457	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14458	fmov.l &0x0,%fpsr # clear FPSR
				14459
				14460	fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide
				14461
				14462	fmov.l &0x0,%fpcr # clear FPCR
				14463
				14464	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
				14465	mov.l %d2,-(%sp) # save d2
				14466	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				14467	mov.l %d1,%d2 # make a copy
				14468	andi.l &0x7fff,%d1 # strip sign
				14469	andi.w &0x8000,%d2 # keep old sign
				14470	sub.l %d0,%d1 # apply scale factor: exp = exp - S.F.
				14471	addi.l &0x6000,%d1 # add bias
				14472	andi.w &0x7fff,%d1 # clear top bit
				14473	or.w %d2,%d1 # concat old sign, new exp
				14474	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14475	mov.l (%sp)+,%d2 # restore d2
				14476	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				14477	bra.b fsgldiv_unfl_dis # go build the default result in fp0
				14478
				14479	#
				14480	# the divide operation MAY underflow:
				14481	#
				14482	fsgldiv_may_unfl:
					# Boundary scale factor: underflow depends on the quotient itself.
					# |result| > 1 means no underflow, |result| < 1 means underflow,
					# and |result| == 1 needs the RZ re-check further down.
				14483	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14484
				14485	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14486	fmov.l &0x0,%fpsr # clear FPSR
				14487
				14488	fsgldiv.x FP_SCR0(%a6),%fp0 # execute sgl divide
				14489
				14490	fmov.l %fpsr,%d1 # save status
				14491	fmov.l &0x0,%fpcr # clear FPCR
				14492
				14493	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				14494
				14495	fabs.x %fp0,%fp1 # fp1 = |result|; fp0 is left untouched
				14496	fcmp.b %fp1,&0x1 # compare |result| with 1.b
				14497	fbgt.w fsgldiv_normal_exit # |result| > 1: no underflow occurred
				14498	fblt.w fsgldiv_unfl # |result| < 1: underflow occurred
				14499
				14500	#
				14501	# we still don't know if underflow occurred. result is ~ equal to 1. but,
				14502	# we don't know if the result was an underflow that rounded up to a 1
				14503	# or a normalized number that rounded down to a 1. so, redo the entire
				14504	# operation using RZ as the rounding mode to see what the pre-rounded
				14505	# result is. this case should be relatively rare.
				14506	#
				14507	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into %fp1
				14508
				14509	clr.l %d1 # clear scratch register
				14510	ori.b &rz_mode*0x10,%d1 # force RZ rnd mode
				14511
				14512	fmov.l %d1,%fpcr # set FPCR
				14513	fmov.l &0x0,%fpsr # clear FPSR
				14514
				14515	fsgldiv.x FP_SCR0(%a6),%fp1 # execute sgl divide (fp0 keeps first result)
				14516
				14517	fmov.l &0x0,%fpcr # clear FPCR
				14518	fabs.x %fp1 # make absolute value
				14519	fcmp.b %fp1,&0x1 # is |RZ result| < 1.b?
				14520	fbge.w fsgldiv_normal_exit # no; no underflow occurred
				14521	bra.w fsgldiv_unfl # yes; underflow occurred
				14522
				14523	############################################################################
				14524
				14525	#
				14526	# Divide: inputs are not both normalized; what are they?
				14527	#
				14528	fsgldiv_not_norm:
					# d1 = (dst tag << 3) | src tag. Fetch the 16-bit offset stored at
					# that index and jump to tbl_fsgldiv_op + offset. Table comments
					# read "DST / SRC".
				14529	mov.w (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from table
				14530	jmp (tbl_fsgldiv_op.b,%pc,%d1.w*1) # jump to tbl_fsgldiv_op + offset
				14531
				14532	swbeg &48
				14533	tbl_fsgldiv_op:
				14534	short fsgldiv_norm - tbl_fsgldiv_op # NORM / NORM
				14535	short fsgldiv_inf_load - tbl_fsgldiv_op # NORM / ZERO
				14536	short fsgldiv_zero_load - tbl_fsgldiv_op # NORM / INF
				14537	short fsgldiv_res_qnan - tbl_fsgldiv_op # NORM / QNAN
				14538	short fsgldiv_norm - tbl_fsgldiv_op # NORM / DENORM
				14539	short fsgldiv_res_snan - tbl_fsgldiv_op # NORM / SNAN
				14540	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14541	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14542
				14543	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / NORM
				14544	short fsgldiv_res_operr - tbl_fsgldiv_op # ZERO / ZERO
				14545	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / INF
				14546	short fsgldiv_res_qnan - tbl_fsgldiv_op # ZERO / QNAN
				14547	short fsgldiv_zero_load - tbl_fsgldiv_op # ZERO / DENORM
				14548	short fsgldiv_res_snan - tbl_fsgldiv_op # ZERO / SNAN
				14549	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14550	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14551
				14552	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / NORM
				14553	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / ZERO
				14554	short fsgldiv_res_operr - tbl_fsgldiv_op # INF / INF
				14555	short fsgldiv_res_qnan - tbl_fsgldiv_op # INF / QNAN
				14556	short fsgldiv_inf_dst - tbl_fsgldiv_op # INF / DENORM
				14557	short fsgldiv_res_snan - tbl_fsgldiv_op # INF / SNAN
				14558	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14559	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14560
				14561	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / NORM
				14562	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / ZERO
				14563	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / INF
				14564	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / QNAN
				14565	short fsgldiv_res_qnan - tbl_fsgldiv_op # QNAN / DENORM
				14566	short fsgldiv_res_snan - tbl_fsgldiv_op # QNAN / SNAN
				14567	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14568	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14569
				14570	short fsgldiv_norm - tbl_fsgldiv_op # DENORM / NORM
				14571	short fsgldiv_inf_load - tbl_fsgldiv_op # DENORM / ZERO
				14572	short fsgldiv_zero_load - tbl_fsgldiv_op # DENORM / INF
				14573	short fsgldiv_res_qnan - tbl_fsgldiv_op # DENORM / QNAN
				14574	short fsgldiv_norm - tbl_fsgldiv_op # DENORM / DENORM
				14575	short fsgldiv_res_snan - tbl_fsgldiv_op # DENORM / SNAN
				14576	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14577	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14578
				14579	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / NORM
				14580	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / ZERO
				14581	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / INF
				14582	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / QNAN
				14583	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / DENORM
				14584	short fsgldiv_res_snan - tbl_fsgldiv_op # SNAN / SNAN
				14585	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14586	short tbl_fsgldiv_op - tbl_fsgldiv_op # offset 0: no handler for this index
				14587
					# Local stubs: the table holds 16-bit offsets, so each entry points
					# at a nearby label that long-branches to the shared handler.
				14588	fsgldiv_res_qnan:
				14589	bra.l res_qnan
				14590	fsgldiv_res_snan:
				14591	bra.l res_snan
				14592	fsgldiv_res_operr:
				14593	bra.l res_operr
				14594	fsgldiv_inf_load:
				14595	bra.l fdiv_inf_load
				14596	fsgldiv_zero_load:
				14597	bra.l fdiv_zero_load
				14598	fsgldiv_inf_dst:
				14599	bra.l fdiv_inf_dst
				14600
				14601	#########################################################################
				14602	# XDEF **************************************************************** #
				14603	# fadd(): emulates the fadd instruction #
				14604	# fsadd(): emulates the fsadd instruction #
				14605	# fdadd(): emulates the fdadd instruction #
				14606	# #
				14607	# XREF **************************************************************** #
				14608	# addsub_scaler2() - scale the operands so they won't take exc #
				14609	# ovf_res() - return default overflow result #
				14610	# unf_res() - return default underflow result #
				14611	# res_qnan() - set QNAN result #
				14612	# res_snan() - set SNAN result #
				14613	# res_operr() - set OPERR result #
				14614	# scale_to_zero_src() - set src operand exponent equal to zero #
				14615	# scale_to_zero_dst() - set dst operand exponent equal to zero #
				14616	# #
				14617	# INPUT *************************************************************** #
				14618	# a0 = pointer to extended precision source operand #
				14619	# a1 = pointer to extended precision destination operand #
				14620	# #
				14621	# OUTPUT ************************************************************** #
				14622	# fp0 = result #
				14623	# fp1 = EXOP (if exception occurred) #
				14624	# #
				14625	# ALGORITHM *********************************************************** #
				14626	# Handle NANs, infinities, and zeroes as special cases. Divide #
				14627	# norms into extended, single, and double precision. #
				14628	# Do addition after scaling exponents such that exception won't #
				14629	# occur. Then, check result exponent to see if exception would have #
				14630	# occurred. If so, return default result and maybe EXOP. Else, insert #
				14631	# the correct result exponent and return. Set FPSR bits as appropriate. #
				14632	# #
				14633	#########################################################################
				14634
				14635	global fsadd
					# fsadd: rewrite the low byte of the rnd info in d0 so that only
					# the rounding-mode bits (mask 0x30) survive, OR in the
					# single-precision selector, then join the common fadd code.
				14636	fsadd:
				14637	andi.b &0x30,%d0 # keep rnd mode bits only (clears rnd prec)
				14638	ori.b &s_mode*0x10,%d0 # insert sgl prec
				14639	bra.b fadd # continue in the common add routine
				14640
				14641	global fdadd
					# fdadd: same as fsadd but selects double precision; there is no
					# branch because execution falls through into fadd.
				14642	fdadd:
				14643	andi.b &0x30,%d0 # keep rnd mode bits only (clears rnd prec)
				14644	ori.b &d_mode*0x10,%d0 # insert dbl prec
				14645
				14646	global fadd
				14647	fadd:
				14648	mov.l %d0,L_SCR3(%a6) # store rnd info
				14649
				14650	clr.w %d1
				14651	mov.b DTAG(%a6),%d1
				14652	lsl.b &0x3,%d1
				14653	or.b STAG(%a6),%d1 # combine src tags
				14654
				14655	bne.w fadd_not_norm # optimize on non-norm input
				14656
				14657	#
				14658	# ADD: norms and denorms
				14659	#
				14660	fadd_norm:
				14661	bsr.l addsub_scaler2 # scale exponents
				14662
				14663	fadd_zero_entry:
				14664	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14665
				14666	fmov.l &0x0,%fpsr # clear FPSR
				14667	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14668
				14669	fadd.x FP_SCR0(%a6),%fp0 # execute add
				14670
				14671	fmov.l &0x0,%fpcr # clear FPCR
				14672	fmov.l %fpsr,%d1 # fetch INEX2,N,Z
				14673
				14674	or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
				14675
				14676	fbeq.w fadd_zero_exit # if result is zero, end now
				14677
				14678	mov.l %d2,-(%sp) # save d2
				14679
				14680	fmovm.x &0x01,-(%sp) # save result to stack
				14681
				14682	mov.w 2+L_SCR3(%a6),%d1
				14683	lsr.b &0x6,%d1
				14684
				14685	mov.w (%sp),%d2 # fetch new sign, exp
				14686	andi.l &0x7fff,%d2 # strip sign
				14687	sub.l %d0,%d2 # add scale factor
				14688
				14689	cmp.l %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
				14690	bge.b fadd_ovfl # yes
				14691
				14692	cmp.l %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
				14693	blt.w fadd_unfl # yes
				14694	beq.w fadd_may_unfl # maybe; go find out
				14695
				14696	fadd_normal:
				14697	mov.w (%sp),%d1
				14698	andi.w &0x8000,%d1 # keep sign
				14699	or.w %d2,%d1 # concat sign,new exp
				14700	mov.w %d1,(%sp) # insert new exponent
				14701
				14702	fmovm.x (%sp)+,&0x80 # return result in fp0
				14703
				14704	mov.l (%sp)+,%d2 # restore d2
				14705	rts
				14706
				14707	fadd_zero_exit:
				14708	# fmov.s &0x00000000,%fp0 # return zero in fp0
				14709	rts
				14710
				14711	tbl_fadd_ovfl:
				14712	long 0x7fff # ext ovfl
				14713	long 0x407f # sgl ovfl
				14714	long 0x43ff # dbl ovfl
				14715
				14716	tbl_fadd_unfl:
				14717	long 0x0000 # ext unfl
				14718	long 0x3f81 # sgl unfl
				14719	long 0x3c01 # dbl unfl
				14720
				14721	fadd_ovfl:
				14722	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				14723
				14724	mov.b FPCR_ENABLE(%a6),%d1
				14725	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				14726	bne.b fadd_ovfl_ena # yes
				14727
				14728	add.l &0xc,%sp
				14729	fadd_ovfl_dis:
				14730	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				14731	sne %d1 # set sign param accordingly
				14732	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
				14733	bsr.l ovf_res # calculate default result
				14734	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				14735	fmovm.x (%a0),&0x80 # return default result in fp0
				14736	mov.l (%sp)+,%d2 # restore d2
				14737	rts
				14738
				14739	fadd_ovfl_ena:
				14740	mov.b L_SCR3(%a6),%d1
				14741	andi.b &0xc0,%d1 # is precision extended?
				14742	bne.b fadd_ovfl_ena_sd # no; prec = sgl or dbl
				14743
				14744	fadd_ovfl_ena_cont:
				14745	mov.w (%sp),%d1
				14746	andi.w &0x8000,%d1 # keep sign
				14747	subi.l &0x6000,%d2 # add extra bias
				14748	andi.w &0x7fff,%d2
				14749	or.w %d2,%d1 # concat sign,new exp
				14750	mov.w %d1,(%sp) # insert new exponent
				14751
				14752	fmovm.x (%sp)+,&0x40 # return EXOP in fp1
				14753	bra.b fadd_ovfl_dis
				14754
				14755	fadd_ovfl_ena_sd:
				14756	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14757
				14758	mov.l L_SCR3(%a6),%d1
				14759	andi.b &0x30,%d1 # keep rnd mode
				14760	fmov.l %d1,%fpcr # set FPCR
				14761
				14762	fadd.x FP_SCR0(%a6),%fp0 # execute add
				14763
				14764	fmov.l &0x0,%fpcr # clear FPCR
				14765
				14766	add.l &0xc,%sp
				14767	fmovm.x &0x01,-(%sp)
				14768	bra.b fadd_ovfl_ena_cont
				14769
				14770	fadd_unfl:
					# Underflow path: set the UNFL exception bit, redo the add in RZ mode,
					# and return the default result from unf_res. If UNFL or INEX is
					# enabled, an EXOP is also built in fp1 (fadd_unfl_ena).
				14771	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				14772
				14773	add.l &0xc,%sp # pop 12 bytes (presumably the saved result)
				14774
				14775	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				14776
				14777	fmov.l &rz_mode*0x10,%fpcr # set FPCR
				14778	fmov.l &0x0,%fpsr # clear FPSR
				14779
				14780	fadd.x FP_SCR0(%a6),%fp0 # execute add
				14781
				14782	fmov.l &0x0,%fpcr # clear FPCR
				14783	fmov.l %fpsr,%d1 # save status
				14784
				14785	or.l %d1,USER_FPSR(%a6) # save INEX,N
				14786
				14787	mov.b FPCR_ENABLE(%a6),%d1
				14788	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				14789	bne.b fadd_unfl_ena # yes
				14790
				14791	fadd_unfl_dis:
				14792	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				14793
				14794	lea FP_SCR0(%a6),%a0 # pass: result addr
				14795	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				14796	bsr.l unf_res # calculate default result
				14797	or.b %d0,FPSR_CC(%a6) # 'Z' bit may have been set
				14798	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				14799	mov.l (%sp)+,%d2 # restore d2
				14800	rts
				14801
				14802	fadd_unfl_ena:
				14803	fmovm.x FP_SCR1(%a6),&0x40 # load dst op
				14804
				14805	mov.l L_SCR3(%a6),%d1
				14806	andi.b &0xc0,%d1 # is precision extended?
				14807	bne.b fadd_unfl_ena_sd # no; sgl or dbl
				14808
				14809	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				14810
				14811	fadd_unfl_ena_cont:
				14812	fmov.l &0x0,%fpsr # clear FPSR
				14813
				14814	fadd.x FP_SCR0(%a6),%fp1 # execute add
				14815
				14816	fmov.l &0x0,%fpcr # clear FPCR
				14817
				14818	fmovm.x &0x40,FP_SCR0(%a6) # save result to stack
				14819	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				14820	mov.l %d1,%d2 # make a copy
				14821	andi.l &0x7fff,%d1 # strip sign
				14822	andi.w &0x8000,%d2 # keep old sign
				14823	sub.l %d0,%d1 # add scale factor
				14824	addi.l &0x6000,%d1 # add new bias
				14825	andi.w &0x7fff,%d1 # clear top bit
				14826	or.w %d2,%d1 # concat sign,new exp
				14827	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				14828	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				14829	bra.w fadd_unfl_dis
				14830
				14831	fadd_unfl_ena_sd:
				14832	mov.l L_SCR3(%a6),%d1
				14833	andi.b &0x30,%d1 # use only rnd mode
				14834	fmov.l %d1,%fpcr # set FPCR
				14835
				14836	bra.b fadd_unfl_ena_cont
				14837
				14838	#
				14839	# result is equal to the smallest normalized number in the selected precision
				14840	# if the precision is extended, this result could not have come from an
				14841	# underflow that rounded up.
				14842	#
				14843	fadd_may_unfl:
				14844	mov.l L_SCR3(%a6),%d1
				14845	andi.b &0xc0,%d1 # is precision extended?
				14846	beq.w fadd_normal # yes; no underflow occurred
				14847
				14848	mov.l 0x4(%sp),%d1 # extract hi(man)
				14849	cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
				14850	bne.w fadd_normal # no; no underflow occurred
				14851
				14852	tst.l 0x8(%sp) # is lo(man) = 0x0?
				14853	bne.w fadd_normal # no; no underflow occurred
				14854
				14855	btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
				14856	beq.w fadd_normal # no; no underflow occurred
				14857
				14858	#
				14859	# ok, so now the result has an exponent equal to the smallest normalized
				14860	# exponent for the selected precision. also, the mantissa is equal to
				14861	# 0x8000000000000000 and this mantissa is the result of rounding non-zero
				14862	# g,r,s.
				14863	# now, we must determine whether the pre-rounded result was an underflow
				14864	# rounded "up" or a normalized number rounded "down".
				14865	# so, we do this by re-executing the add using RZ as the rounding mode and
				14866	# seeing if the new result is smaller or equal to the current result.
				14867	#
				14868	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				14869
				14870	mov.l L_SCR3(%a6),%d1
				14871	andi.b &0xc0,%d1 # keep rnd prec
				14872	ori.b &rz_mode*0x10,%d1 # insert rnd mode
				14873	fmov.l %d1,%fpcr # set FPCR
				14874	fmov.l &0x0,%fpsr # clear FPSR
				14875
				14876	fadd.x FP_SCR0(%a6),%fp1 # execute add
				14877
				14878	fmov.l &0x0,%fpcr # clear FPCR
				14879
				14880	fabs.x %fp0 # compare absolute values
				14881	fabs.x %fp1
				14882	fcmp.x %fp0,%fp1 # is first result > second?
				14883
				14884	fbgt.w fadd_unfl # yes; it's an underflow
				14885	bra.w fadd_normal # no; it's not an underflow
				14886
				14887	##########################################################################
				14888
				14889	#
				14890	# Add: inputs are not both normalized; what are they?
				14891	#
				14892	fadd_not_norm:
					# Dispatch through tbl_fadd_op using the index in d1. Each table entry
					# is a 16-bit offset from tbl_fadd_op; rows are the dst type, columns
					# the src type (8 entries per row, the last two are padding).
					# NOTE(review): d1 is presumably (DTAG<<3)|STAG, as fsub computes it.
				14893	mov.w (tbl_fadd_op.b,%pc,%d1.w*2),%d1
				14894	jmp (tbl_fadd_op.b,%pc,%d1.w*1)
				14895
				14896	swbeg &48
				14897	tbl_fadd_op:
				14898	short fadd_norm - tbl_fadd_op # NORM + NORM
				14899	short fadd_zero_src - tbl_fadd_op # NORM + ZERO
				14900	short fadd_inf_src - tbl_fadd_op # NORM + INF
				14901	short fadd_res_qnan - tbl_fadd_op # NORM + QNAN
				14902	short fadd_norm - tbl_fadd_op # NORM + DENORM
				14903	short fadd_res_snan - tbl_fadd_op # NORM + SNAN
				14904	short tbl_fadd_op - tbl_fadd_op #
				14905	short tbl_fadd_op - tbl_fadd_op #
				14906
				14907	short fadd_zero_dst - tbl_fadd_op # ZERO + NORM
				14908	short fadd_zero_2 - tbl_fadd_op # ZERO + ZERO
				14909	short fadd_inf_src - tbl_fadd_op # ZERO + INF
				14910	short fadd_res_qnan - tbl_fadd_op # ZERO + QNAN
				14911	short fadd_zero_dst - tbl_fadd_op # ZERO + DENORM
				14912	short fadd_res_snan - tbl_fadd_op # ZERO + SNAN
				14913	short tbl_fadd_op - tbl_fadd_op #
				14914	short tbl_fadd_op - tbl_fadd_op #
				14915
				14916	short fadd_inf_dst - tbl_fadd_op # INF + NORM
				14917	short fadd_inf_dst - tbl_fadd_op # INF + ZERO
				14918	short fadd_inf_2 - tbl_fadd_op # INF + INF
				14919	short fadd_res_qnan - tbl_fadd_op # INF + QNAN
				14920	short fadd_inf_dst - tbl_fadd_op # INF + DENORM
				14921	short fadd_res_snan - tbl_fadd_op # INF + SNAN
				14922	short tbl_fadd_op - tbl_fadd_op #
				14923	short tbl_fadd_op - tbl_fadd_op #
				14924
				14925	short fadd_res_qnan - tbl_fadd_op # QNAN + NORM
				14926	short fadd_res_qnan - tbl_fadd_op # QNAN + ZERO
				14927	short fadd_res_qnan - tbl_fadd_op # QNAN + INF
				14928	short fadd_res_qnan - tbl_fadd_op # QNAN + QNAN
				14929	short fadd_res_qnan - tbl_fadd_op # QNAN + DENORM
				14930	short fadd_res_snan - tbl_fadd_op # QNAN + SNAN
				14931	short tbl_fadd_op - tbl_fadd_op #
				14932	short tbl_fadd_op - tbl_fadd_op #
				14933
				14934	short fadd_norm - tbl_fadd_op # DENORM + NORM
				14935	short fadd_zero_src - tbl_fadd_op # DENORM + ZERO
				14936	short fadd_inf_src - tbl_fadd_op # DENORM + INF
				14937	short fadd_res_qnan - tbl_fadd_op # DENORM + QNAN
				14938	short fadd_norm - tbl_fadd_op # DENORM + DENORM
				14939	short fadd_res_snan - tbl_fadd_op # DENORM + SNAN
				14940	short tbl_fadd_op - tbl_fadd_op #
				14941	short tbl_fadd_op - tbl_fadd_op #
				14942
				14943	short fadd_res_snan - tbl_fadd_op # SNAN + NORM
				14944	short fadd_res_snan - tbl_fadd_op # SNAN + ZERO
				14945	short fadd_res_snan - tbl_fadd_op # SNAN + INF
				14946	short fadd_res_snan - tbl_fadd_op # SNAN + QNAN
				14947	short fadd_res_snan - tbl_fadd_op # SNAN + DENORM
				14948	short fadd_res_snan - tbl_fadd_op # SNAN + SNAN
				14949	short tbl_fadd_op - tbl_fadd_op #
				14950	short tbl_fadd_op - tbl_fadd_op #
				14951
				14952	fadd_res_qnan:
				14953	bra.l res_qnan
				14954	fadd_res_snan:
				14955	bra.l res_snan
				14956
				14957	#
				14958	# both operands are ZEROes
				14959	#
				14960	fadd_zero_2:
				14961	mov.b SRC_EX(%a0),%d0 # are the signs opposite
				14962	mov.b DST_EX(%a1),%d1
				14963	eor.b %d0,%d1 # d1 = src ^ dst; d0 (src) is left intact
				14964	bmi.w fadd_zero_2_chk_rm # weed out (-ZERO)+(+ZERO)
				14965
				14966	# the signs are the same. so determine whether they are positive or negative
				14967	# and return the appropriately signed zero.
				14968	tst.b %d0 # are ZEROes positive or negative?
				14969	bmi.b fadd_zero_rm # negative
				14970	fmov.s &0x00000000,%fp0 # return +ZERO
				14971	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				14972	rts
				14973
				14974	#
				14975	# the ZEROes have opposite signs:
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	14976	# - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	14977	# - -ZERO is returned in the case of RM.
				14978	#
				14979	fadd_zero_2_chk_rm:
				14980	mov.b 3+L_SCR3(%a6),%d1 # fetch low byte of rnd info
				14981	andi.b &0x30,%d1 # extract rnd mode
				14982	cmpi.b %d1,&rm_mode*0x10 # is rnd mode == RM?
				14983	beq.b fadd_zero_rm # yes
				14984	fmov.s &0x00000000,%fp0 # return +ZERO
				14985	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				14986	rts
				14987
				14988	fadd_zero_rm:
				14989	fmov.s &0x80000000,%fp0 # return -ZERO
				14990	mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
				14991	rts
				14992
				14993	#
				14994	# one operand is a ZERO and the other is a DENORM or NORM. scale
				14995	# the DENORM or NORM and jump to the regular fadd routine.
				14996	#
				14997	fadd_zero_dst:
				14998	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
				14999	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				15000	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15001	bsr.l scale_to_zero_src # scale the operand
				15002	clr.w FP_SCR1_EX(%a6) # dst is ZERO: clear FP_SCR1
				15003	clr.l FP_SCR1_HI(%a6)
				15004	clr.l FP_SCR1_LO(%a6)
				15005	bra.w fadd_zero_entry # go execute fadd
				15006
				15007	fadd_zero_src:
				15008	mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
				15009	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				15010	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				15011	bsr.l scale_to_zero_dst # scale the operand
				15012	clr.w FP_SCR0_EX(%a6) # src is ZERO: clear FP_SCR0
				15013	clr.l FP_SCR0_HI(%a6)
				15014	clr.l FP_SCR0_LO(%a6)
				15015	bra.w fadd_zero_entry # go execute fadd
				15016
				15017	#
				15018	# both operands are INFs. an OPERR will result if the INFs have
				15019	# different signs. else, an INF of the same sign is returned
				15020	#
				15021	fadd_inf_2:
				15022	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
				15023	mov.b DST_EX(%a1),%d1
				15024	eor.b %d1,%d0 # d0 = src ^ dst
				15025	bmi.l res_operr # weed out (-INF)+(+INF)
				15026
				15027	# ok, so it's not an OPERR. but, we do have to remember to return the
				15028	# src INF since that's where the 881/882 gets the j-bit from...
					# (execution falls through into fadd_inf_src below.)
				15029
				15030	#
				15031	# operands are INF and one of {ZERO, INF, DENORM, NORM}
				15032	#
				15033	fadd_inf_src:
				15034	fmovm.x SRC(%a0),&0x80 # return src INF
				15035	tst.b SRC_EX(%a0) # is INF positive?
				15036	bpl.b fadd_inf_done # yes; we're done
				15037	mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
				15038	rts
				15039
				15040	#
				15041	# operands are INF and one of {ZERO, INF, DENORM, NORM}
				15042	#
				15043	fadd_inf_dst:
				15044	fmovm.x DST(%a1),&0x80 # return dst INF
				15045	tst.b DST_EX(%a1) # is INF positive?
				15046	bpl.b fadd_inf_done # yes; we're done
				15047	mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
				15048	rts
				15049
				15050	fadd_inf_done:
				15051	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
				15052	rts
				15053
				15054	#########################################################################
				15055	# XDEF **************************************************************** #
				15056	# fsub(): emulates the fsub instruction #
				15057	# fssub(): emulates the fssub instruction #
				15058	# fdsub(): emulates the fdsub instruction #
				15059	# #
				15060	# XREF **************************************************************** #
				15061	# addsub_scaler2() - scale the operands so they won't take exc #
				15062	# ovf_res() - return default overflow result #
				15063	# unf_res() - return default underflow result #
				15064	# res_qnan() - set QNAN result #
				15065	# res_snan() - set SNAN result #
				15066	# res_operr() - set OPERR result #
				15067	# scale_to_zero_src() - set src operand exponent equal to zero #
				15068	# scale_to_zero_dst() - set dst operand exponent equal to zero #
				15069	# #
				15070	# INPUT *************************************************************** #
				15071	# a0 = pointer to extended precision source operand #
				15072	# a1 = pointer to extended precision destination operand #
				15073	# #
				15074	# OUTPUT ************************************************************** #
				15075	# fp0 = result #
				15076	# fp1 = EXOP (if exception occurred) #
				15077	# #
				15078	# ALGORITHM *********************************************************** #
				15079	# Handle NANs, infinities, and zeroes as special cases. Divide #
				15080	# norms into extended, single, and double precision. #
				15081	# Do subtraction after scaling exponents such that exception won't#
				15082	# occur. Then, check result exponent to see if exception would have #
				15083	# occurred. If so, return default result and maybe EXOP. Else, insert #
				15084	# the correct result exponent and return. Set FPSR bits as appropriate. #
				15085	# #
				15086	#########################################################################
				15087
				15088	global fssub
				15089	fssub:
				15090	andi.b &0x30,%d0 # clear rnd prec
				15091	ori.b &s_mode*0x10,%d0 # insert sgl prec
				15092	bra.b fsub
				15093
				15094	global fdsub
				15095	fdsub:
				15096	andi.b &0x30,%d0 # clear rnd prec
				15097	ori.b &d_mode*0x10,%d0 # insert dbl prec
					# (falls through into fsub)
				15098
				15099	global fsub
				15100	fsub:
				15101	mov.l %d0,L_SCR3(%a6) # store rnd info
				15102
				15103	clr.w %d1
				15104	mov.b DTAG(%a6),%d1
				15105	lsl.b &0x3,%d1 # d1 = DTAG << 3
				15106	or.b STAG(%a6),%d1 # combine src tags
				15107
				15108	bne.w fsub_not_norm # optimize on non-norm input
				15109
				15110	#
				15111	# SUB: norms and denorms
				15112	#
				15113	fsub_norm:
				15114	bsr.l addsub_scaler2 # scale exponents
				15115
				15116	fsub_zero_entry:
				15117	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				15118
				15119	fmov.l &0x0,%fpsr # clear FPSR
				15120	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15121
				15122	fsub.x FP_SCR0(%a6),%fp0 # execute subtract
				15123
				15124	fmov.l &0x0,%fpcr # clear FPCR
				15125	fmov.l %fpsr,%d1 # fetch INEX2, N, Z
				15126
				15127	or.l %d1,USER_FPSR(%a6) # save exc and ccode bits
				15128
				15129	fbeq.w fsub_zero_exit # if result zero, end now
				15130
				15131	mov.l %d2,-(%sp) # save d2
				15132
				15133	fmovm.x &0x01,-(%sp) # save result to stack
				15134
				15135	mov.w 2+L_SCR3(%a6),%d1 # fetch low word of rnd info
				15136	lsr.b &0x6,%d1 # d1 = rnd prec (index into tables below)
				15137
				15138	mov.w (%sp),%d2 # fetch new exponent
				15139	andi.l &0x7fff,%d2 # strip sign
				15140	sub.l %d0,%d2 # add scale factor
				15141
				15142	cmp.l %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
				15143	bge.b fsub_ovfl # yes
				15144
				15145	cmp.l %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
				15146	blt.w fsub_unfl # yes
				15147	beq.w fsub_may_unfl # maybe; go find out
				15148
				15149	fsub_normal:
				15150	mov.w (%sp),%d1
				15151	andi.w &0x8000,%d1 # keep sign
				15152	or.w %d2,%d1 # concat sign,new exp
				15153	mov.w %d1,(%sp) # insert new exponent
				15154
				15155	fmovm.x (%sp)+,&0x80 # return result in fp0
				15156
				15157	mov.l (%sp)+,%d2 # restore d2
				15158	rts
				15159
				15160	fsub_zero_exit:
				15161	# fmov.s &0x00000000,%fp0 # return zero in fp0
				15162	rts
				15163
					# exponent thresholds, indexed by rnd prec (ext, sgl, dbl)
				15164	tbl_fsub_ovfl:
				15165	long 0x7fff # ext ovfl
				15166	long 0x407f # sgl ovfl
				15167	long 0x43ff # dbl ovfl
				15168
				15169	tbl_fsub_unfl:
				15170	long 0x0000 # ext unfl
				15171	long 0x3f81 # sgl unfl
				15172	long 0x3c01 # dbl unfl
				15173
				15174	fsub_ovfl:
				15175	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				15176
				15177	mov.b FPCR_ENABLE(%a6),%d1
				15178	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				15179	bne.b fsub_ovfl_ena # yes
				15180
				15181	add.l &0xc,%sp # pop the 12-byte result saved by fsub
				15182	fsub_ovfl_dis:
				15183	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				15184	sne %d1 # set sign param accordingly
				15185	mov.l L_SCR3(%a6),%d0 # pass prec:rnd
				15186	bsr.l ovf_res # calculate default result
				15187	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				15188	fmovm.x (%a0),&0x80 # return default result in fp0
				15189	mov.l (%sp)+,%d2 # restore d2
				15190	rts
				15191
				15192	fsub_ovfl_ena:
					# NOTE(review): the byte load below reads offset 0 of L_SCR3, while
					# fsub_zero_2_chk_rm reads the rnd byte from 3+L_SCR3 and
					# fsub_ovfl_ena_sd loads the full long word before masking. Confirm
					# that offset 0 really holds the rnd prec bits; if it does not, the
					# branch to fsub_ovfl_ena_sd may never be taken.
				15193	mov.b L_SCR3(%a6),%d1
				15194	andi.b &0xc0,%d1 # is precision extended?
				15195	bne.b fsub_ovfl_ena_sd # no
				15196
				15197	fsub_ovfl_ena_cont:
				15198	mov.w (%sp),%d1 # fetch {sgn,exp}
				15199	andi.w &0x8000,%d1 # keep sign
				15200	subi.l &0x6000,%d2 # subtract new bias
				15201	andi.w &0x7fff,%d2 # clear top bit
				15202	or.w %d2,%d1 # concat sign,exp
				15203	mov.w %d1,(%sp) # insert new exponent
				15204
				15205	fmovm.x (%sp)+,&0x40 # return EXOP in fp1
				15206	bra.b fsub_ovfl_dis
				15207
				15208	fsub_ovfl_ena_sd:
				15209	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				15210
				15211	mov.l L_SCR3(%a6),%d1
				15212	andi.b &0x30,%d1 # clear rnd prec
				15213	fmov.l %d1,%fpcr # set FPCR
				15214
				15215	fsub.x FP_SCR0(%a6),%fp0 # execute subtract
				15216
				15217	fmov.l &0x0,%fpcr # clear FPCR
				15218
				15219	add.l &0xc,%sp # pop the old 12-byte result
				15220	fmovm.x &0x01,-(%sp) # push the recomputed result
				15221	bra.b fsub_ovfl_ena_cont
				15222
				15223	fsub_unfl:
					# Underflow path: set the UNFL exception bit, redo the subtract in RZ
					# mode, and return the default result from unf_res. If UNFL or INEX is
					# enabled, an EXOP is also built in fp1 (fsub_unfl_ena).
				15224	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				15225
				15226	add.l &0xc,%sp # pop the 12-byte result saved by fsub
				15227
				15228	fmovm.x FP_SCR1(%a6),&0x80 # load dst op
				15229
				15230	fmov.l &rz_mode*0x10,%fpcr # set FPCR
				15231	fmov.l &0x0,%fpsr # clear FPSR
				15232
				15233	fsub.x FP_SCR0(%a6),%fp0 # execute subtract
				15234
				15235	fmov.l &0x0,%fpcr # clear FPCR
				15236	fmov.l %fpsr,%d1 # save status
				15237
				15238	or.l %d1,USER_FPSR(%a6) # merge status into USER_FPSR
				15239
				15240	mov.b FPCR_ENABLE(%a6),%d1
				15241	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				15242	bne.b fsub_unfl_ena # yes
				15243
				15244	fsub_unfl_dis:
				15245	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				15246
				15247	lea FP_SCR0(%a6),%a0 # pass: result addr
				15248	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				15249	bsr.l unf_res # calculate default result
				15250	or.b %d0,FPSR_CC(%a6) # 'Z' may have been set
				15251	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				15252	mov.l (%sp)+,%d2 # restore d2
				15253	rts
				15254
				15255	fsub_unfl_ena:
				15256	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				15257
				15258	mov.l L_SCR3(%a6),%d1
				15259	andi.b &0xc0,%d1 # is precision extended?
				15260	bne.b fsub_unfl_ena_sd # no
				15261
				15262	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15263
				15264	fsub_unfl_ena_cont:
				15265	fmov.l &0x0,%fpsr # clear FPSR
				15266
				15267	fsub.x FP_SCR0(%a6),%fp1 # execute subtract
				15268
				15269	fmov.l &0x0,%fpcr # clear FPCR
				15270
				15271	fmovm.x &0x40,FP_SCR0(%a6) # store result to stack
				15272	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				15273	mov.l %d1,%d2 # make a copy
				15274	andi.l &0x7fff,%d1 # strip sign
				15275	andi.w &0x8000,%d2 # keep old sign
				15276	sub.l %d0,%d1 # add scale factor
				15277	addi.l &0x6000,%d1 # add new bias
				15278	andi.w &0x7fff,%d1 # clear top bit
				15279	or.w %d2,%d1 # concat sgn,exp
				15280	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				15281	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				15282	bra.w fsub_unfl_dis
				15283
				15284	fsub_unfl_ena_sd:
				15285	mov.l L_SCR3(%a6),%d1
				15286	andi.b &0x30,%d1 # clear rnd prec
				15287	fmov.l %d1,%fpcr # set FPCR
				15288
				15289	bra.b fsub_unfl_ena_cont
				15290
				15291	#
				15292	# result is equal to the smallest normalized number in the selected precision
				15293	# if the precision is extended, this result could not have come from an
				15294	# underflow that rounded up.
				15295	#
				15296	fsub_may_unfl:
				15297	mov.l L_SCR3(%a6),%d1
				15298	andi.b &0xc0,%d1 # is precision extended?
				15299	beq.w fsub_normal # yes; no underflow occurred
				15300
				15301	mov.l 0x4(%sp),%d1 # extract hi(man)
				15302	cmpi.l %d1,&0x80000000 # is hi(man) = 0x80000000?
				15303	bne.w fsub_normal # no; no underflow occurred
				15304
				15305	tst.l 0x8(%sp) # is lo(man) = 0x0?
				15306	bne.w fsub_normal # no; no underflow occurred
				15307
				15308	btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
				15309	beq.w fsub_normal # no; no underflow occurred
				15310
				15311	#
				15312	# ok, so now the result has an exponent equal to the smallest normalized
				15313	# exponent for the selected precision. also, the mantissa is equal to
				15314	# 0x8000000000000000 and this mantissa is the result of rounding non-zero
				15315	# g,r,s.
				15316	# now, we must determine whether the pre-rounded result was an underflow
				15317	# rounded "up" or a normalized number rounded "down".
				15318	# so, we do this by re-executing the subtract using RZ as the rounding mode
				15319	# and seeing if the new result is smaller or equal to the current result.
				15320	#
				15321	fmovm.x FP_SCR1(%a6),&0x40 # load dst op into fp1
				15322
				15323	mov.l L_SCR3(%a6),%d1
				15324	andi.b &0xc0,%d1 # keep rnd prec
				15325	ori.b &rz_mode*0x10,%d1 # insert rnd mode
				15326	fmov.l %d1,%fpcr # set FPCR
				15327	fmov.l &0x0,%fpsr # clear FPSR
				15328
				15329	fsub.x FP_SCR0(%a6),%fp1 # execute subtract
				15330
				15331	fmov.l &0x0,%fpcr # clear FPCR
				15332
				15333	fabs.x %fp0 # compare absolute values
				15334	fabs.x %fp1
				15335	fcmp.x %fp0,%fp1 # is first result > second?
				15336
				15337	fbgt.w fsub_unfl # yes; it's an underflow
				15338	bra.w fsub_normal # no; it's not an underflow
				15339
				15340	##########################################################################
				15341
				15342	#
				15343	# Sub: inputs are not both normalized; what are they?
				15344	#
				15345	fsub_not_norm:
					# Dispatch through tbl_fsub_op using d1 = (DTAG<<3)|STAG from fsub.
					# Each table entry is a 16-bit offset from tbl_fsub_op; rows are the
					# dst type, columns the src type (8 entries per row, the last two are
					# padding).
				15346	mov.w (tbl_fsub_op.b,%pc,%d1.w*2),%d1
				15347	jmp (tbl_fsub_op.b,%pc,%d1.w*1)
				15348
				15349	swbeg &48
				15350	tbl_fsub_op:
				15351	short fsub_norm - tbl_fsub_op # NORM - NORM
				15352	short fsub_zero_src - tbl_fsub_op # NORM - ZERO
				15353	short fsub_inf_src - tbl_fsub_op # NORM - INF
				15354	short fsub_res_qnan - tbl_fsub_op # NORM - QNAN
				15355	short fsub_norm - tbl_fsub_op # NORM - DENORM
				15356	short fsub_res_snan - tbl_fsub_op # NORM - SNAN
				15357	short tbl_fsub_op - tbl_fsub_op #
				15358	short tbl_fsub_op - tbl_fsub_op #
				15359
				15360	short fsub_zero_dst - tbl_fsub_op # ZERO - NORM
				15361	short fsub_zero_2 - tbl_fsub_op # ZERO - ZERO
				15362	short fsub_inf_src - tbl_fsub_op # ZERO - INF
				15363	short fsub_res_qnan - tbl_fsub_op # ZERO - QNAN
				15364	short fsub_zero_dst - tbl_fsub_op # ZERO - DENORM
				15365	short fsub_res_snan - tbl_fsub_op # ZERO - SNAN
				15366	short tbl_fsub_op - tbl_fsub_op #
				15367	short tbl_fsub_op - tbl_fsub_op #
				15368
				15369	short fsub_inf_dst - tbl_fsub_op # INF - NORM
				15370	short fsub_inf_dst - tbl_fsub_op # INF - ZERO
				15371	short fsub_inf_2 - tbl_fsub_op # INF - INF
				15372	short fsub_res_qnan - tbl_fsub_op # INF - QNAN
				15373	short fsub_inf_dst - tbl_fsub_op # INF - DENORM
				15374	short fsub_res_snan - tbl_fsub_op # INF - SNAN
				15375	short tbl_fsub_op - tbl_fsub_op #
				15376	short tbl_fsub_op - tbl_fsub_op #
				15377
				15378	short fsub_res_qnan - tbl_fsub_op # QNAN - NORM
				15379	short fsub_res_qnan - tbl_fsub_op # QNAN - ZERO
				15380	short fsub_res_qnan - tbl_fsub_op # QNAN - INF
				15381	short fsub_res_qnan - tbl_fsub_op # QNAN - QNAN
				15382	short fsub_res_qnan - tbl_fsub_op # QNAN - DENORM
				15383	short fsub_res_snan - tbl_fsub_op # QNAN - SNAN
				15384	short tbl_fsub_op - tbl_fsub_op #
				15385	short tbl_fsub_op - tbl_fsub_op #
				15386
				15387	short fsub_norm - tbl_fsub_op # DENORM - NORM
				15388	short fsub_zero_src - tbl_fsub_op # DENORM - ZERO
				15389	short fsub_inf_src - tbl_fsub_op # DENORM - INF
				15390	short fsub_res_qnan - tbl_fsub_op # DENORM - QNAN
				15391	short fsub_norm - tbl_fsub_op # DENORM - DENORM
				15392	short fsub_res_snan - tbl_fsub_op # DENORM - SNAN
				15393	short tbl_fsub_op - tbl_fsub_op #
				15394	short tbl_fsub_op - tbl_fsub_op #
				15395
				15396	short fsub_res_snan - tbl_fsub_op # SNAN - NORM
				15397	short fsub_res_snan - tbl_fsub_op # SNAN - ZERO
				15398	short fsub_res_snan - tbl_fsub_op # SNAN - INF
				15399	short fsub_res_snan - tbl_fsub_op # SNAN - QNAN
				15400	short fsub_res_snan - tbl_fsub_op # SNAN - DENORM
				15401	short fsub_res_snan - tbl_fsub_op # SNAN - SNAN
				15402	short tbl_fsub_op - tbl_fsub_op #
				15403	short tbl_fsub_op - tbl_fsub_op #
				15404
				15405	fsub_res_qnan:
				15406	bra.l res_qnan
				15407	fsub_res_snan:
				15408	bra.l res_snan
				15409
				15410	#
				15411	# both operands are ZEROes
				15412	#
				15413	fsub_zero_2:
				15414	mov.b SRC_EX(%a0),%d0 # fetch src sign byte
				15415	mov.b DST_EX(%a1),%d1 # fetch dst sign byte
				15416	eor.b %d1,%d0 # d0 = src ^ dst; d1 (dst) is left intact
				15417	bpl.b fsub_zero_2_chk_rm # signs are the same
				15418
				15419	# the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
					# d0 now holds the XOR of the signs, whose sign bit is always set on
					# this path, so the dst sign must be tested from d1 (still DST_EX).
					# testing d0 here would return -ZERO for (+ZERO)-(-ZERO).
				15420	tst.b %d1 # is dst negative?
				15421	bmi.b fsub_zero_2_rm # yes
				15422	fmov.s &0x00000000,%fp0 # no; return +ZERO
				15423	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				15424	rts
				15425
				15426	#
				15427	# the ZEROes have the same signs:
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	15428	# - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	15429	# - -ZERO is returned in the case of RM.
				15430	#
				15431	fsub_zero_2_chk_rm:
				15432	mov.b 3+L_SCR3(%a6),%d1 # fetch low byte of rnd info
				15433	andi.b &0x30,%d1 # extract rnd mode
				15434	cmpi.b %d1,&rm_mode*0x10 # is rnd mode = RM?
				15435	beq.b fsub_zero_2_rm # yes
				15436	fmov.s &0x00000000,%fp0 # no; return +ZERO
				15437	mov.b &z_bmask,FPSR_CC(%a6) # set Z
				15438	rts
				15439
				15440	fsub_zero_2_rm:
				15441	fmov.s &0x80000000,%fp0 # return -ZERO
				15442	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
				15443	rts
				15444
				15445	#
				15446	# one operand is a ZERO and the other is a DENORM or a NORM.
				15447	# scale the DENORM or NORM and jump to the regular fsub routine.
				15448	#
				15449	fsub_zero_dst:
				15450	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
				15451	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				15452	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15453	bsr.l scale_to_zero_src # scale the operand
				15454	clr.w FP_SCR1_EX(%a6) # dst is ZERO: clear FP_SCR1
				15455	clr.l FP_SCR1_HI(%a6)
				15456	clr.l FP_SCR1_LO(%a6)
				15457	bra.w fsub_zero_entry # go execute fsub
				15458
				15459	fsub_zero_src:
				15460	mov.w DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
				15461	mov.l DST_HI(%a1),FP_SCR1_HI(%a6)
				15462	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				15463	bsr.l scale_to_zero_dst # scale the operand
				15464	clr.w FP_SCR0_EX(%a6) # src is ZERO: clear FP_SCR0
				15465	clr.l FP_SCR0_HI(%a6)
				15466	clr.l FP_SCR0_LO(%a6)
				15467	bra.w fsub_zero_entry # go execute fsub
				15468
				15469	#
				15470	# both operands are INFs. an OPERR will result if the INFs have the
				15471	# same signs. else, fall through and return the negated src INF.
				15472	#
				15473	fsub_inf_2:
				15474	mov.b SRC_EX(%a0),%d0 # exclusive or the signs
				15475	mov.b DST_EX(%a1),%d1
				15476	eor.b %d1,%d0 # d0 = src ^ dst
				15477	bpl.l res_operr # weed out (+INF)-(+INF) and (-INF)-(-INF)
				15478
				15479	# ok, so it's not an OPERR. but we do have to remember to return
				15480	# the src INF since that's where the 881/882 gets the j-bit.
				15481
				15482	fsub_inf_src:
				15483	fmovm.x SRC(%a0),&0x80 # return src INF
				15484	fneg.x %fp0 # invert sign
				15485	fbge.w fsub_inf_done # sign is now positive
				15486	mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
				15487	rts
				15488
				15489	fsub_inf_dst:
				15490	fmovm.x DST(%a1),&0x80 # return dst INF
				15491	tst.b DST_EX(%a1) # is INF negative?
				15492	bpl.b fsub_inf_done # no
				15493	mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
				15494	rts
				15495
				15496	fsub_inf_done:
				15497	mov.b &inf_bmask,FPSR_CC(%a6) # set INF
				15498	rts
				15499
				15500	#########################################################################
				15501	# XDEF **************************************************************** #
				15502	# fsqrt(): emulates the fsqrt instruction #
				15503	# fssqrt(): emulates the fssqrt instruction #
				15504	# fdsqrt(): emulates the fdsqrt instruction #
				15505	# #
				15506	# XREF **************************************************************** #
				15507	# scale_sqrt() - scale the source operand #
				15508	# unf_res() - return default underflow result #
				15509	# ovf_res() - return default overflow result #
				15510	# res_qnan_1op() - return QNAN result #
				15511	# res_snan_1op() - return SNAN result #
				15512	# #
				15513	# INPUT *************************************************************** #
				15514	# a0 = pointer to extended precision source operand #
				15515	# d0 rnd prec,mode #
				15516	# #
				15517	# OUTPUT ************************************************************** #
				15518	# fp0 = result #
				15519	# fp1 = EXOP (if exception occurred) #
				15520	# #
				15521	# ALGORITHM *********************************************************** #
				15522	# Handle NANs, infinities, and zeroes as special cases. Divide #
				15523	# norms/denorms into ext/sgl/dbl precision. #
				15524	# For norms/denorms, scale the exponents such that a sqrt #
				15525	# instruction won't cause an exception. Use the regular fsqrt to #
				15526	# compute a result. Check if the regular operands would have taken #
				15527	# an exception. If so, return the default overflow/underflow result #
				15528	# and return the EXOP if exceptions are enabled. Else, scale the #
				15529	# result operand to the proper exponent. #
				15530	# #
				15531	#########################################################################
				15532
				15533	global fssqrt
				15534	fssqrt:
				15535	andi.b &0x30,%d0 # clear rnd prec
				15536	ori.b &s_mode*0x10,%d0 # insert sgl precision
				15537	bra.b fsqrt
				15538
				15539	global fdsqrt
				15540	fdsqrt:
				15541	andi.b &0x30,%d0 # clear rnd prec
				15542	ori.b &d_mode*0x10,%d0 # insert dbl precision
					# (falls through into fsqrt)
				15543
				15544	global fsqrt
				15545	fsqrt:
				15546	mov.l %d0,L_SCR3(%a6) # store rnd info
				15547	clr.w %d1
				15548	mov.b STAG(%a6),%d1 # fetch src operand tag
				15549	bne.w fsqrt_not_norm # optimize on non-norm input
				15550
				15551	#
				15552	# SQUARE ROOT: norms and denorms ONLY!
				15553	#
				15554	fsqrt_norm:
				15555	tst.b SRC_EX(%a0) # is operand negative?
				15556	bmi.l res_operr # yes
				15557
				15558	andi.b &0xc0,%d0 # is precision extended?
				15559	bne.b fsqrt_not_ext # no; go handle sgl or dbl
				15560
				15561	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15562	fmov.l &0x0,%fpsr # clear FPSR
				15563
				15564	fsqrt.x (%a0),%fp0 # execute square root
				15565
				15566	fmov.l %fpsr,%d1
				15567	or.l %d1,USER_FPSR(%a6) # set N,INEX
				15568
					# NOTE(review): unlike fsqrt_sd_normal, this path returns without
					# clearing FPCR; confirm the caller does not depend on FPCR == 0.
				15569	rts
				15570
				15571	fsqrt_denorm:
				15572	tst.b SRC_EX(%a0) # is operand negative?
				15573	bmi.l res_operr # yes
				15574
				15575	andi.b &0xc0,%d0 # is precision extended?
				15576	bne.b fsqrt_not_ext # no; go handle sgl or dbl
				15577
				15578	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
				15579	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				15580	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15581
				15582	bsr.l scale_sqrt # calculate scale factor
				15583
				15584	bra.w fsqrt_sd_normal # go compute and rescale the result
				15585
				15586	#
				15587	# operand is either single or double
				15588	#
				15589	fsqrt_not_ext:
				15590	cmpi.b %d0,&s_mode*0x10 # separate sgl/dbl prec
				15591	bne.w fsqrt_dbl
				15592
				15593	#
				15594	# operand is to be rounded to single precision
				15595	#
				15596	fsqrt_sgl:
				15597	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
				15598	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				15599	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15600
				15601	bsr.l scale_sqrt # calculate scale factor
				15602
				15603	cmpi.l %d0,&0x3fff-0x3f81 # will move in underflow?
				15604	beq.w fsqrt_sd_may_unfl # maybe; go check
				15605	bgt.w fsqrt_sd_unfl # yes; go handle underflow
				15606	cmpi.l %d0,&0x3fff-0x407f # will move in overflow?
				15607	beq.w fsqrt_sd_may_ovfl # maybe; go check
				15608	blt.w fsqrt_sd_ovfl # yes; go handle overflow
				15609
				15610	#
				15611	# operand will NOT overflow or underflow when moved in to the fp reg file
				15612	#
				15613	fsqrt_sd_normal:
				15614	fmov.l &0x0,%fpsr # clear FPSR
				15615	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15616
				15617	fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
				15618
				15619	fmov.l %fpsr,%d1 # save FPSR
				15620	fmov.l &0x0,%fpcr # clear FPCR
				15621
				15622	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				15623
				15624	fsqrt_sd_normal_exit:
				15625	mov.l %d2,-(%sp) # save d2
				15626	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				15627	mov.w FP_SCR0_EX(%a6),%d1 # load sgn,exp
				15628	mov.l %d1,%d2 # make a copy
				15629	andi.l &0x7fff,%d1 # strip sign
				15630	sub.l %d0,%d1 # add scale factor
				15631	andi.w &0x8000,%d2 # keep old sign
				15632	or.w %d1,%d2 # concat old sign,new exp
				15633	mov.w %d2,FP_SCR0_EX(%a6) # insert new exponent
				15634	mov.l (%sp)+,%d2 # restore d2
				15635	fmovm.x FP_SCR0(%a6),&0x80 # return result in fp0
				15636	rts
				15637
				15638	#
				15639	# operand is to be rounded to double precision
				15640	#
				15641	fsqrt_dbl:
				15642	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # copy operand into FP_SCR0
				15643	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				15644	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15645
				15646	bsr.l scale_sqrt # calculate scale factor
				15647
				15648	cmpi.l %d0,&0x3fff-0x3c01 # will move in underflow?
				15649	beq.w fsqrt_sd_may_unfl # maybe; go check
				15650	bgt.b fsqrt_sd_unfl # yes; go handle underflow
				15651	cmpi.l %d0,&0x3fff-0x43ff # will move in overflow?
				15652	beq.w fsqrt_sd_may_ovfl # maybe; go check
				15653	blt.w fsqrt_sd_ovfl # yes; go handle overflow
				15654	bra.w fsqrt_sd_normal # no; go handle normalized op
				15655
				15656	# we're on the line here and the distinguishing characteristic is whether
				15657	# the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number;
				15658	# otherwise fall through to underflow.
				15659	fsqrt_sd_may_unfl:
				15660	btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
				15661	bne.w fsqrt_sd_normal # yes, so no underflow
				15662
				15663	#
				15664	# operand WILL underflow when moved in to the fp register file
				15665	#
				15666	fsqrt_sd_unfl:
				15667	bset &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
				15668
				15669	fmov.l &rz_mode*0x10,%fpcr # set FPCR
				15670	fmov.l &0x0,%fpsr # clear FPSR
				15671
				15672	fsqrt.x FP_SCR0(%a6),%fp0 # execute square root
				15673
				15674	fmov.l %fpsr,%d1 # save status
				15675	fmov.l &0x0,%fpcr # clear FPCR
				15676
				15677	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				15678
				15679	# if underflow or inexact is enabled, go calculate EXOP first.
				15680	mov.b FPCR_ENABLE(%a6),%d1
				15681	andi.b &0x0b,%d1 # is UNFL or INEX enabled?
				15682	bne.b fsqrt_sd_unfl_ena # yes
				15683
				15684	fsqrt_sd_unfl_dis:
				15685	fmovm.x &0x80,FP_SCR0(%a6) # store out result
				15686
				15687	lea FP_SCR0(%a6),%a0 # pass: result addr
				15688	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				15689	bsr.l unf_res # calculate default result
				15690	or.b %d0,FPSR_CC(%a6) # set possible 'Z' ccode
				15691	fmovm.x FP_SCR0(%a6),&0x80 # return default result in fp0
				15692	rts
				15693
				15694	#
				15695	# operand will underflow AND underflow is enabled.
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	15696	# Therefore, we must return the result rounded to extended precision.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	15697	#
				15698	fsqrt_sd_unfl_ena:
				15699	mov.l FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
				15700	mov.l FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
				15701	mov.w FP_SCR0_EX(%a6),%d1 # load current exponent
				15702
				15703	mov.l %d2,-(%sp) # save d2
				15704	mov.l %d1,%d2 # make a copy
				15705	andi.l &0x7fff,%d1 # strip sign
				15706	andi.w &0x8000,%d2 # keep old sign
				15707	sub.l %d0,%d1 # subtract scale factor
				15708	addi.l &0x6000,%d1 # add new bias
				15709	andi.w &0x7fff,%d1
				15710	or.w %d2,%d1 # concat new sign,new exp
				15711	mov.w %d1,FP_SCR1_EX(%a6) # insert new exp
				15712	fmovm.x FP_SCR1(%a6),&0x40 # return EXOP in fp1
				15713	mov.l (%sp)+,%d2 # restore d2
				15714	bra.b fsqrt_sd_unfl_dis
				15715
				15716	#
				15717	# operand WILL overflow.
				15718	#
				15719	fsqrt_sd_ovfl:
				15720	fmov.l &0x0,%fpsr # clear FPSR
				15721	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15722
				15723	fsqrt.x FP_SCR0(%a6),%fp0 # perform square root
				15724
				15725	fmov.l &0x0,%fpcr # clear FPCR
				15726	fmov.l %fpsr,%d1 # save FPSR
				15727
				15728	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				15729
				15730	fsqrt_sd_ovfl_tst:
				15731	or.l &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
				15732
				15733	mov.b FPCR_ENABLE(%a6),%d1
				15734	andi.b &0x13,%d1 # is OVFL or INEX enabled?
				15735	bne.b fsqrt_sd_ovfl_ena # yes
				15736
				15737	#
				15738	# OVFL is not enabled; therefore, we must create the default result by
				15739	# calling ovf_res().
				15740	#
				15741	fsqrt_sd_ovfl_dis:
				15742	btst &neg_bit,FPSR_CC(%a6) # is result negative?
				15743	sne %d1 # set sign param accordingly
				15744	mov.l L_SCR3(%a6),%d0 # pass: prec,mode
				15745	bsr.l ovf_res # calculate default result
				15746	or.b %d0,FPSR_CC(%a6) # set INF,N if applicable
				15747	fmovm.x (%a0),&0x80 # return default result in fp0
				15748	rts
				15749
				15750	#
				15751	# OVFL is enabled.
				15752	# the INEX2 bit has already been updated by the round to the correct precision.
				15753	# now, round to extended(and don't alter the FPSR).
				15754	#
				15755	fsqrt_sd_ovfl_ena:
				15756	mov.l %d2,-(%sp) # save d2
				15757	mov.w FP_SCR0_EX(%a6),%d1 # fetch {sgn,exp}
				15758	mov.l %d1,%d2 # make a copy
				15759	andi.l &0x7fff,%d1 # strip sign
				15760	andi.w &0x8000,%d2 # keep old sign
				15761	sub.l %d0,%d1 # subtract scale factor
				15762	subi.l &0x6000,%d1 # subtract bias
				15763	andi.w &0x7fff,%d1 # keep exponent within 15 bits
				15764	or.w %d2,%d1 # concat sign,exp
				15765	mov.w %d1,FP_SCR0_EX(%a6) # insert new exponent
				15766	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				15767	mov.l (%sp)+,%d2 # restore d2
				15768	bra.b fsqrt_sd_ovfl_dis # then go create the default result
				15769
				15770	#
				15771	# the move in MAY overflow. so...
				15772	#
				15773	fsqrt_sd_may_ovfl:
				15774	btst &0x0,1+FP_SCR0_EX(%a6) # is exponent 0x3fff?
				15775	bne.w fsqrt_sd_ovfl # yes, so overflow
				15776
				15777	fmov.l &0x0,%fpsr # clear FPSR
				15778	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				15779
				15780	fsqrt.x FP_SCR0(%a6),%fp0 # perform square root on scaled operand
				15781
				15782	fmov.l %fpsr,%d1 # save status
				15783	fmov.l &0x0,%fpcr # clear FPCR
				15784
				15785	or.l %d1,USER_FPSR(%a6) # save INEX2,N
				15786
				15787	fmov.x %fp0,%fp1 # make a copy of result
				15788	fcmp.b %fp1,&0x1 # is |result| >= 1.b?
				15789	fbge.w fsqrt_sd_ovfl_tst # yes; overflow has occurred
				15790
				15791	# no, it didn't overflow; we have correct result
				15792	bra.w fsqrt_sd_normal_exit
				15793
				15794	##########################################################################
				15795
				15796	#
				15797	# input is not normalized; what is it?
				15798	#
				15799	fsqrt_not_norm:
				15800	cmpi.b %d1,&DENORM # weed out DENORM
				15801	beq.w fsqrt_denorm
				15802	cmpi.b %d1,&ZERO # weed out ZERO
				15803	beq.b fsqrt_zero
				15804	cmpi.b %d1,&INF # weed out INF
				15805	beq.b fsqrt_inf
				15806	cmpi.b %d1,&SNAN # weed out SNAN
				15807	beq.l res_snan_1op
				15808	bra.l res_qnan_1op
				15809
				15810	#
				15811	# fsqrt(+0) = +0
				15812	# fsqrt(-0) = -0
				15813	# fsqrt(+INF) = +INF
				15814	# fsqrt(-INF) = OPERR
				15815	#
				15816	fsqrt_zero:
				15817	tst.b SRC_EX(%a0) # is ZERO positive or negative?
				15818	bmi.b fsqrt_zero_m # negative
				15819	fsqrt_zero_p:
				15820	fmov.s &0x00000000,%fp0 # return +ZERO
				15821	mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit
				15822	rts
				15823	fsqrt_zero_m:
				15824	fmov.s &0x80000000,%fp0 # return -ZERO
				15825	mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
				15826	rts
				15827
				15828	fsqrt_inf:
				15829	tst.b SRC_EX(%a0) # is INF positive or negative?
				15830	bmi.l res_operr # negative
				15831	fsqrt_inf_p:
				15832	fmovm.x SRC(%a0),&0x80 # return +INF in fp0
				15833	mov.b &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
				15834	rts
				15835
				15836	##########################################################################
				15837
				15838	#########################################################################
				15839	# XDEF **************************************************************** #
				15840	# addsub_scaler2(): scale inputs to fadd/fsub such that no #
				15841	# OVFL/UNFL exceptions will result #
				15842	# #
				15843	# XREF **************************************************************** #
				15844	# norm() - normalize mantissa after adjusting exponent #
				15845	# #
				15846	# INPUT *************************************************************** #
				15847	# FP_SRC(a6) = fp op1(src) #
				15848	# FP_DST(a6) = fp op2(dst) #
				15849	# #
				15850	# OUTPUT ************************************************************** #
				15851	# FP_SRC(a6) = fp op1 scaled(src) #
				15852	# FP_DST(a6) = fp op2 scaled(dst) #
				15853	# d0 = scale amount #
				15854	# #
				15855	# ALGORITHM *********************************************************** #
				15856	# If the DST exponent is > the SRC exponent, set the DST exponent #
				15857	# equal to 0x3fff and scale the SRC exponent by the value that the #
				15858	# DST exponent was scaled by. If the SRC exponent is greater or equal, #
				15859	# do the opposite. Return this scale factor in d0. #
				15860	# If the two exponents differ by > the number of mantissa bits #
				15861	# plus two, then set the smallest exponent to a very small value as a #
				15862	# quick shortcut. #
				15863	# #
				15864	#########################################################################
				15865
				15866	global addsub_scaler2
				15867	addsub_scaler2:
				15868	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy src mantissa into FP_SCR0
				15869	mov.l DST_HI(%a1),FP_SCR1_HI(%a6) # copy dst mantissa into FP_SCR1
				15870	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				15871	mov.l DST_LO(%a1),FP_SCR1_LO(%a6)
				15872	mov.w SRC_EX(%a0),%d0 # d0 = src {sgn,exp}
				15873	mov.w DST_EX(%a1),%d1 # d1 = dst {sgn,exp}
				15874	mov.w %d0,FP_SCR0_EX(%a6)
				15875	mov.w %d1,FP_SCR1_EX(%a6)
				15876
				15877	andi.w &0x7fff,%d0 # strip src sign
				15878	andi.w &0x7fff,%d1 # strip dst sign
				15879	mov.w %d0,L_SCR1(%a6) # store src exponent
				15880	mov.w %d1,2+L_SCR1(%a6) # store dst exponent
				15881
				15882	cmp.w %d0, %d1 # is src exp >= dst exp?
				15883	bge.l src_exp_ge2
				15884
				15885	# dst exp is > src exp; scale dst to exp = 0x3fff
				15886	dst_exp_gt2:
				15887	bsr.l scale_to_zero_dst
				15888	mov.l %d0,-(%sp) # save scale factor
				15889
				15890	cmpi.b STAG(%a6),&DENORM # is src denormalized?
				15891	bne.b cmpexp12
				15892
				15893	lea FP_SCR0(%a6),%a0 # pass ptr to src copy
				15894	bsr.l norm # normalize the denorm; result is new exp
				15895	neg.w %d0 # new exp = -(shft val)
				15896	mov.w %d0,L_SCR1(%a6) # insert new src exp
				15897
				15898	cmpexp12:
				15899	mov.w 2+L_SCR1(%a6),%d0 # fetch dst (larger) exponent
				15900	subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
				15901
				15902	cmp.w %d0,L_SCR1(%a6) # is difference >= len(mantissa)+2?
				15903	bge.b quick_scale12
				15904
				15905	mov.w L_SCR1(%a6),%d0 # fetch src exponent
				15906	add.w 0x2(%sp),%d0 # scale src exponent by scale factor
				15907	mov.w FP_SCR0_EX(%a6),%d1
				15908	and.w &0x8000,%d1 # keep src sign
				15909	or.w %d1,%d0 # concat {sgn,new exp}
				15910	mov.w %d0,FP_SCR0_EX(%a6) # insert new src exponent
				15911
				15912	mov.l (%sp)+,%d0 # return SCALE factor
				15913	rts
				15914
				15915	quick_scale12:
				15916	andi.w &0x8000,FP_SCR0_EX(%a6) # zero src exponent
				15917	bset &0x0,1+FP_SCR0_EX(%a6) # set exp = 1
				15918
				15919	mov.l (%sp)+,%d0 # return SCALE factor
				15920	rts
				15921
				15922	# src exp is >= dst exp; scale src to exp = 0x3fff
				15923	src_exp_ge2:
				15924	bsr.l scale_to_zero_src
				15925	mov.l %d0,-(%sp) # save scale factor
				15926
				15927	cmpi.b DTAG(%a6),&DENORM # is dst denormalized?
				15928	bne.b cmpexp22
				15929	lea FP_SCR1(%a6),%a0 # pass ptr to dst copy
				15930	bsr.l norm # normalize the denorm; result is new exp
				15931	neg.w %d0 # new exp = -(shft val)
				15932	mov.w %d0,2+L_SCR1(%a6) # insert new dst exp
				15933
				15934	cmpexp22:
				15935	mov.w L_SCR1(%a6),%d0 # fetch src (larger or equal) exponent
				15936	subi.w &mantissalen+2,%d0 # subtract mantissalen+2 from larger exp
				15937
				15938	cmp.w %d0,2+L_SCR1(%a6) # is difference >= len(mantissa)+2?
				15939	bge.b quick_scale22
				15940
				15941	mov.w 2+L_SCR1(%a6),%d0 # fetch dst exponent
				15942	add.w 0x2(%sp),%d0 # scale dst exponent by scale factor
				15943	mov.w FP_SCR1_EX(%a6),%d1
				15944	andi.w &0x8000,%d1 # keep dst sign
				15945	or.w %d1,%d0 # concat {sgn,new exp}
				15946	mov.w %d0,FP_SCR1_EX(%a6) # insert new dst exponent
				15947
				15948	mov.l (%sp)+,%d0 # return SCALE factor
				15949	rts
				15950
				15951	quick_scale22:
				15952	andi.w &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
				15953	bset &0x0,1+FP_SCR1_EX(%a6) # set exp = 1
				15954
				15955	mov.l (%sp)+,%d0 # return SCALE factor
				15956	rts
				15957
				15958	##########################################################################
				15959
				15960	#########################################################################
				15961	# XDEF **************************************************************** #
				15962	# scale_to_zero_src(): scale the exponent of extended precision #
				15963	# value at FP_SCR0(a6). #
				15964	# #
				15965	# XREF **************************************************************** #
				15966	# norm() - normalize the mantissa if the operand was a DENORM #
				15967	# #
				15968	# INPUT *************************************************************** #
				15969	# FP_SCR0(a6) = extended precision operand to be scaled #
				15970	# #
				15971	# OUTPUT ************************************************************** #
				15972	# FP_SCR0(a6) = scaled extended precision operand #
				15973	# d0 = scale value #
				15974	# #
				15975	# ALGORITHM *********************************************************** #
				15976	# Set the exponent of the input operand to 0x3fff. Save the value #
				15977	# of the difference between the original and new exponent. Then, #
				15978	# normalize the operand if it was a DENORM. Add this normalization #
				15979	# value to the previous value. Return the result. #
				15980	# #
				15981	#########################################################################
				15982
				15983	global scale_to_zero_src
				15984	scale_to_zero_src:
				15985	mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
				15986	mov.w %d1,%d0 # make a copy
				15987
				15988	andi.l &0x7fff,%d1 # extract operand's exponent
				15989
				15990	andi.w &0x8000,%d0 # extract operand's sgn
				15991	or.w &0x3fff,%d0 # insert new exponent (biased 0x3fff)
				15992
				15993	mov.w %d0,FP_SCR0_EX(%a6) # insert biased exponent
				15994
				15995	cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
				15996	beq.b stzs_denorm # yes; normalize the DENORM
				15997
				15998	stzs_norm:
				15999	mov.l &0x3fff,%d0 # d0 = BIAS
				16000	sub.l %d1,%d0 # scale = BIAS + (-exp)
				16001
				16002	rts
				16003
				16004	stzs_denorm:
				16005	lea FP_SCR0(%a6),%a0 # pass ptr to src op
				16006	bsr.l norm # normalize denorm
				16007	neg.l %d0 # new exponent = -(shft val)
				16008	mov.l %d0,%d1 # d1 = new exponent, as stzs_norm expects
				16009	bra.b stzs_norm # finish scaling
				16010
				16011	###
				16012
				16013	#########################################################################
				16014	# XDEF **************************************************************** #
				16015	# scale_sqrt(): scale the input operand exponent so a subsequent #
				16016	# fsqrt operation won't take an exception. #
				16017	# #
				16018	# XREF **************************************************************** #
				16019	# norm() - normalize the mantissa if the operand was a DENORM #
				16020	# #
				16021	# INPUT *************************************************************** #
				16022	# FP_SCR0(a6) = extended precision operand to be scaled #
				16023	# #
				16024	# OUTPUT ************************************************************** #
				16025	# FP_SCR0(a6) = scaled extended precision operand #
				16026	# d0 = scale value #
				16027	# #
				16028	# ALGORITHM *********************************************************** #
				16029	# If the input operand is a DENORM, normalize it. #
				16030	# If the exponent of the input operand is even, set the exponent #
				16031	# to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the #
				16032	# exponent of the input operand is odd, set the exponent to 0x3fff and #
				16033	# return a scale factor of "(0x3fff-exp)/2". #
				16034	# #
				16035	#########################################################################
				16036
				16037	global scale_sqrt
				16038	scale_sqrt:
				16039	cmpi.b STAG(%a6),&DENORM # is operand a DENORM?
				16040	beq.b ss_denorm # yes; normalize the DENORM
				16041
				16042	mov.w FP_SCR0_EX(%a6),%d1 # extract operand's {sgn,exp}
				16043	andi.l &0x7fff,%d1 # extract operand's exponent
				16044
				16045	andi.w &0x8000,FP_SCR0_EX(%a6) # keep only operand's sgn
				16046
				16047	btst &0x0,%d1 # is exp even or odd?
				16048	beq.b ss_norm_even
				16049
				16050	ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: insert new exponent 0x3fff
				16051
				16052	mov.l &0x3fff,%d0
				16053	sub.l %d1,%d0 # scale = 0x3fff + (-exp)
				16054	asr.l &0x1,%d0 # divide scale factor by 2
				16055	rts
				16056
				16057	ss_norm_even:
				16058	ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: insert new exponent 0x3ffe
				16059
				16060	mov.l &0x3ffe,%d0
				16061	sub.l %d1,%d0 # scale = 0x3ffe + (-exp)
				16062	asr.l &0x1,%d0 # divide scale factor by 2
				16063	rts
				16064
				16065	ss_denorm:
				16066	lea FP_SCR0(%a6),%a0 # pass ptr to src op
				16067	bsr.l norm # normalize denorm
				16068
				16069	btst &0x0,%d0 # is value returned by norm even or odd?
				16070	beq.b ss_denorm_even
				16071
				16072	ori.w &0x3fff,FP_SCR0_EX(%a6) # odd: insert new exponent 0x3fff
				16073
				16074	add.l &0x3fff,%d0 # scale = 0x3fff + value returned by norm
				16075	asr.l &0x1,%d0 # divide scale factor by 2
				16076	rts
				16077
				16078	ss_denorm_even:
				16079	ori.w &0x3ffe,FP_SCR0_EX(%a6) # even: insert new exponent 0x3ffe
				16080
				16081	add.l &0x3ffe,%d0 # scale = 0x3ffe + value returned by norm
				16082	asr.l &0x1,%d0 # divide scale factor by 2
				16083	rts
				16084
				16085	###
				16086
				16087	#########################################################################
				16088	# XDEF **************************************************************** #
				16089	# scale_to_zero_dst(): scale the exponent of extended precision #
				16090	# value at FP_SCR1(a6). #
				16091	# #
				16092	# XREF **************************************************************** #
				16093	# norm() - normalize the mantissa if the operand was a DENORM #
				16094	# #
				16095	# INPUT *************************************************************** #
				16096	# FP_SCR1(a6) = extended precision operand to be scaled #
				16097	# #
				16098	# OUTPUT ************************************************************** #
				16099	# FP_SCR1(a6) = scaled extended precision operand #
				16100	# d0 = scale value #
				16101	# #
				16102	# ALGORITHM *********************************************************** #
				16103	# Set the exponent of the input operand to 0x3fff. Save the value #
				16104	# of the difference between the original and new exponent. Then, #
				16105	# normalize the operand if it was a DENORM. Add this normalization #
				16106	# value to the previous value. Return the result. #
				16107	# #
				16108	#########################################################################
				16109
				16110	global scale_to_zero_dst
				16111	scale_to_zero_dst:
				16112	mov.w FP_SCR1_EX(%a6),%d1 # extract operand's {sgn,exp}
				16113	mov.w %d1,%d0 # make a copy
				16114
				16115	andi.l &0x7fff,%d1 # extract operand's exponent
				16116
				16117	andi.w &0x8000,%d0 # extract operand's sgn
				16118	or.w &0x3fff,%d0 # insert new exponent (biased 0x3fff)
				16119
				16120	mov.w %d0,FP_SCR1_EX(%a6) # insert biased exponent
				16121
				16122	cmpi.b DTAG(%a6),&DENORM # is operand a DENORM?
				16123	beq.b stzd_denorm # yes; normalize the DENORM
				16124
				16125	stzd_norm:
				16126	mov.l &0x3fff,%d0 # d0 = BIAS
				16127	sub.l %d1,%d0 # scale = BIAS + (-exp)
				16128	rts
				16129
				16130	stzd_denorm:
				16131	lea FP_SCR1(%a6),%a0 # pass ptr to dst op
				16132	bsr.l norm # normalize denorm
				16133	neg.l %d0 # new exponent = -(shft val)
				16134	mov.l %d0,%d1 # d1 = new exponent, as stzd_norm expects
				16135	bra.b stzd_norm # finish scaling
				16136
				16137	##########################################################################
				16138
				16139	#########################################################################
				16140	# XDEF **************************************************************** #
				16141	# res_qnan(): return default result w/ QNAN operand for dyadic #
				16142	# res_snan(): return default result w/ SNAN operand for dyadic #
				16143	# res_qnan_1op(): return dflt result w/ QNAN operand for monadic #
				16144	# res_snan_1op(): return dflt result w/ SNAN operand for monadic #
				16145	# #
				16146	# XREF **************************************************************** #
				16147	# None #
				16148	# #
				16149	# INPUT *************************************************************** #
				16150	# FP_SRC(a6) = pointer to extended precision src operand #
				16151	# FP_DST(a6) = pointer to extended precision dst operand #
				16152	# #
				16153	# OUTPUT ************************************************************** #
				16154	# fp0 = default result #
				16155	# #
				16156	# ALGORITHM *********************************************************** #
				16157	# If either operand (but not both operands) of an operation is a #
				16158	# nonsignalling NAN, then that NAN is returned as the result. If both #
				16159	# operands are nonsignalling NANs, then the destination operand #
				16160	# nonsignalling NAN is returned as the result. #
				16161	# If either operand to an operation is a signalling NAN (SNAN), #
				16162	# then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap #
				16163	# enable bit is set in the FPCR, then the trap is taken and the #
				16164	# destination is not modified. If the SNAN trap enable bit is not set, #
				16165	# then the SNAN is converted to a nonsignalling NAN (by setting the #
				16166	# SNAN bit in the operand to one), and the operation continues as #
				16167	# described in the preceding paragraph, for nonsignalling NANs. #
				16168	# Make sure the appropriate FPSR bits are set before exiting. #
				16169	# #
				16170	#########################################################################
				16171
				16172	global res_qnan
				16173	global res_snan
				16174	res_qnan:
				16175	res_snan:
				16176	cmp.b DTAG(%a6), &SNAN # is the dst an SNAN?
				16177	beq.b dst_snan2
				16178	cmp.b DTAG(%a6), &QNAN # is the dst a QNAN?
				16179	beq.b dst_qnan2
				16180	src_nan:
				16181	cmp.b STAG(%a6), &QNAN # is the src a QNAN?
				16182	beq.b src_qnan2
				16183	global res_snan_1op
				16184	res_snan_1op:
				16185	src_snan2:
				16186	bset &0x6, FP_SRC_HI(%a6) # set SNAN bit
				16187	or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set nan/aiop/snan
				16188	lea FP_SRC(%a6), %a0 # a0 = ptr to src NAN
				16189	bra.b nan_comp
				16190	global res_qnan_1op
				16191	res_qnan_1op:
				16192	src_qnan2:
				16193	or.l &nan_mask, USER_FPSR(%a6) # set nan
				16194	lea FP_SRC(%a6), %a0 # a0 = ptr to src NAN
				16195	bra.b nan_comp
				16196	dst_snan2:
				16197	or.l &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6) # set nan/aiop/snan
				16198	bset &0x6, FP_DST_HI(%a6) # set SNAN bit
				16199	lea FP_DST(%a6), %a0 # a0 = ptr to dst NAN
				16200	bra.b nan_comp
				16201	dst_qnan2:
				16202	lea FP_DST(%a6), %a0 # a0 = ptr to dst NAN
				16203	cmp.b STAG(%a6), &SNAN # is the src an SNAN?
				16204	bne nan_done # no; only nan needs setting
				16205	or.l &aiop_mask+snan_mask, USER_FPSR(%a6) # yes; set aiop/snan too
				16206	nan_done:
				16207	or.l &nan_mask, USER_FPSR(%a6) # set nan
				16208	nan_comp:
				16209	btst &0x7, FTEMP_EX(%a0) # is NAN neg?
				16210	beq.b nan_not_neg
				16211	or.l &neg_mask, USER_FPSR(%a6) # yes; set neg
				16212	nan_not_neg:
				16213	fmovm.x (%a0), &0x80 # return the selected NAN in fp0
				16214	rts
				16215
				16216	#########################################################################
				16217	# XDEF **************************************************************** #
				16218	# res_operr(): return default result during operand error #
				16219	# #
				16220	# XREF **************************************************************** #
				16221	# None #
				16222	# #
				16223	# INPUT *************************************************************** #
				16224	# None #
				16225	# #
				16226	# OUTPUT ************************************************************** #
				16227	# fp0 = default operand error result #
				16228	# #
				16229	# ALGORITHM *********************************************************** #
				16230	# A nonsignalling NAN is returned as the default result when #
				16231	# an operand error occurs for the following cases: #
				16232	# #
				16233	# Multiply: (Infinity x Zero) #
				16234	# Divide : (Zero / Zero) \|\| (Infinity / Infinity) #
				16235	# #
				16236	#########################################################################
				16237
				16238	global res_operr
				16239	res_operr:
				16240	or.l &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # set nan/operr/aiop
				16241	fmovm.x nan_return(%pc), &0x80 # return default NAN in fp0
				16242	rts
				16243
				16244	nan_return:
				16245	long 0x7fff0000, 0xffffffff, 0xffffffff # extended-precision constant loaded above
				16246
				16247	#########################################################################
				16248	# fdbcc(): routine to emulate the fdbcc instruction #
				16249	# #
				16250	# XDEF **************************************************************** #
				16251	# _fdbcc() #
				16252	# #
				16253	# XREF **************************************************************** #
				16254	# fetch_dreg() - fetch Dn value #
				16255	# store_dreg_l() - store updated Dn value #
				16256	# #
				16257	# INPUT *************************************************************** #
				16258	# d0 = displacement #
				16259	# #
				16260	# OUTPUT ************************************************************** #
				16261	# none #
				16262	# #
				16263	# ALGORITHM *********************************************************** #
				16264	# This routine checks which conditional predicate is specified by #
				16265	# the stacked fdbcc instruction opcode and then branches to a routine #
				16266	# for that predicate. The corresponding fbcc instruction is then used #
				16267	# to see whether the condition (specified by the stacked FPSR) is true #
				16268	# or false. #
				16269	# If a BSUN exception should be indicated, the BSUN and ABSUN #
				16270	# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
				16271	# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
				16272	# enabled BSUN should not be flagged and the predicate is false, then #
				16273	# Dn is fetched and decremented by one. If Dn is not equal to -1, add #
				16274	# the displacement value to the stacked PC so that when an "rte" is #
				16275	# finally executed, the branch occurs. #
				16276	# #
				16277	#########################################################################
				16278	global _fdbcc
				16279	_fdbcc:
				16280	mov.l %d0,L_SCR1(%a6) # save displacement
				16281
				16282	mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
				16283
				16284	clr.l %d1 # clear scratch reg
				16285	mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
				16286	ror.l &0x8,%d1 # rotate to top byte
				16287	fmov.l %d1,%fpsr # insert into FPSR
				16288
				16289	mov.w (tbl_fdbcc.b,%pc,%d0.w*2),%d1 # load handler offset from table
				16290	jmp (tbl_fdbcc.b,%pc,%d1.w) # jump to fdbcc routine
				16291
				16292	tbl_fdbcc:
				16293	short fdbcc_f - tbl_fdbcc # 00
				16294	short fdbcc_eq - tbl_fdbcc # 01
				16295	short fdbcc_ogt - tbl_fdbcc # 02
				16296	short fdbcc_oge - tbl_fdbcc # 03
				16297	short fdbcc_olt - tbl_fdbcc # 04
				16298	short fdbcc_ole - tbl_fdbcc # 05
				16299	short fdbcc_ogl - tbl_fdbcc # 06
				16300	short fdbcc_or - tbl_fdbcc # 07
				16301	short fdbcc_un - tbl_fdbcc # 08
				16302	short fdbcc_ueq - tbl_fdbcc # 09
				16303	short fdbcc_ugt - tbl_fdbcc # 10
				16304	short fdbcc_uge - tbl_fdbcc # 11
				16305	short fdbcc_ult - tbl_fdbcc # 12
				16306	short fdbcc_ule - tbl_fdbcc # 13
				16307	short fdbcc_neq - tbl_fdbcc # 14
				16308	short fdbcc_t - tbl_fdbcc # 15
				16309	short fdbcc_sf - tbl_fdbcc # 16
				16310	short fdbcc_seq - tbl_fdbcc # 17
				16311	short fdbcc_gt - tbl_fdbcc # 18
				16312	short fdbcc_ge - tbl_fdbcc # 19
				16313	short fdbcc_lt - tbl_fdbcc # 20
				16314	short fdbcc_le - tbl_fdbcc # 21
				16315	short fdbcc_gl - tbl_fdbcc # 22
				16316	short fdbcc_gle - tbl_fdbcc # 23
				16317	short fdbcc_ngle - tbl_fdbcc # 24
				16318	short fdbcc_ngl - tbl_fdbcc # 25
				16319	short fdbcc_nle - tbl_fdbcc # 26
				16320	short fdbcc_nlt - tbl_fdbcc # 27
				16321	short fdbcc_nge - tbl_fdbcc # 28
				16322	short fdbcc_ngt - tbl_fdbcc # 29
				16323	short fdbcc_sneq - tbl_fdbcc # 30
				16324	short fdbcc_st - tbl_fdbcc # 31
				16325
				16326	#########################################################################
				16327	# #
				16328	# IEEE Nonaware tests #
				16329	# #
				16330	# For the IEEE nonaware tests, only the false branch changes the #
				16331	# counter. However, the true branch may set bsun so we check to see #
				16332	# if the NAN bit is set, in which case BSUN and AIOP will be set. #
				16333	# #
				16334	# The cases EQ and NE are shared by the Aware and Nonaware groups #
				16335	# and are incapable of setting the BSUN exception bit. #
				16336	# #
				16337	# Typically, only one of the two possible branch directions could #
				16338	# have the NAN bit set. #
				16339	# (This is assuming the mutual exclusiveness of FPSR cc bit groupings #
				16340	# is preserved.) #
				16341	# #
				16342	#########################################################################
				16343
				16344	#
				16345	# equal:
				16346	#
				16347	# Z
				16348	#
				16349	fdbcc_eq:
				16350	fbeq.w fdbcc_eq_yes # equal?
				16351	fdbcc_eq_no:
				16352	bra.w fdbcc_false # no; go handle counter
				16353	fdbcc_eq_yes:
				16354	rts # yes; do nothing
				16355
				16356	#
				16357	# not equal:
				16358	# _
				16359	# Z
				16360	#
				16361	fdbcc_neq:
				16362	fbneq.w fdbcc_neq_yes # not equal?
				16363	fdbcc_neq_no:
				16364	bra.w fdbcc_false # no; go handle counter
				16365	fdbcc_neq_yes:
				16366	rts # yes; do nothing
				16367
				16368	#
				16369	# greater than:
				16370	# _______
				16371	# NANvZvN
				16372	#
				16373	fdbcc_gt:
				16374	fbgt.w fdbcc_gt_yes # greater than?
				16375	btst &nan_bit, FPSR_CC(%a6) # no; is NAN set in cc?
				16376	beq.w fdbcc_false # no;go handle counter
				16377	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc, AIOP bits
				16378	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16379	bne.w fdbcc_bsun # yes; we have an exception
				16380	bra.w fdbcc_false # no; go handle counter
				16381	fdbcc_gt_yes:
				16382	rts # predicate true; do nothing
				16383
				16384	#
				16385	# not greater than:
				16386	#
				16387	# NANvZvN
				16388	#
				16389	fdbcc_ngt:
				16390	fbngt.w fdbcc_ngt_yes # not greater than?
				16391	fdbcc_ngt_no:
				16392	bra.w fdbcc_false # no; go handle counter
				16393	fdbcc_ngt_yes:
				16394	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16395	beq.b fdbcc_ngt_done # no;go finish
				16396	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc, AIOP bits
				16397	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16398	bne.w fdbcc_bsun # yes; we have an exception
				16399	fdbcc_ngt_done:
				16400	rts # no; do nothing
				16401
				16402	#
				16403	# greater than or equal:
				16404	# _____
				16405	# Zv(NANvN)
				16406	#
				16407	fdbcc_ge:
				16408	fbge.w fdbcc_ge_yes # greater than or equal?
				16409	fdbcc_ge_no:
				16410	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16411	beq.w fdbcc_false # no;go handle counter
				16412	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc, AIOP bits
				16413	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16414	bne.w fdbcc_bsun # yes; we have an exception
				16415	bra.w fdbcc_false # no; go handle counter
				16416	fdbcc_ge_yes:
				16417	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16418	beq.b fdbcc_ge_yes_done # no;go do nothing
				16419	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN exc, AIOP bits
				16420	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16421	bne.w fdbcc_bsun # yes; we have an exception
				16422	fdbcc_ge_yes_done:
				16423	rts # predicate true; do nothing
				16424
				16425	#
				16426	# not (greater than or equal):
				16427	# _
				16428	# NANv(N^Z)
				16429	#
				16430	fdbcc_nge: # false: handle counter; true: flag BSUN if NAN is set, then return
				16431	fbnge.w fdbcc_nge_yes # not (greater than or equal)?
				16432	fdbcc_nge_no:
				16433	bra.w fdbcc_false # no; go handle counter
				16434	fdbcc_nge_yes:
				16435	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16436	beq.b fdbcc_nge_done # no;go finish
				16437	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16438	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16439	bne.w fdbcc_bsun # yes; we have an exception
				16440	fdbcc_nge_done:
				16441	rts # no; do nothing
				16442
				16443	#
				16444	# less than:
				16445	# _____
				16446	# N^(NANvZ)
				16447	#
				16448	fdbcc_lt: # true: return; false: flag BSUN if NAN is set, then handle counter
				16449	fblt.w fdbcc_lt_yes # less than?
				16450	fdbcc_lt_no:
				16451	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16452	beq.w fdbcc_false # no; go handle counter
				16453	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16454	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16455	bne.w fdbcc_bsun # yes; we have an exception
				16456	bra.w fdbcc_false # no; go handle counter
				16457	fdbcc_lt_yes:
				16458	rts # do nothing
				16459
				16460	#
				16461	# not less than:
				16462	# _
				16463	# NANv(ZvN)
				16464	#
				16465	fdbcc_nlt: # false: handle counter; true: flag BSUN if NAN is set, then return
				16466	fbnlt.w fdbcc_nlt_yes # not less than?
				16467	fdbcc_nlt_no:
				16468	bra.w fdbcc_false # no; go handle counter
				16469	fdbcc_nlt_yes:
				16470	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16471	beq.b fdbcc_nlt_done # no;go finish
				16472	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16473	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16474	bne.w fdbcc_bsun # yes; we have an exception
				16475	fdbcc_nlt_done:
				16476	rts # no; do nothing
				16477
				16478	#
				16479	# less than or equal:
				16480	# ___
				16481	# Zv(N^NAN)
				16482	#
				16483	fdbcc_le: # both the true and the false path check NAN and may flag BSUN
				16484	fble.w fdbcc_le_yes # less than or equal?
				16485	fdbcc_le_no:
				16486	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16487	beq.w fdbcc_false # no; go handle counter
				16488	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16489	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16490	bne.w fdbcc_bsun # yes; we have an exception
				16491	bra.w fdbcc_false # no; go handle counter
				16492	fdbcc_le_yes:
				16493	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16494	beq.b fdbcc_le_yes_done # no; go do nothing
				16495	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16496	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16497	bne.w fdbcc_bsun # yes; we have an exception
				16498	fdbcc_le_yes_done:
				16499	rts # predicate true: counter and PC are left alone
				16500
				16501	#
				16502	# not (less than or equal):
				16503	# ___
				16504	# NANv(NvZ)
				16505	#
				16506	fdbcc_nle: # false: handle counter; true: flag BSUN if NAN is set, then return
				16507	fbnle.w fdbcc_nle_yes # not (less than or equal)?
				16508	fdbcc_nle_no:
				16509	bra.w fdbcc_false # no; go handle counter
				16510	fdbcc_nle_yes:
				16511	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16512	beq.w fdbcc_nle_done # no; go finish
				16513	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16514	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16515	bne.w fdbcc_bsun # yes; we have an exception
				16516	fdbcc_nle_done:
				16517	rts # no; do nothing
				16518
				16519	#
				16520	# greater or less than:
				16521	# _____
				16522	# NANvZ
				16523	#
				16524	fdbcc_gl: # true: return; false: flag BSUN if NAN is set, then handle counter
				16525	fbgl.w fdbcc_gl_yes # greater or less than?
				16526	fdbcc_gl_no:
				16527	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16528	beq.w fdbcc_false # no; handle counter
				16529	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16530	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16531	bne.w fdbcc_bsun # yes; we have an exception
				16532	bra.w fdbcc_false # no; go handle counter
				16533	fdbcc_gl_yes:
				16534	rts # do nothing
				16535
				16536	#
				16537	# not (greater or less than):
				16538	#
				16539	# NANvZ
				16540	#
				16541	fdbcc_ngl: # false: handle counter; true: flag BSUN if NAN is set, then return
				16542	fbngl.w fdbcc_ngl_yes # not (greater or less than)?
				16543	fdbcc_ngl_no:
				16544	bra.w fdbcc_false # no; go handle counter
				16545	fdbcc_ngl_yes:
				16546	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16547	beq.b fdbcc_ngl_done # no; go finish
				16548	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16549	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16550	bne.w fdbcc_bsun # yes; we have an exception
				16551	fdbcc_ngl_done:
				16552	rts # no; do nothing
				16553
				16554	#
				16555	# greater, less, or equal:
				16556	# ___
				16557	# NAN
				16558	#
				16559	fdbcc_gle: # false path flags BSUN unconditionally (no separate NAN test)
				16560	fbgle.w fdbcc_gle_yes # greater, less, or equal?
				16561	fdbcc_gle_no:
				16562	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16563	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16564	bne.w fdbcc_bsun # yes; we have an exception
				16565	bra.w fdbcc_false # no; go handle counter
				16566	fdbcc_gle_yes:
				16567	rts # do nothing
				16568
				16569	#
				16570	# not (greater, less, or equal):
				16571	#
				16572	# NAN
				16573	#
				16574	fdbcc_ngle: # true path flags BSUN unconditionally (no separate NAN test)
				16575	fbngle.w fdbcc_ngle_yes # not (greater, less, or equal)?
				16576	fdbcc_ngle_no:
				16577	bra.w fdbcc_false # no; go handle counter
				16578	fdbcc_ngle_yes:
				16579	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16580	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16581	bne.w fdbcc_bsun # yes; we have an exception
				16582	rts # no; do nothing
				16583
				16584	#########################################################################
				16585	# #
				16586	# Miscellaneous tests #
				16587	# #
				16588	# For the IEEE miscellaneous tests, all but fdbf and fdbt can set bsun. #
				16589	# #
				16590	#########################################################################
				16591
				16592	#
				16593	# false:
				16594	#
				16595	# False
				16596	#
				16597	fdbcc_f: # no bsun possible; predicate is always false
				16598	bra.w fdbcc_false # always go handle counter
				16599
				16600	#
				16601	# true:
				16602	#
				16603	# True
				16604	#
				16605	fdbcc_t: # no bsun possible; predicate is always true
				16606	rts # do nothing (counter and PC untouched)
				16607
				16608	#
				16609	# signalling false:
				16610	#
				16611	# False
				16612	#
				16613	fdbcc_sf: # always false; flags BSUN first when NAN is set
				16614	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16615	beq.w fdbcc_false # no;go handle counter
				16616	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16617	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16618	bne.w fdbcc_bsun # yes; we have an exception
				16619	bra.w fdbcc_false # go handle counter
				16620
				16621	#
				16622	# signalling true:
				16623	#
				16624	# True
				16625	#
				16626	fdbcc_st: # always true; flags BSUN first when NAN is set
				16627	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16628	beq.b fdbcc_st_done # no;go finish
				16629	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16630	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16631	bne.w fdbcc_bsun # yes; we have an exception
				16632	fdbcc_st_done:
				16633	rts # predicate true: do nothing
				16634
				16635	#
				16636	# signalling equal:
				16637	#
				16638	# Z
				16639	#
				16640	fdbcc_seq: # both the true and the false path check NAN and may flag BSUN
				16641	fbseq.w fdbcc_seq_yes # signalling equal?
				16642	fdbcc_seq_no:
				16643	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16644	beq.w fdbcc_false # no;go handle counter
				16645	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16646	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16647	bne.w fdbcc_bsun # yes; we have an exception
				16648	bra.w fdbcc_false # go handle counter
				16649	fdbcc_seq_yes:
				16650	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16651	beq.b fdbcc_seq_yes_done # no;go do nothing
				16652	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16653	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16654	bne.w fdbcc_bsun # yes; we have an exception
				16655	fdbcc_seq_yes_done:
				16656	rts # yes; do nothing
				16657
				16658	#
				16659	# signalling not equal:
				16660	# _
				16661	# Z
				16662	#
				16663	fdbcc_sneq: # both the true and the false path check NAN and may flag BSUN
				16664	fbsneq.w fdbcc_sneq_yes # signalling not equal?
				16665	fdbcc_sneq_no:
				16666	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16667	beq.w fdbcc_false # no;go handle counter
				16668	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16669	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16670	bne.w fdbcc_bsun # yes; we have an exception
				16671	bra.w fdbcc_false # go handle counter
				16672	fdbcc_sneq_yes:
				16673	btst &nan_bit, FPSR_CC(%a6) # is NAN set?
				16674	beq.w fdbcc_sneq_done # no;go finish
				16675	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				16676	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				16677	bne.w fdbcc_bsun # yes; we have an exception
				16678	fdbcc_sneq_done:
				16679	rts # predicate true: do nothing
				16680
				16681	#########################################################################
				16682	# #
				16683	# IEEE Aware tests #
				16684	# #
				16685	# For the IEEE aware tests, action is only taken if the result is false.#
				16686	# The stated predicate is tested directly: if it is true the routine #
				16687	# just returns; otherwise it branches to the decrement routine. #
				16688	# The BSUN exception will not be set for any of these tests. #
				16689	# #
				16690	#########################################################################
				16691
				16692	#
				16693	# ordered greater than:
				16694	# _______
				16695	# NANvZvN
				16696	#
				16697	fdbcc_ogt: # IEEE aware: no BSUN handling on either path
				16698	fbogt.w fdbcc_ogt_yes # ordered greater than?
				16699	fdbcc_ogt_no:
				16700	bra.w fdbcc_false # no; go handle counter
				16701	fdbcc_ogt_yes:
				16702	rts # yes; do nothing
				16703
				16704	#
				16705	# unordered or less or equal:
				16706	# _______
				16707	# NANvZvN
				16708	#
				16709	fdbcc_ule: # IEEE aware: no BSUN handling on either path
				16710	fbule.w fdbcc_ule_yes # unordered or less or equal?
				16711	fdbcc_ule_no:
				16712	bra.w fdbcc_false # no; go handle counter
				16713	fdbcc_ule_yes:
				16714	rts # yes; do nothing
				16715
				16716	#
				16717	# ordered greater than or equal:
				16718	# _____
				16719	# Zv(NANvN)
				16720	#
				16721	fdbcc_oge: # IEEE aware: no BSUN handling on either path
				16722	fboge.w fdbcc_oge_yes # ordered greater than or equal?
				16723	fdbcc_oge_no:
				16724	bra.w fdbcc_false # no; go handle counter
				16725	fdbcc_oge_yes:
				16726	rts # yes; do nothing
				16727
				16728	#
				16729	# unordered or less than:
				16730	# _
				16731	# NANv(N^Z)
				16732	#
				16733	fdbcc_ult: # IEEE aware: no BSUN handling on either path
				16734	fbult.w fdbcc_ult_yes # unordered or less than?
				16735	fdbcc_ult_no:
				16736	bra.w fdbcc_false # no; go handle counter
				16737	fdbcc_ult_yes:
				16738	rts # yes; do nothing
				16739
				16740	#
				16741	# ordered less than:
				16742	# _____
				16743	# N^(NANvZ)
				16744	#
				16745	fdbcc_olt: # IEEE aware: no BSUN handling on either path
				16746	fbolt.w fdbcc_olt_yes # ordered less than?
				16747	fdbcc_olt_no:
				16748	bra.w fdbcc_false # no; go handle counter
				16749	fdbcc_olt_yes:
				16750	rts # yes; do nothing
				16751
				16752	#
				16753	# unordered or greater or equal:
				16754	#
				16755	# NANvZvN
				16756	#
				16757	fdbcc_uge: # IEEE aware: no BSUN handling on either path
				16758	fbuge.w fdbcc_uge_yes # unordered or greater or equal?
				16759	fdbcc_uge_no:
				16760	bra.w fdbcc_false # no; go handle counter
				16761	fdbcc_uge_yes:
				16762	rts # yes; do nothing
				16763
				16764	#
				16765	# ordered less than or equal:
				16766	# ___
				16767	# Zv(N^NAN)
				16768	#
				16769	fdbcc_ole: # IEEE aware: no BSUN handling on either path
				16770	fbole.w fdbcc_ole_yes # ordered less than or equal?
				16771	fdbcc_ole_no:
				16772	bra.w fdbcc_false # no; go handle counter
				16773	fdbcc_ole_yes:
				16774	rts # yes; do nothing
				16775
				16776	#
				16777	# unordered or greater than:
				16778	# ___
				16779	# NANv(NvZ)
				16780	#
				16781	fdbcc_ugt: # IEEE aware: no BSUN handling on either path
				16782	fbugt.w fdbcc_ugt_yes # unordered or greater than?
				16783	fdbcc_ugt_no:
				16784	bra.w fdbcc_false # no; go handle counter
				16785	fdbcc_ugt_yes:
				16786	rts # yes; do nothing
				16787
				16788	#
				16789	# ordered greater or less than:
				16790	# _____
				16791	# NANvZ
				16792	#
				16793	fdbcc_ogl: # IEEE aware: no BSUN handling on either path
				16794	fbogl.w fdbcc_ogl_yes # ordered greater or less than?
				16795	fdbcc_ogl_no:
				16796	bra.w fdbcc_false # no; go handle counter
				16797	fdbcc_ogl_yes:
				16798	rts # yes; do nothing
				16799
				16800	#
				16801	# unordered or equal:
				16802	#
				16803	# NANvZ
				16804	#
				16805	fdbcc_ueq: # IEEE aware: no BSUN handling on either path
				16806	fbueq.w fdbcc_ueq_yes # unordered or equal?
				16807	fdbcc_ueq_no:
				16808	bra.w fdbcc_false # no; go handle counter
				16809	fdbcc_ueq_yes:
				16810	rts # yes; do nothing
				16811
				16812	#
				16813	# ordered:
				16814	# ___
				16815	# NAN
				16816	#
				16817	fdbcc_or: # IEEE aware: no BSUN handling on either path
				16818	fbor.w fdbcc_or_yes # ordered?
				16819	fdbcc_or_no:
				16820	bra.w fdbcc_false # no; go handle counter
				16821	fdbcc_or_yes:
				16822	rts # yes; do nothing
				16823
				16824	#
				16825	# unordered:
				16826	#
				16827	# NAN
				16828	#
				16829	fdbcc_un: # IEEE aware: no BSUN handling on either path
				16830	fbun.w fdbcc_un_yes # unordered?
				16831	fdbcc_un_no:
				16832	bra.w fdbcc_false # no; go handle counter
				16833	fdbcc_un_yes:
				16834	rts # yes; do nothing
				16835
				16836	#######################################################################
				16837
				16838	#
				16839	# the bsun exception bit was not set.
				16840	#
				16841	# (1) subtract 1 from the count register
				16842	# (2) if (cr == -1) then
				16843	# pc = pc of next instruction
				16844	# else
				16845	# pc += sign_ext(16-bit displacement)
				16846	#
				16847	fdbcc_false: # predicate false: decrement count reg, then branch unless it reached -1
				16848	mov.b 1+EXC_OPWORD(%a6), %d1 # fetch lo opword
				16849	andi.w &0x7, %d1 # keep low 3 bits: count register number
				16850
				16851	bsr.l fetch_dreg # fetch count value (used from d0 below; d1 selects the reg -- helper not visible here)
				16852	# make sure that d0 isn't corrupted between calls...
				16853
				16854	subq.w &0x1, %d0 # Dn - 1 -> Dn (word-sized decrement)
				16855
				16856	bsr.l store_dreg_l # store new count value (assumes d1 still holds the reg number -- TODO confirm)
				16857
				16858	cmpi.w %d0, &-0x1 # is (Dn == -1)? (relies on d0 surviving store_dreg_l)
				16859	bne.b fdbcc_false_cont # no; take the branch
				16860	rts # yes; return without rewriting EXC_PC
				16861
				16862	fdbcc_false_cont:
				16863	mov.l L_SCR1(%a6),%d0 # fetch displacement
				16864	add.l USER_FPIAR(%a6),%d0 # add instruction PC
				16865	addq.l &0x4,%d0 # add instruction length
				16866	mov.l %d0,EXC_PC(%a6) # set new PC = FPIAR + 4 + displacement
				16867	rts
				16868
				16869	# the emulation routine set bsun and BSUN was enabled. have to
				16870	# fix stack and jump to the bsun handler.
				16871	# let the caller of this routine shift the stack frame up to
				16872	# eliminate the effective address field.
				16873	fdbcc_bsun: # enabled BSUN: no counter or PC update is done here
				16874	mov.b &fbsun_flg,SPCOND_FLG(%a6) # record fbsun_flg in SPCOND_FLG for the caller
				16875	rts
				16876
				16877	#########################################################################
				16878	# ftrapcc(): routine to emulate the ftrapcc instruction #
				16879	# #
				16880	# XDEF **************************************************************** #
				16881	# _ftrapcc() #
				16882	# #
				16883	# XREF **************************************************************** #
				16884	# none #
				16885	# #
				16886	# INPUT *************************************************************** #
				16887	# none #
				16888	# #
				16889	# OUTPUT ************************************************************** #
				16890	# none #
				16891	# #
				16892	# ALGORITHM *********************************************************** #
				16893	# This routine checks which conditional predicate is specified by #
				16894	# the stacked ftrapcc instruction opcode and then branches to a routine #
				16895	# for that predicate. The corresponding fbcc instruction is then used #
				16896	# to see whether the condition (specified by the stacked FPSR) is true #
				16897	# or false. #
				16898	# If a BSUN exception should be indicated, the BSUN and AIOP #
				16899	# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
				16900	# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
				16901	# enabled BSUN should not be flagged and the predicate is true, then #
				16902	# the ftrapcc_flg is set in the SPCOND_FLG location. These special #
				16903	# flags indicate to the calling routine to emulate the exceptional #
				16904	# condition. #
				16905	# #
				16906	#########################################################################
				16907
				16908	global _ftrapcc
				16909	_ftrapcc: # dispatch on the stacked predicate to one ftrapcc_* routine below
				16910	mov.w EXC_CMDREG(%a6),%d0 # fetch predicate (used as table index)
				16911
				16912	clr.l %d1 # clear scratch reg
				16913	mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
				16914	ror.l &0x8,%d1 # rotate to top byte (remaining bits stay zero)
				16915	fmov.l %d1,%fpsr # insert into FPSR so the fbcc tests below use the stacked ccodes
				16916
				16917	mov.w (tbl_ftrapcc.b,%pc,%d0.w*2), %d1 # load 16-bit routine offset from table
				16918	jmp (tbl_ftrapcc.b,%pc,%d1.w) # jump to ftrapcc routine (offset is relative to tbl_ftrapcc)
				16919
				16920	tbl_ftrapcc: # one 16-bit offset per predicate, each relative to tbl_ftrapcc
				16921	short ftrapcc_f - tbl_ftrapcc # 00
				16922	short ftrapcc_eq - tbl_ftrapcc # 01
				16923	short ftrapcc_ogt - tbl_ftrapcc # 02
				16924	short ftrapcc_oge - tbl_ftrapcc # 03
				16925	short ftrapcc_olt - tbl_ftrapcc # 04
				16926	short ftrapcc_ole - tbl_ftrapcc # 05
				16927	short ftrapcc_ogl - tbl_ftrapcc # 06
				16928	short ftrapcc_or - tbl_ftrapcc # 07
				16929	short ftrapcc_un - tbl_ftrapcc # 08
				16930	short ftrapcc_ueq - tbl_ftrapcc # 09
				16931	short ftrapcc_ugt - tbl_ftrapcc # 10
				16932	short ftrapcc_uge - tbl_ftrapcc # 11
				16933	short ftrapcc_ult - tbl_ftrapcc # 12
				16934	short ftrapcc_ule - tbl_ftrapcc # 13
				16935	short ftrapcc_neq - tbl_ftrapcc # 14
				16936	short ftrapcc_t - tbl_ftrapcc # 15
				16937	short ftrapcc_sf - tbl_ftrapcc # 16
				16938	short ftrapcc_seq - tbl_ftrapcc # 17
				16939	short ftrapcc_gt - tbl_ftrapcc # 18
				16940	short ftrapcc_ge - tbl_ftrapcc # 19
				16941	short ftrapcc_lt - tbl_ftrapcc # 20
				16942	short ftrapcc_le - tbl_ftrapcc # 21
				16943	short ftrapcc_gl - tbl_ftrapcc # 22
				16944	short ftrapcc_gle - tbl_ftrapcc # 23
				16945	short ftrapcc_ngle - tbl_ftrapcc # 24
				16946	short ftrapcc_ngl - tbl_ftrapcc # 25
				16947	short ftrapcc_nle - tbl_ftrapcc # 26
				16948	short ftrapcc_nlt - tbl_ftrapcc # 27
				16949	short ftrapcc_nge - tbl_ftrapcc # 28
				16950	short ftrapcc_ngt - tbl_ftrapcc # 29
				16951	short ftrapcc_sneq - tbl_ftrapcc # 30
				16952	short ftrapcc_st - tbl_ftrapcc # 31
				16953
				16954	#########################################################################
				16955	# #
				16956	# IEEE Nonaware tests #
				16957	# #
				16958	# For the IEEE nonaware tests, we set the result based on the #
				16959	# floating point condition codes. In addition, we check to see #
				16960	# if the NAN bit is set, in which case BSUN and AIOP will be set. #
				16961	# #
				16962	# The cases EQ and NE are shared by the Aware and Nonaware groups #
				16963	# and are incapable of setting the BSUN exception bit. #
				16964	# #
				16965	# Typically, only one of the two possible branch directions could #
				16966	# have the NAN bit set. #
				16967	# #
				16968	#########################################################################
				16969
				16970	#
				16971	# equal:
				16972	#
				16973	# Z
				16974	#
				16975	ftrapcc_eq: # "equal" predicate; neither path touches BSUN
				16976	fbeq.w ftrapcc_trap # equal? yes; go take trap
				16977	ftrapcc_eq_no:
				16978	rts # no; do nothing
				16979
				16980	#
				16981	# not equal:
				16982	# _
				16983	# Z
				16984	#
				16985	ftrapcc_neq: # "not equal" predicate; neither path touches BSUN
				16986	fbneq.w ftrapcc_trap # not equal? yes; go take trap
				16987	ftrapcc_neq_no:
				16988	rts # no; do nothing
				16989
				16990	#
				16991	# greater than:
				16992	# _______
				16993	# NANvZvN
				16994	#
				16995	ftrapcc_gt: # true: take trap; false: flag BSUN if NAN is set, else return
				16996	fbgt.w ftrapcc_trap # greater than?
				16997	ftrapcc_gt_no:
				16998	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				16999	beq.b ftrapcc_gt_done # no
				17000	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17001	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17002	bne.w ftrapcc_bsun # yes
				17003	ftrapcc_gt_done:
				17004	rts # no; do nothing
				17005
				17006	#
				17007	# not greater than:
				17008	#
				17009	# NANvZvN
				17010	#
				17011	ftrapcc_ngt: # false: return; true: flag BSUN if NAN is set, trap unless BSUN is enabled
				17012	fbngt.w ftrapcc_ngt_yes # not greater than?
				17013	ftrapcc_ngt_no:
				17014	rts # do nothing
				17015	ftrapcc_ngt_yes:
				17016	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17017	beq.w ftrapcc_trap # no; go take trap
				17018	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17019	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17020	bne.w ftrapcc_bsun # yes
				17021	bra.w ftrapcc_trap # no; go take trap
				17022
				17023	#
				17024	# greater than or equal:
				17025	# _____
				17026	# Zv(NANvN)
				17027	#
				17028	ftrapcc_ge: # both the true and the false path check NAN and may flag BSUN
				17029	fbge.w ftrapcc_ge_yes # greater than or equal?
				17030	ftrapcc_ge_no:
				17031	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17032	beq.b ftrapcc_ge_done # no; go finish
				17033	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17034	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17035	bne.w ftrapcc_bsun # yes
				17036	ftrapcc_ge_done:
				17037	rts # no; do nothing
				17038	ftrapcc_ge_yes:
				17039	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17040	beq.w ftrapcc_trap # no; go take trap
				17041	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17042	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17043	bne.w ftrapcc_bsun # yes
				17044	bra.w ftrapcc_trap # no; go take trap
				17045
				17046	#
				17047	# not (greater than or equal):
				17048	# _
				17049	# NANv(N^Z)
				17050	#
				17051	ftrapcc_nge: # false: return; true: flag BSUN if NAN is set, trap unless BSUN is enabled
				17052	fbnge.w ftrapcc_nge_yes # not (greater than or equal)?
				17053	ftrapcc_nge_no:
				17054	rts # do nothing
				17055	ftrapcc_nge_yes:
				17056	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17057	beq.w ftrapcc_trap # no; go take trap
				17058	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17059	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17060	bne.w ftrapcc_bsun # yes
				17061	bra.w ftrapcc_trap # no; go take trap
				17062
				17063	#
				17064	# less than:
				17065	# _____
				17066	# N^(NANvZ)
				17067	#
				17068	ftrapcc_lt: # true: take trap; false: flag BSUN if NAN is set, else return
				17069	fblt.w ftrapcc_trap # less than?
				17070	ftrapcc_lt_no:
				17071	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17072	beq.b ftrapcc_lt_done # no; go finish
				17073	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17074	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17075	bne.w ftrapcc_bsun # yes
				17076	ftrapcc_lt_done:
				17077	rts # no; do nothing
				17078
				17079	#
				17080	# not less than:
				17081	# _
				17082	# NANv(ZvN)
				17083	#
				17084	ftrapcc_nlt: # false: return; true: flag BSUN if NAN is set, trap unless BSUN is enabled
				17085	fbnlt.w ftrapcc_nlt_yes # not less than?
				17086	ftrapcc_nlt_no:
				17087	rts # do nothing
				17088	ftrapcc_nlt_yes:
				17089	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17090	beq.w ftrapcc_trap # no; go take trap
				17091	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17092	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17093	bne.w ftrapcc_bsun # yes
				17094	bra.w ftrapcc_trap # no; go take trap
				17095
				17096	#
				17097	# less than or equal:
				17098	# ___
				17099	# Zv(N^NAN)
				17100	#
				17101	ftrapcc_le: # both the true and the false path check NAN and may flag BSUN
				17102	fble.w ftrapcc_le_yes # less than or equal?
				17103	ftrapcc_le_no:
				17104	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17105	beq.b ftrapcc_le_done # no; go finish
				17106	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17107	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17108	bne.w ftrapcc_bsun # yes
				17109	ftrapcc_le_done:
				17110	rts # no; do nothing
				17111	ftrapcc_le_yes:
				17112	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17113	beq.w ftrapcc_trap # no; go take trap
				17114	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17115	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17116	bne.w ftrapcc_bsun # yes
				17117	bra.w ftrapcc_trap # no; go take trap
				17118
				17119	#
				17120	# not (less than or equal):
				17121	# ___
				17122	# NANv(NvZ)
				17123	#
				17124	ftrapcc_nle: # false: return; true: flag BSUN if NAN is set, trap unless BSUN is enabled
				17125	fbnle.w ftrapcc_nle_yes # not (less than or equal)?
				17126	ftrapcc_nle_no:
				17127	rts # do nothing
				17128	ftrapcc_nle_yes:
				17129	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17130	beq.w ftrapcc_trap # no; go take trap
				17131	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17132	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17133	bne.w ftrapcc_bsun # yes
				17134	bra.w ftrapcc_trap # no; go take trap
				17135
				17136	#
				17137	# greater or less than:
				17138	# _____
				17139	# NANvZ
				17140	#
				17141	ftrapcc_gl: # true: take trap; false: flag BSUN if NAN is set, else return
				17142	fbgl.w ftrapcc_trap # greater or less than?
				17143	ftrapcc_gl_no:
				17144	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17145	beq.b ftrapcc_gl_done # no; go finish
				17146	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17147	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17148	bne.w ftrapcc_bsun # yes
				17149	ftrapcc_gl_done:
				17150	rts # no; do nothing
				17151
				17152	#
				17153	# not (greater or less than):
				17154	#
				17155	# NANvZ
				17156	#
				17157	ftrapcc_ngl: # false: return; true: flag BSUN if NAN is set, trap unless BSUN is enabled
				17158	fbngl.w ftrapcc_ngl_yes # not (greater or less than)?
				17159	ftrapcc_ngl_no:
				17160	rts # do nothing
				17161	ftrapcc_ngl_yes:
				17162	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17163	beq.w ftrapcc_trap # no; go take trap
				17164	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17165	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17166	bne.w ftrapcc_bsun # yes
				17167	bra.w ftrapcc_trap # no; go take trap
				17168
				17169	#
				17170	# greater, less, or equal:
				17171	# ___
				17172	# NAN
				17173	#
				17174	ftrapcc_gle: # false path flags BSUN unconditionally (no separate NAN test)
				17175	fbgle.w ftrapcc_trap # greater, less, or equal?
				17176	ftrapcc_gle_no:
				17177	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17178	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17179	bne.w ftrapcc_bsun # yes
				17180	rts # no; do nothing
				17181
				17182	#
				17183	# not (greater, less, or equal):
				17184	#
				17185	# NAN
				17186	#
				17187	ftrapcc_ngle: # true path flags BSUN unconditionally (no separate NAN test)
				17188	fbngle.w ftrapcc_ngle_yes # not (greater, less, or equal)?
				17189	ftrapcc_ngle_no:
				17190	rts # do nothing
				17191	ftrapcc_ngle_yes:
				17192	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17193	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17194	bne.w ftrapcc_bsun # yes
				17195	bra.w ftrapcc_trap # no; go take trap
				17196
				17197	#########################################################################
				17198	# #
				17199	# Miscellaneous tests #
				17200	# #
				17201	# For the IEEE miscellaneous tests, all but ftrapcc_f and ftrapcc_t #
				17202	# can set bsun: the signalling tests set BSUN and AIOP whenever #
				17203	# the NAN condition code bit is set. #
				17204	# #
				17205	#########################################################################
				17206
				17207	#
				17208	# false:
				17209	#
				17210	# False
				17211	#
				17212	ftrapcc_f: # predicate is always false; no BSUN handling
				17213	rts # do nothing
				17214
				17215	#
				17216	# true:
				17217	#
				17218	# True
				17219	#
				17220	ftrapcc_t: # predicate is always true; no BSUN handling
				17221	bra.w ftrapcc_trap # go take trap
				17222
				17223	#
				17224	# signalling false:
				17225	#
				17226	# False
				17227	#
				17228	ftrapcc_sf: # always false (never traps); flags BSUN when NAN is set
				17229	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17230	beq.b ftrapcc_sf_done # no; go finish
				17231	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17232	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17233	bne.w ftrapcc_bsun # yes
				17234	ftrapcc_sf_done:
				17235	rts # no; do nothing
				17236
				17237	#
				17238	# signalling true:
				17239	#
				17240	# True
				17241	#
				17242	ftrapcc_st: # always true; flags BSUN when NAN is set, traps unless BSUN is enabled
				17243	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17244	beq.w ftrapcc_trap # no; go take trap
				17245	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17246	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17247	bne.w ftrapcc_bsun # yes
				17248	bra.w ftrapcc_trap # no; go take trap
				17249
				17250	#
				17251	# signalling equal:
				17252	#
				17253	# Z
				17254	#
				17255	ftrapcc_seq: # both the true and the false path check NAN and may flag BSUN
				17256	fbseq.w ftrapcc_seq_yes # signalling equal?
				17257	ftrapcc_seq_no:
				17258	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17259	beq.w ftrapcc_seq_done # no; go finish
				17260	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17261	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17262	bne.w ftrapcc_bsun # yes
				17263	ftrapcc_seq_done:
				17264	rts # no; do nothing
				17265	ftrapcc_seq_yes:
				17266	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17267	beq.w ftrapcc_trap # no; go take trap
				17268	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17269	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17270	bne.w ftrapcc_bsun # yes
				17271	bra.w ftrapcc_trap # no; go take trap
				17272
				17273	#
				17274	# signalling not equal:
				17275	# _
				17276	# Z
				17277	#
				17278	ftrapcc_sneq: # both the true and the false path check NAN and may flag BSUN
				17279	fbsneq.w ftrapcc_sneq_yes # signalling not equal?
				17280	ftrapcc_sneq_no:
				17281	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17282	beq.w ftrapcc_sneq_no_done # no; go finish
				17283	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17284	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17285	bne.w ftrapcc_bsun # yes
				17286	ftrapcc_sneq_no_done:
				17287	rts # do nothing
				17288	ftrapcc_sneq_yes:
				17289	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17290	beq.w ftrapcc_trap # no; go take trap
				17291	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN and AIOP in stacked FPSR
				17292	btst &bsun_bit, FPCR_ENABLE(%a6) # is BSUN enabled?
				17293	bne.w ftrapcc_bsun # yes
				17294	bra.w ftrapcc_trap # no; go take trap
				17295
				17296	#########################################################################
				17297	# #
				17298	# IEEE Aware tests #
				17299	# #
				17300	# For the IEEE aware tests, we only have to set the result based on the #
				17301	# floating point condition codes. The BSUN exception will not be #
				17302	# set for any of these tests. #
				17303	# #
				17304	#########################################################################
				17305
				17306	#
				17307	# ordered greater than:
				17308	# _______
				17309	# NANvZvN
				17310	#
				17311	ftrapcc_ogt: # IEEE aware: no BSUN handling
				17312	fbogt.w ftrapcc_trap # ordered greater than? yes; go take trap
				17313	ftrapcc_ogt_no:
				17314	rts # no; do nothing
				17315
				17316	#
				17317	# unordered or less or equal:
				17318	# _______
				17319	# NANvZvN
				17320	#
				17321	ftrapcc_ule: # IEEE aware: no BSUN handling
				17322	fbule.w ftrapcc_trap # unordered or less or equal? yes; go take trap
				17323	ftrapcc_ule_no:
				17324	rts # no; do nothing
				17325
				17326	#
				17327	# ordered greater than or equal:
				17328	# _____
				17329	# Zv(NANvN)
				17330	#
				17331	ftrapcc_oge: # IEEE aware: no BSUN handling
				17332	fboge.w ftrapcc_trap # ordered greater than or equal? yes; go take trap
				17333	ftrapcc_oge_no:
				17334	rts # no; do nothing
				17335
				17336	#
				17337	# unordered or less than:
				17338	# _
				17339	# NANv(N^Z)
				17340	#
				17341	ftrapcc_ult: # IEEE aware: no BSUN handling
				17342	fbult.w ftrapcc_trap # unordered or less than? yes; go take trap
				17343	ftrapcc_ult_no:
				17344	rts # no; do nothing
				17345
				17346	#
				17347	# ordered less than:
				17348	# _____
				17349	# N^(NANvZ)
				17350	#
				17351	ftrapcc_olt:
				17352	fbolt.w ftrapcc_trap # ordered less than?
				17353	ftrapcc_olt_no:
				17354	rts # do nothing
				17355
				17356	#
				17357	# unordered or greater or equal:
				17358	#
				17359	# NANvZvN
				17360	#
				17361	ftrapcc_uge:
				17362	fbuge.w ftrapcc_trap # unordered or greater than?
				17363	ftrapcc_uge_no:
				17364	rts # do nothing
				17365
				17366	#
				17367	# ordered less than or equal:
				17368	# ___
				17369	# Zv(N^NAN)
				17370	#
				17371	ftrapcc_ole:
				17372	fbole.w ftrapcc_trap # ordered greater or less than?
				17373	ftrapcc_ole_no:
				17374	rts # do nothing
				17375
				17376	#
				17377	# unordered or greater than:
				17378	# ___
				17379	# NANv(NvZ)
				17380	#
				17381	ftrapcc_ugt:
				17382	fbugt.w ftrapcc_trap # unordered or greater than?
				17383	ftrapcc_ugt_no:
				17384	rts # do nothing
				17385
				17386	#
				17387	# ordered greater or less than:
				17388	# _____
				17389	# NANvZ
				17390	#
				17391	ftrapcc_ogl:
				17392	fbogl.w ftrapcc_trap # ordered greater or less than?
				17393	ftrapcc_ogl_no:
				17394	rts # do nothing
				17395
				17396	#
				17397	# unordered or equal:
				17398	#
				17399	# NANvZ
				17400	#
				17401	ftrapcc_ueq:
				17402	fbueq.w ftrapcc_trap # unordered or equal?
				17403	ftrapcc_ueq_no:
				17404	rts # do nothing
				17405
				17406	#
				17407	# ordered:
				17408	# ___
				17409	# NAN
				17410	#
				17411	ftrapcc_or:
				17412	fbor.w ftrapcc_trap # ordered?
				17413	ftrapcc_or_no:
				17414	rts # do nothing
				17415
				17416	#
				17417	# unordered:
				17418	#
				17419	# NAN
				17420	#
				17421	ftrapcc_un:
				17422	fbun.w ftrapcc_trap # unordered?
				17423	ftrapcc_un_no:
				17424	rts # do nothing
				17425
				17426	#######################################################################
				17427
				17428	# the predicate is true and no enabled bsun exception is pending.
				17429	# we will need to jump to the ftrapcc vector. the stack frame
				17430	# is the same size as that of the fp unimp instruction. the
				17431	# only difference is that the <ea> field should hold the PC
				17432	# of the ftrapcc instruction and the vector offset field
				17433	# should denote the ftrapcc trap.
				17434	ftrapcc_trap:
				17435	mov.b &ftrapcc_flg,SPCOND_FLG(%a6) # record ftrapcc_flg in SPCOND_FLG
				17436	rts # only the flag is set here
				17437
				17438	# the emulation routine set bsun and BSUN was enabled. have to
				17439	# fix stack and jump to the bsun handler.
				17440	# let the caller of this routine shift the stack frame up to
				17441	# eliminate the effective address field.
				17442	ftrapcc_bsun:
				17443	mov.b &fbsun_flg,SPCOND_FLG(%a6) # record fbsun_flg in SPCOND_FLG
				17444	rts # only the flag is set here
				17445
				17446	#########################################################################
				17447	# fscc(): routine to emulate the fscc instruction #
				17448	# #
				17449	# XDEF **************************************************************** #
				17450	# _fscc() #
				17451	# #
				17452	# XREF **************************************************************** #
				17453	# store_dreg_b() - store result to data register file #
				17454	# dec_areg() - decrement an areg for -(an) mode #
				17455	# inc_areg() - increment an areg for (an)+ mode #
				17456	# _dmem_write_byte() - store result to memory #
				17457	# #
				17458	# INPUT *************************************************************** #
				17459	# none #
				17460	# #
				17461	# OUTPUT ************************************************************** #
				17462	# none #
				17463	# #
				17464	# ALGORITHM *********************************************************** #
				17465	# This routine checks which conditional predicate is specified by #
				17466	# the stacked fscc instruction opcode and then branches to a routine #
				17467	# for that predicate. The corresponding fbcc instruction is then used #
				17468	# to see whether the condition (specified by the stacked FPSR) is true #
				17469	# or false. #
				17470	# If a BSUN exception should be indicated, the BSUN and AIOP #
				17471	# bits are set in the stacked FPSR. If the BSUN exception is enabled, #
				17472	# the fbsun_flg is set in the SPCOND_FLG location on the stack. If an #
				17473	# enabled BSUN should not be flagged, then the true (0xff) or false #
				17474	# (0x00) result byte is stored to the data register file or memory. #
				17475	# #
				17476	#########################################################################
				17477
				17478	global _fscc
				17479	_fscc:
				17480	mov.w EXC_CMDREG(%a6),%d0 # fetch predicate
				17481
				17482	clr.l %d1 # clear scratch reg
				17483	mov.b FPSR_CC(%a6),%d1 # fetch fp ccodes
				17484	ror.l &0x8,%d1 # rotate to top byte
				17485	fmov.l %d1,%fpsr # insert into FPSR
				17486
				17487	mov.w (tbl_fscc.b,%pc,%d0.w*2),%d1 # load table offset for predicate
				17488	jmp (tbl_fscc.b,%pc,%d1.w) # jump to fscc routine
				17489
				17490	tbl_fscc:
				17491	short fscc_f - tbl_fscc # 00
				17492	short fscc_eq - tbl_fscc # 01
				17493	short fscc_ogt - tbl_fscc # 02
				17494	short fscc_oge - tbl_fscc # 03
				17495	short fscc_olt - tbl_fscc # 04
				17496	short fscc_ole - tbl_fscc # 05
				17497	short fscc_ogl - tbl_fscc # 06
				17498	short fscc_or - tbl_fscc # 07
				17499	short fscc_un - tbl_fscc # 08
				17500	short fscc_ueq - tbl_fscc # 09
				17501	short fscc_ugt - tbl_fscc # 10
				17502	short fscc_uge - tbl_fscc # 11
				17503	short fscc_ult - tbl_fscc # 12
				17504	short fscc_ule - tbl_fscc # 13
				17505	short fscc_neq - tbl_fscc # 14
				17506	short fscc_t - tbl_fscc # 15
				17507	short fscc_sf - tbl_fscc # 16
				17508	short fscc_seq - tbl_fscc # 17
				17509	short fscc_gt - tbl_fscc # 18
				17510	short fscc_ge - tbl_fscc # 19
				17511	short fscc_lt - tbl_fscc # 20
				17512	short fscc_le - tbl_fscc # 21
				17513	short fscc_gl - tbl_fscc # 22
				17514	short fscc_gle - tbl_fscc # 23
				17515	short fscc_ngle - tbl_fscc # 24
				17516	short fscc_ngl - tbl_fscc # 25
				17517	short fscc_nle - tbl_fscc # 26
				17518	short fscc_nlt - tbl_fscc # 27
				17519	short fscc_nge - tbl_fscc # 28
				17520	short fscc_ngt - tbl_fscc # 29
				17521	short fscc_sneq - tbl_fscc # 30
				17522	short fscc_st - tbl_fscc # 31
				17523
				17524	#########################################################################
				17525	# #
				17526	# IEEE Nonaware tests #
				17527	# #
				17528	# For the IEEE nonaware tests, we set the result based on the #
				17529	# floating point condition codes. In addition, we check to see #
				17530	# if the NAN bit is set, in which case BSUN and AIOP will be set. #
				17531	# #
				17532	# The cases EQ and NE are shared by the Aware and Nonaware groups #
				17533	# and are incapable of setting the BSUN exception bit. #
				17534	# #
				17535	# Typically, only one of the two possible branch directions could #
				17536	# have the NAN bit set. #
				17537	# #
				17538	#########################################################################
				17539
				17540	#
				17541	# equal:
				17542	#
				17543	# Z
				17544	#
				17545	fscc_eq:
				17546	fbeq.w fscc_eq_yes # equal?
				17547	fscc_eq_no:
				17548	clr.b %d0 # set false
				17549	bra.w fscc_done # go finish
				17550	fscc_eq_yes:
				17551	st %d0 # set true
				17552	bra.w fscc_done # go finish
				17553
				17554	#
				17555	# not equal:
				17556	# _
				17557	# Z
				17558	#
				17559	fscc_neq:
				17560	fbneq.w fscc_neq_yes # not equal?
				17561	fscc_neq_no:
				17562	clr.b %d0 # set false
				17563	bra.w fscc_done # go finish
				17564	fscc_neq_yes:
				17565	st %d0 # set true
				17566	bra.w fscc_done # go finish
				17567
				17568	#
				17569	# greater than:
				17570	# _______
				17571	# NANvZvN
				17572	#
				17573	fscc_gt:
				17574	fbgt.w fscc_gt_yes # greater than?
				17575	fscc_gt_no:
				17576	clr.b %d0 # set false
				17577	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17578	beq.w fscc_done # no;go finish
				17579	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17580	bra.w fscc_chk_bsun # go check BSUN enable
				17581	fscc_gt_yes:
				17582	st %d0 # set true
				17583	bra.w fscc_done # go finish
				17584
				17585	#
				17586	# not greater than:
				17587	#
				17588	# NANvZvN
				17589	#
				17590	fscc_ngt:
				17591	fbngt.w fscc_ngt_yes # not greater than?
				17592	fscc_ngt_no:
				17593	clr.b %d0 # set false
				17594	bra.w fscc_done # go finish
				17595	fscc_ngt_yes:
				17596	st %d0 # set true
				17597	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17598	beq.w fscc_done # no;go finish
				17599	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17600	bra.w fscc_chk_bsun # go check BSUN enable
				17601
				17602	#
				17603	# greater than or equal:
				17604	#    _____
				17605	# Zv(NANvN)
				17606	#
				17607	fscc_ge:
				17608	fbge.w fscc_ge_yes # greater than or equal?
				17609	fscc_ge_no:
				17610	clr.b %d0 # set false
				17611	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17612	beq.w fscc_done # no;go finish
				17613	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17614	bra.w fscc_chk_bsun # go check BSUN enable
				17615	fscc_ge_yes:
				17616	st %d0 # set true
				17617	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17618	beq.w fscc_done # no;go finish
				17619	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17620	bra.w fscc_chk_bsun # go check BSUN enable
				17621
				17622	#
				17623	# not (greater than or equal):
				17624	#        _
				17625	# NANv(N^Z)
				17626	#
				17627	fscc_nge:
				17628	fbnge.w fscc_nge_yes # not (greater than or equal)?
				17629	fscc_nge_no:
				17630	clr.b %d0 # set false
				17631	bra.w fscc_done # go finish
				17632	fscc_nge_yes:
				17633	st %d0 # set true
				17634	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17635	beq.w fscc_done # no;go finish
				17636	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17637	bra.w fscc_chk_bsun # go check BSUN enable
				17638
				17639	#
				17640	# less than:
				17641	#    _____
				17642	# N^(NANvZ)
				17643	#
				17644	fscc_lt:
				17645	fblt.w fscc_lt_yes # less than?
				17646	fscc_lt_no:
				17647	clr.b %d0 # set false
				17648	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17649	beq.w fscc_done # no;go finish
				17650	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17651	bra.w fscc_chk_bsun # go check BSUN enable
				17652	fscc_lt_yes:
				17653	st %d0 # set true
				17654	bra.w fscc_done # go finish
				17655
				17656	#
				17657	# not less than:
				17658	#        _
				17659	# NANv(ZvN)
				17660	#
				17661	fscc_nlt:
				17662	fbnlt.w fscc_nlt_yes # not less than?
				17663	fscc_nlt_no:
				17664	clr.b %d0 # set false
				17665	bra.w fscc_done # go finish
				17666	fscc_nlt_yes:
				17667	st %d0 # set true
				17668	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17669	beq.w fscc_done # no;go finish
				17670	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17671	bra.w fscc_chk_bsun # go check BSUN enable
				17672
				17673	#
				17674	# less than or equal:
				17675	#      ___
				17676	# Zv(N^NAN)
				17677	#
				17678	fscc_le:
				17679	fble.w fscc_le_yes # less than or equal?
				17680	fscc_le_no:
				17681	clr.b %d0 # set false
				17682	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17683	beq.w fscc_done # no;go finish
				17684	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17685	bra.w fscc_chk_bsun # go check BSUN enable
				17686	fscc_le_yes:
				17687	st %d0 # set true
				17688	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17689	beq.w fscc_done # no;go finish
				17690	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17691	bra.w fscc_chk_bsun # go check BSUN enable
				17692
				17693	#
				17694	# not (less than or equal):
				17695	#      ___
				17696	# NANv(NvZ)
				17697	#
				17698	fscc_nle:
				17699	fbnle.w fscc_nle_yes # not (less than or equal)?
				17700	fscc_nle_no:
				17701	clr.b %d0 # set false
				17702	bra.w fscc_done # go finish
				17703	fscc_nle_yes:
				17704	st %d0 # set true
				17705	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17706	beq.w fscc_done # no;go finish
				17707	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17708	bra.w fscc_chk_bsun # go check BSUN enable
				17709
				17710	#
				17711	# greater or less than:
				17712	# _____
				17713	# NANvZ
				17714	#
				17715	fscc_gl:
				17716	fbgl.w fscc_gl_yes # greater or less than?
				17717	fscc_gl_no:
				17718	clr.b %d0 # set false
				17719	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17720	beq.w fscc_done # no;go finish
				17721	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17722	bra.w fscc_chk_bsun # go check BSUN enable
				17723	fscc_gl_yes:
				17724	st %d0 # set true
				17725	bra.w fscc_done # go finish
				17726
				17727	#
				17728	# not (greater or less than):
				17729	#
				17730	# NANvZ
				17731	#
				17732	fscc_ngl:
				17733	fbngl.w fscc_ngl_yes # not (greater or less than)?
				17734	fscc_ngl_no:
				17735	clr.b %d0 # set false
				17736	bra.w fscc_done # go finish
				17737	fscc_ngl_yes:
				17738	st %d0 # set true
				17739	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17740	beq.w fscc_done # no;go finish
				17741	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17742	bra.w fscc_chk_bsun # go check BSUN enable
				17743
				17744	#
				17745	# greater, less, or equal:
				17746	# ___
				17747	# NAN
				17748	#
				17749	fscc_gle:
				17750	fbgle.w fscc_gle_yes # greater, less, or equal?
				17751	fscc_gle_no:
				17752	clr.b %d0 # set false
				17753	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17754	bra.w fscc_chk_bsun # go check BSUN enable
				17755	fscc_gle_yes:
				17756	st %d0 # set true
				17757	bra.w fscc_done # go finish
				17758
				17759	#
				17760	# not (greater, less, or equal):
				17761	#
				17762	# NAN
				17763	#
				17764	fscc_ngle:
				17765	fbngle.w fscc_ngle_yes # not (greater, less, or equal)?
				17766	fscc_ngle_no:
				17767	clr.b %d0 # set false
				17768	bra.w fscc_done # go finish
				17769	fscc_ngle_yes:
				17770	st %d0 # set true
				17771	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17772	bra.w fscc_chk_bsun # go check BSUN enable
				17773
				17774	#########################################################################
				17775	# #
				17776	# Miscellaneous tests #
				17777	# #
				17778	# F and T only set a constant result. The signalling tests (SF, ST, #
				17779	# SEQ, SNEQ) set the result and, in addition, set BSUN and AIOP if #
				17780	# the NAN bit is set in the condition codes. #
				17781	# #
				17782	#########################################################################
				17783
				17784	#
				17785	# false:
				17786	#
				17787	# False
				17788	#
				17789	fscc_f:
				17790	clr.b %d0 # set false
				17791	bra.w fscc_done # go finish
				17792
				17793	#
				17794	# true:
				17795	#
				17796	# True
				17797	#
				17798	fscc_t:
				17799	st %d0 # set true
				17800	bra.w fscc_done # go finish
				17801
				17802	#
				17803	# signalling false:
				17804	#
				17805	# False
				17806	#
				17807	fscc_sf:
				17808	clr.b %d0 # set false
				17809	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17810	beq.w fscc_done # no;go finish
				17811	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17812	bra.w fscc_chk_bsun # go check BSUN enable
				17813
				17814	#
				17815	# signalling true:
				17816	#
				17817	# True
				17818	#
				17819	fscc_st:
				17820	st %d0 # set true
				17821	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17822	beq.w fscc_done # no;go finish
				17823	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17824	bra.w fscc_chk_bsun # go check BSUN enable
				17825
				17826	#
				17827	# signalling equal:
				17828	#
				17829	# Z
				17830	#
				17831	fscc_seq:
				17832	fbseq.w fscc_seq_yes # signalling equal?
				17833	fscc_seq_no:
				17834	clr.b %d0 # set false
				17835	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17836	beq.w fscc_done # no;go finish
				17837	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17838	bra.w fscc_chk_bsun # go check BSUN enable
				17839	fscc_seq_yes:
				17840	st %d0 # set true
				17841	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17842	beq.w fscc_done # no;go finish
				17843	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17844	bra.w fscc_chk_bsun # go check BSUN enable
				17845
				17846	#
				17847	# signalling not equal:
				17848	# _
				17849	# Z
				17850	#
				17851	fscc_sneq:
				17852	fbsneq.w fscc_sneq_yes # signalling not equal?
				17853	fscc_sneq_no:
				17854	clr.b %d0 # set false
				17855	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17856	beq.w fscc_done # no;go finish
				17857	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17858	bra.w fscc_chk_bsun # go check BSUN enable
				17859	fscc_sneq_yes:
				17860	st %d0 # set true
				17861	btst &nan_bit, FPSR_CC(%a6) # is NAN set in cc?
				17862	beq.w fscc_done # no;go finish
				17863	ori.l &bsun_mask+aiop_mask, USER_FPSR(%a6) # set BSUN, AIOP exc bits
				17864	bra.w fscc_chk_bsun # go check BSUN enable
				17865
				17866	#########################################################################
				17867	# #
				17868	# IEEE Aware tests #
				17869	# #
				17870	# For the IEEE aware tests, we only have to set the result based on the #
				17871	# floating point condition codes. The BSUN exception will not be #
				17872	# set for any of these tests. #
				17873	# #
				17874	#########################################################################
				17875
				17876	#
				17877	# ordered greater than:
				17878	# _______
				17879	# NANvZvN
				17880	#
				17881	fscc_ogt:
				17882	fbogt.w fscc_ogt_yes # ordered greater than?
				17883	fscc_ogt_no:
				17884	clr.b %d0 # set false
				17885	bra.w fscc_done # go finish
				17886	fscc_ogt_yes:
				17887	st %d0 # set true
				17888	bra.w fscc_done # go finish
				17889
				17890	#
				17891	# unordered or less or equal:
				17892	#
				17893	# NANvZvN
				17894	#
				17895	fscc_ule:
				17896	fbule.w fscc_ule_yes # unordered or less or equal?
				17897	fscc_ule_no:
				17898	clr.b %d0 # set false
				17899	bra.w fscc_done # go finish
				17900	fscc_ule_yes:
				17901	st %d0 # set true
				17902	bra.w fscc_done # go finish
				17903
				17904	#
				17905	# ordered greater than or equal:
				17906	#    _____
				17907	# Zv(NANvN)
				17908	#
				17909	fscc_oge:
				17910	fboge.w fscc_oge_yes # ordered greater than or equal?
				17911	fscc_oge_no:
				17912	clr.b %d0 # set false
				17913	bra.w fscc_done # go finish
				17914	fscc_oge_yes:
				17915	st %d0 # set true
				17916	bra.w fscc_done # go finish
				17917
				17918	#
				17919	# unordered or less than:
				17920	#        _
				17921	# NANv(N^Z)
				17922	#
				17923	fscc_ult:
				17924	fbult.w fscc_ult_yes # unordered or less than?
				17925	fscc_ult_no:
				17926	clr.b %d0 # set false
				17927	bra.w fscc_done # go finish
				17928	fscc_ult_yes:
				17929	st %d0 # set true
				17930	bra.w fscc_done # go finish
				17931
				17932	#
				17933	# ordered less than:
				17934	#    _____
				17935	# N^(NANvZ)
				17936	#
				17937	fscc_olt:
				17938	fbolt.w fscc_olt_yes # ordered less than?
				17939	fscc_olt_no:
				17940	clr.b %d0 # set false
				17941	bra.w fscc_done # go finish
				17942	fscc_olt_yes:
				17943	st %d0 # set true
				17944	bra.w fscc_done # go finish
				17945
				17946	#
				17947	# unordered or greater or equal:
				17948	#       _
				17949	# NANvZvN
				17950	#
				17951	fscc_uge:
				17952	fbuge.w fscc_uge_yes # unordered or greater or equal?
				17953	fscc_uge_no:
				17954	clr.b %d0 # set false
				17955	bra.w fscc_done # go finish
				17956	fscc_uge_yes:
				17957	st %d0 # set true
				17958	bra.w fscc_done # go finish
				17959
				17960	#
				17961	# ordered less than or equal:
				17962	#      ___
				17963	# Zv(N^NAN)
				17964	#
				17965	fscc_ole:
				17966	fbole.w fscc_ole_yes # ordered less than or equal?
				17967	fscc_ole_no:
				17968	clr.b %d0 # set false
				17969	bra.w fscc_done # go finish
				17970	fscc_ole_yes:
				17971	st %d0 # set true
				17972	bra.w fscc_done # go finish
				17973
				17974	#
				17975	# unordered or greater than:
				17976	#      ___
				17977	# NANv(NvZ)
				17978	#
				17979	fscc_ugt:
				17980	fbugt.w fscc_ugt_yes # unordered or greater than?
				17981	fscc_ugt_no:
				17982	clr.b %d0 # set false
				17983	bra.w fscc_done # go finish
				17984	fscc_ugt_yes:
				17985	st %d0 # set true
				17986	bra.w fscc_done # go finish
				17987
				17988	#
				17989	# ordered greater or less than:
				17990	# _____
				17991	# NANvZ
				17992	#
				17993	fscc_ogl:
				17994	fbogl.w fscc_ogl_yes # ordered greater or less than?
				17995	fscc_ogl_no:
				17996	clr.b %d0 # set false
				17997	bra.w fscc_done # go finish
				17998	fscc_ogl_yes:
				17999	st %d0 # set true
				18000	bra.w fscc_done # go finish
				18001
				18002	#
				18003	# unordered or equal:
				18004	#
				18005	# NANvZ
				18006	#
				18007	fscc_ueq:
				18008	fbueq.w fscc_ueq_yes # unordered or equal?
				18009	fscc_ueq_no:
				18010	clr.b %d0 # set false
				18011	bra.w fscc_done # go finish
				18012	fscc_ueq_yes:
				18013	st %d0 # set true
				18014	bra.w fscc_done # go finish
				18015
				18016	#
				18017	# ordered:
				18018	# ___
				18019	# NAN
				18020	#
				18021	fscc_or:
				18022	fbor.w fscc_or_yes # ordered?
				18023	fscc_or_no:
				18024	clr.b %d0 # set false
				18025	bra.w fscc_done # go finish
				18026	fscc_or_yes:
				18027	st %d0 # set true
				18028	bra.w fscc_done # go finish
				18029
				18030	#
				18031	# unordered:
				18032	#
				18033	# NAN
				18034	#
				18035	fscc_un:
				18036	fbun.w fscc_un_yes # unordered?
				18037	fscc_un_no:
				18038	clr.b %d0 # set false
				18039	bra.w fscc_done # go finish
				18040	fscc_un_yes:
				18041	st %d0 # set true
				18042	bra.w fscc_done # go finish
				18043
				18044	#######################################################################
				18045
				18046	#
				18047	# the bsun exception bit was set. now, check to see if BSUN
				18048	# is enabled. if so, don't store result and correct stack frame
				18049	# for a bsun exception.
				18050	#
				18051	fscc_chk_bsun:
				18052	btst &bsun_bit,FPCR_ENABLE(%a6) # is BSUN enabled?
				18053	bne.w fscc_bsun # yes; don't store result
				18054
				18055	#
				18056	# either the bsun exception bit was not set, or it is set but not enabled.
				18057	# the result has been selected.
				18058	# now, check to see if the result is to be stored in the data register
				18059	# file or in memory.
				18060	#
				18061	fscc_done:
				18062	mov.l %d0,%a0 # save result for a moment
				18063
				18064	mov.b 1+EXC_OPWORD(%a6),%d1 # fetch lo opword
				18065	mov.l %d1,%d0 # make a copy
				18066	andi.b &0x38,%d1 # extract <ea> mode field
				18067
				18068	bne.b fscc_mem_op # mode != 0; it's a memory operation
				18069
				18070	mov.l %d0,%d1 # mode == 0; dst is Dn
				18071	andi.w &0x7,%d1 # pass index in d1
				18072	mov.l %a0,%d0 # pass result in d0
				18073	bsr.l store_dreg_b # save result in regfile
				18074	rts
				18075
				18076	#
				18077	# the stacked <ea> is correct with the exception of:
				18078	# -> Dn : <ea> is garbage
				18079	#
				18080	# if the addressing mode is post-increment or pre-decrement,
				18081	# then the address registers have not been updated.
				18082	#
				18083	fscc_mem_op:
				18084	cmpi.b %d1,&0x18 # is <ea> (An)+ ?
				18085	beq.b fscc_mem_inc # yes
				18086	cmpi.b %d1,&0x20 # is <ea> -(An) ?
				18087	beq.b fscc_mem_dec # yes
				18088
				18089	mov.l %a0,%d0 # pass result in d0
				18090	mov.l EXC_EA(%a6),%a0 # fetch <ea>
				18091	bsr.l _dmem_write_byte # write result byte
				18092
				18093	tst.l %d1 # did dstore fail?
				18094	bne.w fscc_err # yes; go report the failed write
				18095
				18096	rts
				18097
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	18098	# addressing mode is post-increment. write the result byte. if the write
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	18099	# fails then don't update the address register. if write passes then
				18100	# call inc_areg() to update the address register.
				18101	fscc_mem_inc:
				18102	mov.l %a0,%d0 # pass result in d0
				18103	mov.l EXC_EA(%a6),%a0 # fetch <ea>
				18104	bsr.l _dmem_write_byte # write result byte
				18105
				18106	tst.l %d1 # did dstore fail?
				18107	bne.w fscc_err # yes; An is left unchanged
				18108
				18109	mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch lo opword
				18110	andi.w &0x7,%d1 # pass index in d1
				18111	movq.l &0x1,%d0 # pass amt to inc by
				18112	bsr.l inc_areg # increment address register
				18113
				18114	rts
				18115
				18116	# addressing mode is pre-decrement. write the result byte. if the write
				18117	# fails then don't update the address register. if the write passes then
				18118	# call dec_areg() to update the address register.
				18119	fscc_mem_dec:
				18120	mov.l %a0,%d0 # pass result in d0
				18121	mov.l EXC_EA(%a6),%a0 # fetch <ea>
				18122	bsr.l _dmem_write_byte # write result byte
				18123
				18124	tst.l %d1 # did dstore fail?
				18125	bne.w fscc_err # yes; An is left unchanged
				18126
				18127	mov.b 0x1+EXC_OPWORD(%a6),%d1 # fetch lo opword
				18128	andi.w &0x7,%d1 # pass index in d1
				18129	movq.l &0x1,%d0 # pass amt to dec by
				18130	bsr.l dec_areg # decrement address register
				18131
				18132	rts
				18133
				18134	# the emulation routine set bsun and BSUN was enabled. have to
				18135	# fix stack and jump to the bsun handler.
				18136	# let the caller of this routine shift the stack frame up to
				18137	# eliminate the effective address field.
				18138	fscc_bsun:
				18139	mov.b &fbsun_flg,SPCOND_FLG(%a6) # record fbsun_flg in SPCOND_FLG
				18140	rts # result byte is not stored
				18141
				18142	# the byte write to memory has failed. set EXC_VOFF to 0x00a1 and exit
				18143	# through facc_finish. NOTE(review): older text named funimp_dacc() here.
				18144	fscc_err:
				18145	mov.w &0x00a1,EXC_VOFF(%a6) # value handed on to facc_finish
				18146	bra.l facc_finish
				18147
				18148	#########################################################################
				18149	# XDEF **************************************************************** #
				18150	# fmovm_dynamic(): emulate "fmovm" dynamic instruction #
				18151	# #
				18152	# XREF **************************************************************** #
				18153	# fetch_dreg() - fetch data register #
				18154	# {i,d,}mem_read() - fetch data from memory #
				18155	# _mem_write() - write data to memory #
				18156	# iea_iacc() - instruction memory access error occurred #
				18157	# iea_dacc() - data memory access error occurred #
				18158	# restore() - restore An index regs if access error occurred #
				18159	# #
				18160	# INPUT *************************************************************** #
				18161	# None #
				18162	# #
				18163	# OUTPUT ************************************************************** #
				18164	# If instr is "fmovm Dn,-(A7)" from supervisor mode, #
				18165	# d0 = size of dump #
				18166	# d1 = Dn #
				18167	# Else if instruction access error, #
				18168	# d0 = FSLW #
				18169	# Else if data access error, #
				18170	# d0 = FSLW #
				18171	# a0 = address of fault #
				18172	# Else #
				18173	# none. #
				18174	# #
				18175	# ALGORITHM *********************************************************** #
				18176	# The effective address must be calculated since this is entered #
				18177	# from an "Unimplemented Effective Address" exception handler. So, we #
				18178	# have our own fcalc_ea() routine here. If an access error is flagged #
				18179	# by a _{i,d,}mem_read() call, we must exit through the special #
				18180	# handler. #
				18181	# The data register is determined and its value loaded to get the #
				18182	# string of FP registers affected. This value is used as an index into #
				18183	# a lookup table such that we can determine the number of bytes #
				18184	# involved. #
				18185	# If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used #
				18186	# to read in all FP values. Again, _mem_read() may fail and require a #
				18187	# special exit. #
				18188	# If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
				18189	# to write all FP values. _mem_write() may also fail. #
				18190	# If the instruction is "fmovm.x DN,-(a7)" from supervisor mode, #
				18191	# then we return the size of the dump and the string to the caller #
				18192	# so that the move can occur outside of this routine. This special #
				18193	# case is required so that moves to the system stack are handled #
				18194	# correctly. #
				18195	# #
				18196	# DYNAMIC: #
				18197	# fmovm.x dn, <ea> #
				18198	# fmovm.x <ea>, dn #
				18199	# #
				18200	# <WORD 1> <WORD2> #
				18201	# 1111 0010 00 |<ea>| 11@& 1000 0$$$ 0000 #
				18202	# #
				18203	# & = (0): predecrement addressing mode #
				18204	# (1): postincrement or control addressing mode #
				18205	# @ = (0): move listed regs from memory to the FPU #
				18206	# (1): move listed regs from the FPU to memory #
				18207	# $$$ : index of data register holding reg select mask #
				18208	# #
				18209	# NOTES: #
				18210	# If the data register holds a zero, then the #
				18211	# instruction is a nop. #
				18212	# #
				18213	#########################################################################
				18214
				18215	global fmovm_dynamic
				18216	fmovm_dynamic:
				18217
				18218	# extract the data register in which the bit string resides...
				18219	mov.b 1+EXC_EXTWORD(%a6),%d1 # fetch extword
				18220	andi.w &0x70,%d1 # extract reg bits
				18221	lsr.b &0x4,%d1 # shift into lo bits
				18222
				18223	# fetch the bit string into d0...
				18224	bsr.l fetch_dreg # fetch reg string
				18225
				18226	andi.l &0x000000ff,%d0 # keep only lo byte
				18227
				18228	mov.l %d0,-(%sp) # save strg
				18229	mov.b (tbl_fmovm_size.w,%pc,%d0),%d0
				18230	mov.l %d0,-(%sp) # save size
				18231	bsr.l fmovm_calc_ea # calculate <ea>
				18232	mov.l (%sp)+,%d0 # restore size
				18233	mov.l (%sp)+,%d1 # restore strg
				18234
				18235	# if the bit string is a zero, then the operation is a no-op
				18236	# but, make sure that we've calculated ea and advanced the opword pointer
				18237	beq.w fmovm_data_done
				18238
				18239	# separate move ins from move outs...
				18240	btst &0x5,EXC_EXTWORD(%a6) # is it a move in or out?
				18241	beq.w fmovm_data_in # it's a move out
				18242
				18243	#############
				18244	# MOVE OUT: #
				18245	#############
				18246	fmovm_data_out:
				18247	btst &0x4,EXC_EXTWORD(%a6) # control or predecrement?
				18248	bne.w fmovm_out_ctrl # control
				18249
				18250	############################
				18251	fmovm_out_predec:
				18252	# for predecrement mode, the bit string is the opposite of both control
				18253	# operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
				18254	# here, we convert it to be just like the others...
				18255	mov.b (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
				18256
				18257	btst &0x5,EXC_SR(%a6) # user or supervisor mode?
				18258	beq.b fmovm_out_ctrl # user
				18259
				18260	fmovm_out_predec_s:
				18261	cmpi.b SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
				18262	bne.b fmovm_out_ctrl
				18263
				18264	# the operation was unfortunately an: fmovm.x dn,-(sp)
				18265	# called from supervisor mode.
				18266	# we're also passing "size" and "strg" back to the calling routine
				18267	rts
				18268
				18269	############################
				18270	fmovm_out_ctrl:
				# build an image of the selected fpregs on the stack, then copy it out
				# with _dmem_write(). as used below: a0 = <ea>, d0 = size of the dump in
				# bytes, d1.b = bit string with bit7 = FP0 ... bit0 = FP7.
				# FP0/FP1 are copied from the EXC_FP0/EXC_FP1 areas of the frame; FP2-FP7
				# are stored with fmovm.x.
				18271	mov.l %a0,%a1 # keep <ea> in a1; a0 is reused as the fill pointer
				18272
				18273	sub.l %d0,%sp # reserve "size" bytes on the stack for the dump
				18274	lea (%sp),%a0
				18275
				18276	tst.b %d1 # should FP0 be moved? (bit 7 of the string)
				18277	bpl.b fmovm_out_ctrl_fp1 # no
				18278
				18279	mov.l 0x0+EXC_FP0(%a6),(%a0)+ # yes: copy 12 bytes from EXC_FP0
				18280	mov.l 0x4+EXC_FP0(%a6),(%a0)+
				18281	mov.l 0x8+EXC_FP0(%a6),(%a0)+
				18282
				18283	fmovm_out_ctrl_fp1:
				18284	lsl.b &0x1,%d1 # should FP1 be moved? (next bit shifted into bit 7)
				18285	bpl.b fmovm_out_ctrl_fp2 # no
				18286
				18287	mov.l 0x0+EXC_FP1(%a6),(%a0)+ # yes: copy 12 bytes from EXC_FP1
				18288	mov.l 0x4+EXC_FP1(%a6),(%a0)+
				18289	mov.l 0x8+EXC_FP1(%a6),(%a0)+
				18290
				# from FP2 on, each selected register is written with fmovm.x to (a0)
				# and a0 is then stepped by 0xc bytes by hand.
				18291	fmovm_out_ctrl_fp2:
				18292	lsl.b &0x1,%d1 # should FP2 be moved?
				18293	bpl.b fmovm_out_ctrl_fp3 # no
				18294
				18295	fmovm.x &0x20,(%a0) # yes
				18296	add.l &0xc,%a0
				18297
				18298	fmovm_out_ctrl_fp3:
				18299	lsl.b &0x1,%d1 # should FP3 be moved?
				18300	bpl.b fmovm_out_ctrl_fp4 # no
				18301
				18302	fmovm.x &0x10,(%a0) # yes
				18303	add.l &0xc,%a0
				18304
				18305	fmovm_out_ctrl_fp4:
				18306	lsl.b &0x1,%d1 # should FP4 be moved?
				18307	bpl.b fmovm_out_ctrl_fp5 # no
				18308
				18309	fmovm.x &0x08,(%a0) # yes
				18310	add.l &0xc,%a0
				18311
				18312	fmovm_out_ctrl_fp5:
				18313	lsl.b &0x1,%d1 # should FP5 be moved?
				18314	bpl.b fmovm_out_ctrl_fp6 # no
				18315
				18316	fmovm.x &0x04,(%a0) # yes
				18317	add.l &0xc,%a0
				18318
				18319	fmovm_out_ctrl_fp6:
				18320	lsl.b &0x1,%d1 # should FP6 be moved?
				18321	bpl.b fmovm_out_ctrl_fp7 # no
				18322
				18323	fmovm.x &0x02,(%a0) # yes
				18324	add.l &0xc,%a0
				18325
				18326	fmovm_out_ctrl_fp7:
				18327	lsl.b &0x1,%d1 # should FP7 be moved?
				18328	bpl.b fmovm_out_ctrl_done # no
				18329
				18330	fmovm.x &0x01,(%a0) # yes
				18331	add.l &0xc,%a0
				18332
				18333	fmovm_out_ctrl_done:
				# stash <ea> in L_SCR1; fmovm_err reloads a0 from L_SCR1 on failure.
				18334	mov.l %a1,L_SCR1(%a6)
				18335
				18336	lea (%sp),%a0 # pass: supervisor src (start of the dump)
				18337	mov.l %d0,-(%sp) # save size across the call (popped below)
				18338	bsr.l _dmem_write # copy data to user mem
				18339
				18340	mov.l (%sp)+,%d0
				18341	add.l %d0,%sp # clear fpreg data from stack
				18342
				18343	tst.l %d1 # did dstore err? (nonzero d1 = failure)
				18344	bne.w fmovm_out_err # yes
				18345
				18346	rts
				18347
				18348	############
				18349	# MOVE IN: #
				18350	############
				18351	fmovm_data_in:
				# read "size" bytes into a stack buffer with _dmem_read(), then load the
				# selected fpregs from that buffer. as used below: a0 = <ea>, d0 = size
				# in bytes, d1.b = bit string with bit7 = FP0 ... bit0 = FP7.
				# FP0/FP1 are written to the EXC_FP0/EXC_FP1 areas of the frame; FP2-FP7
				# are loaded with fmovm.x.
				# <ea> is stashed in L_SCR1 first; fmovm_err reloads a0 from it on failure.
				18352	mov.l %a0,L_SCR1(%a6)
				18353
				18354	sub.l %d0,%sp # make room for fpregs
				18355	lea (%sp),%a1
				18356
				18357	mov.l %d1,-(%sp) # save bit string for later
				18358	mov.l %d0,-(%sp) # save # of bytes
				18359
				18360	bsr.l _dmem_read # copy data from user mem
				18361
				18362	mov.l (%sp)+,%d0 # retrieve # of bytes
				18363
				18364	tst.l %d1 # did dfetch fail? (nonzero d1 = failure)
				18365	bne.w fmovm_in_err # yes
				# NOTE(review): on the error branch the bit string and the buffer are
				# still on the stack; presumably the error exit discards them -- confirm.
				18366
				18367	mov.l (%sp)+,%d1 # load bit string
				18368
				18369	lea (%sp),%a0 # a0 = start of the buffer on the stack
				18370
				18371	tst.b %d1 # should FP0 be moved? (bit 7 of the string)
				18372	bpl.b fmovm_data_in_fp1 # no
				18373
				18374	mov.l (%a0)+,0x0+EXC_FP0(%a6) # yes: copy 12 bytes to EXC_FP0
				18375	mov.l (%a0)+,0x4+EXC_FP0(%a6)
				18376	mov.l (%a0)+,0x8+EXC_FP0(%a6)
				18377
				18378	fmovm_data_in_fp1:
				18379	lsl.b &0x1,%d1 # should FP1 be moved? (next bit shifted into bit 7)
				18380	bpl.b fmovm_data_in_fp2 # no
				18381
				18382	mov.l (%a0)+,0x0+EXC_FP1(%a6) # yes: copy 12 bytes to EXC_FP1
				18383	mov.l (%a0)+,0x4+EXC_FP1(%a6)
				18384	mov.l (%a0)+,0x8+EXC_FP1(%a6)
				18385
				18386	fmovm_data_in_fp2:
				18387	lsl.b &0x1,%d1 # should FP2 be moved?
				18388	bpl.b fmovm_data_in_fp3 # no
				18389
				18390	fmovm.x (%a0)+,&0x20 # yes
				18391
				18392	fmovm_data_in_fp3:
				18393	lsl.b &0x1,%d1 # should FP3 be moved?
				18394	bpl.b fmovm_data_in_fp4 # no
				18395
				18396	fmovm.x (%a0)+,&0x10 # yes
				18397
				18398	fmovm_data_in_fp4:
				18399	lsl.b &0x1,%d1 # should FP4 be moved?
				18400	bpl.b fmovm_data_in_fp5 # no
				18401
				18402	fmovm.x (%a0)+,&0x08 # yes
				18403
				18404	fmovm_data_in_fp5:
				18405	lsl.b &0x1,%d1 # should FP5 be moved?
				18406	bpl.b fmovm_data_in_fp6 # no
				18407
				18408	fmovm.x (%a0)+,&0x04 # yes
				18409
				18410	fmovm_data_in_fp6:
				18411	lsl.b &0x1,%d1 # should FP6 be moved?
				18412	bpl.b fmovm_data_in_fp7 # no
				18413
				18414	fmovm.x (%a0)+,&0x02 # yes
				18415
				18416	fmovm_data_in_fp7:
				18417	lsl.b &0x1,%d1 # should FP7 be moved?
				18418	bpl.b fmovm_data_in_done # no
				18419
				18420	fmovm.x (%a0)+,&0x01 # yes
				18421
				18422	fmovm_data_in_done:
				18423	add.l %d0,%sp # remove fpregs from stack (d0 = # of bytes)
				18424	rts
				18425
				18426	#####################################
				18427
				18428	fmovm_data_done:
				# common exit for the no-op case: reached when the bit string is zero,
				# so there is nothing to move.
				18429	rts
				18430
				18431	##############################################################################
				18432
				18433	#
				18434	# table indexed by the operation's bit string that gives the number
				18435	# of bytes that will be moved. 256 one-byte entries (32 rows of 8),
				# one for every possible 8-bit bit string.
				18436	#
				18437	# number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
				# so entry 0x00 is 0x00 (nothing to move) and entry 0xff is 0x60 (8 * 12).
				18438	#
				18439	tbl_fmovm_size:
				18440	byte 0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
				18441	byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
				18442	byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
				18443	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18444	byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
				18445	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18446	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18447	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18448	byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
				18449	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18450	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18451	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18452	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18453	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18454	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18455	byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
				18456	byte 0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
				18457	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18458	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18459	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18460	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18461	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18462	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18463	byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
				18464	byte 0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
				18465	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18466	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18467	byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
				18468	byte 0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
				18469	byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
				18470	byte 0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
				18471	byte 0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
				18472
				18473	#
				18474	# table to convert a pre-decrement bit string into a post-increment
				18475	# or control bit string: entry i holds i with its eight bits in
				# reverse order. 256 one-byte entries (32 rows of 8).
				18476	# ex: 0x00 ==> 0x00
				18477	# 0x01 ==> 0x80
				18478	# 0x02 ==> 0x40
				18479	# .
				18480	# .
				18481	# 0xfd ==> 0xbf
				18482	# 0xfe ==> 0x7f
				18483	# 0xff ==> 0xff
				18484	#
				18485	tbl_fmovm_convert:
				18486	byte 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
				18487	byte 0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
				18488	byte 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
				18489	byte 0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
				18490	byte 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
				18491	byte 0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
				18492	byte 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
				18493	byte 0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
				18494	byte 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
				18495	byte 0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
				18496	byte 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
				18497	byte 0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
				18498	byte 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
				18499	byte 0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
				18500	byte 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
				18501	byte 0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
				18502	byte 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
				18503	byte 0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
				18504	byte 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
				18505	byte 0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
				18506	byte 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
				18507	byte 0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
				18508	byte 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
				18509	byte 0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
				18510	byte 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
				18511	byte 0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
				18512	byte 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
				18513	byte 0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
				18514	byte 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
				18515	byte 0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
				18516	byte 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
				18517	byte 0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
				18518
				18519	global fmovm_calc_ea
				18520	###############################################
				18521	# _fmovm_calc_ea: calculate effective address #
				18522	###############################################
				18523	fmovm_calc_ea:
				# in:  d0 = # of bytes the operation moves; EXC_OPWORD(a6) = opcode word.
				# the routine jumps to a per-{MODE,REG} handler with a0 = # of bytes and
				# d1 = register number (0-7); the handlers leave the <ea> in a0.
				18524	mov.l %d0,%a0 # move # bytes to a0 (used by the (An)+ and -(An) handlers)
				18525
				18526	# currently, MODE and REG are taken from the EXC_OPWORD. this could be
				18527	# easily changed if they were inputs passed in registers.
				18528	mov.w EXC_OPWORD(%a6),%d0 # fetch opcode word
				18529	mov.w %d0,%d1 # make a copy
				18530
				18531	andi.w &0x3f,%d0 # keep low 6 bits: {MODE,REG} = table index
				18532	andi.l &0x7,%d1 # extract reg field
				18533
				18534	# jump to the corresponding function for each {MODE,REG} pair.
				# tbl_fea_mode holds 16-bit offsets measured from tbl_fea_mode itself.
				18535	mov.w (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
				18536	jmp (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
				18537
				18538	swbeg &64
				# 64 word-sized entries indexed by the low 6 bits of the opcode word
				# (eight groups of eight, one group per mode value, one entry per reg).
				# each entry is the distance from tbl_fea_mode to the handler; an entry
				# of "tbl_fea_mode - tbl_fea_mode" is a zero offset with no handler.
				# NOTE(review): presumably those indices are never dispatched -- confirm.
				18539	tbl_fea_mode:
				# indices 0x00-0x07: no handler
				18540	short tbl_fea_mode - tbl_fea_mode
				18541	short tbl_fea_mode - tbl_fea_mode
				18542	short tbl_fea_mode - tbl_fea_mode
				18543	short tbl_fea_mode - tbl_fea_mode
				18544	short tbl_fea_mode - tbl_fea_mode
				18545	short tbl_fea_mode - tbl_fea_mode
				18546	short tbl_fea_mode - tbl_fea_mode
				18547	short tbl_fea_mode - tbl_fea_mode
				18548
				# indices 0x08-0x0f: no handler
				18549	short tbl_fea_mode - tbl_fea_mode
				18550	short tbl_fea_mode - tbl_fea_mode
				18551	short tbl_fea_mode - tbl_fea_mode
				18552	short tbl_fea_mode - tbl_fea_mode
				18553	short tbl_fea_mode - tbl_fea_mode
				18554	short tbl_fea_mode - tbl_fea_mode
				18555	short tbl_fea_mode - tbl_fea_mode
				18556	short tbl_fea_mode - tbl_fea_mode
				18557
				# indices 0x10-0x17: (An)
				18558	short faddr_ind_a0 - tbl_fea_mode
				18559	short faddr_ind_a1 - tbl_fea_mode
				18560	short faddr_ind_a2 - tbl_fea_mode
				18561	short faddr_ind_a3 - tbl_fea_mode
				18562	short faddr_ind_a4 - tbl_fea_mode
				18563	short faddr_ind_a5 - tbl_fea_mode
				18564	short faddr_ind_a6 - tbl_fea_mode
				18565	short faddr_ind_a7 - tbl_fea_mode
				18566
				# indices 0x18-0x1f: (An)+
				18567	short faddr_ind_p_a0 - tbl_fea_mode
				18568	short faddr_ind_p_a1 - tbl_fea_mode
				18569	short faddr_ind_p_a2 - tbl_fea_mode
				18570	short faddr_ind_p_a3 - tbl_fea_mode
				18571	short faddr_ind_p_a4 - tbl_fea_mode
				18572	short faddr_ind_p_a5 - tbl_fea_mode
				18573	short faddr_ind_p_a6 - tbl_fea_mode
				18574	short faddr_ind_p_a7 - tbl_fea_mode
				18575
				# indices 0x20-0x27: -(An)
				18576	short faddr_ind_m_a0 - tbl_fea_mode
				18577	short faddr_ind_m_a1 - tbl_fea_mode
				18578	short faddr_ind_m_a2 - tbl_fea_mode
				18579	short faddr_ind_m_a3 - tbl_fea_mode
				18580	short faddr_ind_m_a4 - tbl_fea_mode
				18581	short faddr_ind_m_a5 - tbl_fea_mode
				18582	short faddr_ind_m_a6 - tbl_fea_mode
				18583	short faddr_ind_m_a7 - tbl_fea_mode
				18584
				# indices 0x28-0x2f: (d16, An)
				18585	short faddr_ind_disp_a0 - tbl_fea_mode
				18586	short faddr_ind_disp_a1 - tbl_fea_mode
				18587	short faddr_ind_disp_a2 - tbl_fea_mode
				18588	short faddr_ind_disp_a3 - tbl_fea_mode
				18589	short faddr_ind_disp_a4 - tbl_fea_mode
				18590	short faddr_ind_disp_a5 - tbl_fea_mode
				18591	short faddr_ind_disp_a6 - tbl_fea_mode
				18592	short faddr_ind_disp_a7 - tbl_fea_mode
				18593
				# indices 0x30-0x37: indexed / memory indirect forms; one shared handler
				# that works from the register number in d1
				18594	short faddr_ind_ext - tbl_fea_mode
				18595	short faddr_ind_ext - tbl_fea_mode
				18596	short faddr_ind_ext - tbl_fea_mode
				18597	short faddr_ind_ext - tbl_fea_mode
				18598	short faddr_ind_ext - tbl_fea_mode
				18599	short faddr_ind_ext - tbl_fea_mode
				18600	short faddr_ind_ext - tbl_fea_mode
				18601	short faddr_ind_ext - tbl_fea_mode
				18602
				# indices 0x38-0x3b: absolute short, absolute long, (d16, PC), PC indexed;
				# indices 0x3c-0x3f: no handler
				18603	short fabs_short - tbl_fea_mode
				18604	short fabs_long - tbl_fea_mode
				18605	short fpc_ind - tbl_fea_mode
				18606	short fpc_ind_ext - tbl_fea_mode
				18607	short tbl_fea_mode - tbl_fea_mode
				18608	short tbl_fea_mode - tbl_fea_mode
				18609	short tbl_fea_mode - tbl_fea_mode
				18610	short tbl_fea_mode - tbl_fea_mode
				18611
				18612	###################################
				18613	# Address register indirect: (An) #
				18614	###################################
				18615	faddr_ind_a0:
				# the eight handlers below each return <ea> in a0 = current value of An.
				# a0 and a1 are read from the EXC_DREGS area (offsets 0x8 and 0xc),
				# a2-a5 from the live registers, a6 from the longword at (a6), and a7
				# from EXC_A7.
				18616	mov.l EXC_DREGS+0x8(%a6),%a0 # <ea> = current a0 (from the frame)
				18617	rts
				18618
				18619	faddr_ind_a1:
				18620	mov.l EXC_DREGS+0xc(%a6),%a0 # <ea> = current a1 (from the frame)
				18621	rts
				18622
				18623	faddr_ind_a2:
				18624	mov.l %a2,%a0 # <ea> = current a2
				18625	rts
				18626
				18627	faddr_ind_a3:
				18628	mov.l %a3,%a0 # <ea> = current a3
				18629	rts
				18630
				18631	faddr_ind_a4:
				18632	mov.l %a4,%a0 # <ea> = current a4
				18633	rts
				18634
				18635	faddr_ind_a5:
				18636	mov.l %a5,%a0 # <ea> = current a5
				18637	rts
				18638
				18639	faddr_ind_a6:
				18640	mov.l (%a6),%a0 # <ea> = current a6 (kept in the longword at (a6))
				18641	rts
				18642
				18643	faddr_ind_a7:
				18644	mov.l EXC_A7(%a6),%a0 # <ea> = current a7 (from EXC_A7)
				18645	rts
				18646
				18647	#####################################################
				18648	# Address register indirect w/ postincrement: (An)+ #
				18649	#####################################################
				18650	faddr_ind_p_a0:
				# the eight handlers below expect a0 = # of bytes (set up by
				# fmovm_calc_ea). each one returns the un-incremented An in a0 as the
				# <ea> and writes An + # of bytes back to where An is kept. d0 and d1
				# are used as scratch.
				18651	mov.l EXC_DREGS+0x8(%a6),%d0 # d0 = current a0
				18652	mov.l %d0,%d1
				18653	add.l %a0,%d1 # d1 = a0 + # of bytes
				18654	mov.l %d1,EXC_DREGS+0x8(%a6) # Save incr value
				18655	mov.l %d0,%a0
				18656	rts
				18657
				18658	faddr_ind_p_a1:
				18659	mov.l EXC_DREGS+0xc(%a6),%d0 # d0 = current a1
				18660	mov.l %d0,%d1
				18661	add.l %a0,%d1 # d1 = a1 + # of bytes
				18662	mov.l %d1,EXC_DREGS+0xc(%a6) # Save incr value
				18663	mov.l %d0,%a0
				18664	rts
				18665
				18666	faddr_ind_p_a2:
				18667	mov.l %a2,%d0 # d0 = current a2
				18668	mov.l %d0,%d1
				18669	add.l %a0,%d1 # d1 = a2 + # of bytes
				18670	mov.l %d1,%a2 # Save incr value
				18671	mov.l %d0,%a0
				18672	rts
				18673
				18674	faddr_ind_p_a3:
				18675	mov.l %a3,%d0 # d0 = current a3
				18676	mov.l %d0,%d1
				18677	add.l %a0,%d1 # d1 = a3 + # of bytes
				18678	mov.l %d1,%a3 # Save incr value
				18679	mov.l %d0,%a0
				18680	rts
				18681
				18682	faddr_ind_p_a4:
				18683	mov.l %a4,%d0 # d0 = current a4
				18684	mov.l %d0,%d1
				18685	add.l %a0,%d1 # d1 = a4 + # of bytes
				18686	mov.l %d1,%a4 # Save incr value
				18687	mov.l %d0,%a0
				18688	rts
				18689
				18690	faddr_ind_p_a5:
				18691	mov.l %a5,%d0 # d0 = current a5
				18692	mov.l %d0,%d1
				18693	add.l %a0,%d1 # d1 = a5 + # of bytes
				18694	mov.l %d1,%a5 # Save incr value
				18695	mov.l %d0,%a0
				18696	rts
				18697
				18698	faddr_ind_p_a6:
				18699	mov.l (%a6),%d0 # d0 = current a6 (kept in the longword at (a6))
				18700	mov.l %d0,%d1
				18701	add.l %a0,%d1 # d1 = a6 + # of bytes
				18702	mov.l %d1,(%a6) # Save incr value
				18703	mov.l %d0,%a0
				18704	rts
				18705
				18706	faddr_ind_p_a7:
				18707	mov.b &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag: <ea> is (a7)+
				18708
				18709	mov.l EXC_A7(%a6),%d0 # d0 = current a7
				18710	mov.l %d0,%d1
				18711	add.l %a0,%d1 # d1 = a7 + # of bytes
				18712	mov.l %d1,EXC_A7(%a6) # Save incr value
				18713	mov.l %d0,%a0
				18714	rts
				18715
				18716	####################################################
				18717	# Address register indirect w/ predecrement: -(An) #
				18718	####################################################
				18719	faddr_ind_m_a0:
				# the eight handlers below expect a0 = # of bytes (set up by
				# fmovm_calc_ea). each one computes An - # of bytes, writes it back to
				# where An is kept, and returns that same value in a0 as the <ea>.
				# d0 is used as scratch.
				18720	mov.l EXC_DREGS+0x8(%a6),%d0 # d0 = current a0
				18721	sub.l %a0,%d0 # d0 = a0 - # of bytes
				18722	mov.l %d0,EXC_DREGS+0x8(%a6) # Save decr value
				18723	mov.l %d0,%a0
				18724	rts
				18725
				18726	faddr_ind_m_a1:
				18727	mov.l EXC_DREGS+0xc(%a6),%d0 # d0 = current a1
				18728	sub.l %a0,%d0 # d0 = a1 - # of bytes
				18729	mov.l %d0,EXC_DREGS+0xc(%a6) # Save decr value
				18730	mov.l %d0,%a0
				18731	rts
				18732
				18733	faddr_ind_m_a2:
				18734	mov.l %a2,%d0 # d0 = current a2
				18735	sub.l %a0,%d0 # d0 = a2 - # of bytes
				18736	mov.l %d0,%a2 # Save decr value
				18737	mov.l %d0,%a0
				18738	rts
				18739
				18740	faddr_ind_m_a3:
				18741	mov.l %a3,%d0 # d0 = current a3
				18742	sub.l %a0,%d0 # d0 = a3 - # of bytes
				18743	mov.l %d0,%a3 # Save decr value
				18744	mov.l %d0,%a0
				18745	rts
				18746
				18747	faddr_ind_m_a4:
				18748	mov.l %a4,%d0 # d0 = current a4
				18749	sub.l %a0,%d0 # d0 = a4 - # of bytes
				18750	mov.l %d0,%a4 # Save decr value
				18751	mov.l %d0,%a0
				18752	rts
				18753
				18754	faddr_ind_m_a5:
				18755	mov.l %a5,%d0 # d0 = current a5
				18756	sub.l %a0,%d0 # d0 = a5 - # of bytes
				18757	mov.l %d0,%a5 # Save decr value
				18758	mov.l %d0,%a0
				18759	rts
				18760
				18761	faddr_ind_m_a6:
				18762	mov.l (%a6),%d0 # d0 = current a6 (kept in the longword at (a6))
				18763	sub.l %a0,%d0 # d0 = a6 - # of bytes
				18764	mov.l %d0,(%a6) # Save decr value
				18765	mov.l %d0,%a0
				18766	rts
				18767
				18768	faddr_ind_m_a7:
				18769	mov.b &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag: <ea> is -(a7)
				18770
				18771	mov.l EXC_A7(%a6),%d0 # d0 = current a7
				18772	sub.l %a0,%d0 # d0 = a7 - # of bytes
				18773	mov.l %d0,EXC_A7(%a6) # Save decr value
				18774	mov.l %d0,%a0
				18775	rts
				18776
				18777	########################################################
				18778	# Address register indirect w/ displacement: (d16, An) #
				18779	########################################################
				18780	faddr_ind_disp_a0:
				# the eight handlers below are identical except for where An comes from.
				# each reads the 16-bit displacement word at EXC_EXTWPTR with
				# _imem_read_word(), advances EXC_EXTWPTR by 2, and returns
				# <ea> = An + sign-extended d16 in a0. a nonzero d1 after the read
				# sends control to iea_iacc instead of returning.
				18781	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18782	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18783	bsr.l _imem_read_word
				18784
				18785	tst.l %d1 # did ifetch fail?
				18786	bne.l iea_iacc # yes
				18787
				18788	mov.w %d0,%a0 # sign extend displacement
				18789
				18790	add.l EXC_DREGS+0x8(%a6),%a0 # <ea> = a0 + d16
				18791	rts
				18792
				18793	faddr_ind_disp_a1:
				18794	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18795	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18796	bsr.l _imem_read_word
				18797
				18798	tst.l %d1 # did ifetch fail?
				18799	bne.l iea_iacc # yes
				18800
				18801	mov.w %d0,%a0 # sign extend displacement
				18802
				18803	add.l EXC_DREGS+0xc(%a6),%a0 # <ea> = a1 + d16
				18804	rts
				18805
				18806	faddr_ind_disp_a2:
				18807	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18808	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18809	bsr.l _imem_read_word
				18810
				18811	tst.l %d1 # did ifetch fail?
				18812	bne.l iea_iacc # yes
				18813
				18814	mov.w %d0,%a0 # sign extend displacement
				18815
				18816	add.l %a2,%a0 # <ea> = a2 + d16
				18817	rts
				18818
				18819	faddr_ind_disp_a3:
				18820	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18821	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18822	bsr.l _imem_read_word
				18823
				18824	tst.l %d1 # did ifetch fail?
				18825	bne.l iea_iacc # yes
				18826
				18827	mov.w %d0,%a0 # sign extend displacement
				18828
				18829	add.l %a3,%a0 # <ea> = a3 + d16
				18830	rts
				18831
				18832	faddr_ind_disp_a4:
				18833	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18834	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18835	bsr.l _imem_read_word
				18836
				18837	tst.l %d1 # did ifetch fail?
				18838	bne.l iea_iacc # yes
				18839
				18840	mov.w %d0,%a0 # sign extend displacement
				18841
				18842	add.l %a4,%a0 # <ea> = a4 + d16
				18843	rts
				18844
				18845	faddr_ind_disp_a5:
				18846	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18847	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18848	bsr.l _imem_read_word
				18849
				18850	tst.l %d1 # did ifetch fail?
				18851	bne.l iea_iacc # yes
				18852
				18853	mov.w %d0,%a0 # sign extend displacement
				18854
				18855	add.l %a5,%a0 # <ea> = a5 + d16
				18856	rts
				18857
				18858	faddr_ind_disp_a6:
				18859	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18860	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18861	bsr.l _imem_read_word
				18862
				18863	tst.l %d1 # did ifetch fail?
				18864	bne.l iea_iacc # yes
				18865
				18866	mov.w %d0,%a0 # sign extend displacement
				18867
				18868	add.l (%a6),%a0 # <ea> = a6 + d16 (a6 kept in the longword at (a6))
				18869	rts
				18870
				18871	faddr_ind_disp_a7:
				18872	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18873	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18874	bsr.l _imem_read_word
				18875
				18876	tst.l %d1 # did ifetch fail?
				18877	bne.l iea_iacc # yes
				18878
				18879	mov.w %d0,%a0 # sign extend displacement
				18880
				18881	add.l EXC_A7(%a6),%a0 # <ea> = a7 + d16
				18882	rts
				18883
				18884	########################################################################
				18885	# Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
				18886	# " " " w/ " (base displacement): (bd, An, Xn) #
				18887	# Memory indirect postindexed: ([bd, An], Xn, od) #
				18888	# Memory indirect preindexed: ([bd, An, Xn], od) #
				18889	########################################################################
				18890	faddr_ind_ext:
				# entry: d1 = register number 0-7 from fmovm_calc_ea. adding 8 turns it
				# into the fetch_dreg() index of the matching address register.
				# the base is parked on the stack while the extension word is fetched.
				# if bit 8 of that word is set, control passes to fcalc_mem_ind with
				# a0 = base and d0 = extension word; otherwise the 8-bit displacement
				# form is computed here: <ea> = An + (index << scale) + d8.
				# NOTE(review): on the iea_iacc branch the saved base is still on the
				# stack; presumably that exit discards it -- confirm.
				18891	addq.l &0x8,%d1
				18892	bsr.l fetch_dreg # fetch base areg
				18893	mov.l %d0,-(%sp)
				18894
				18895	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				18896	addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
				18897	bsr.l _imem_read_word # fetch extword in d0
				18898
				18899	tst.l %d1 # did ifetch fail?
				18900	bne.l iea_iacc # yes
				18901
				18902	mov.l (%sp)+,%a0
				18903
				18904	btst &0x8,%d0
				18905	bne.w fcalc_mem_ind
				18906
				18907	mov.l %d0,L_SCR1(%a6) # hold extension word
				18908
				18909	mov.l %d0,%d1
				18910	rol.w &0x4,%d1
				18911	andi.w &0xf,%d1 # extract index regno (extword bits 15-12)
				18912
				18913	# count on fetch_dreg() not to alter a0...
				18914	bsr.l fetch_dreg # fetch index
				18915
				18916	mov.l %d2,-(%sp) # save d2
				18917	mov.l L_SCR1(%a6),%d2 # fetch extension word
				18918
				18919	btst &0xb,%d2 # is it word or long? (bit 11 set = long)
				18920	bne.b faii8_long
				18921	ext.l %d0 # sign extend word index
				18922	faii8_long:
				18923	mov.l %d2,%d1
				18924	rol.w &0x7,%d1
				18925	andi.l &0x3,%d1 # extract scale value (extword bits 10-9)
				18926
				18927	lsl.l %d1,%d0 # shift index by scale
				18928
				18929	extb.l %d2 # sign extend displacement (low byte of extword)
				18930	add.l %d2,%d0 # index + disp
				18931	add.l %d0,%a0 # An + (index + disp)
				18932
				18933	mov.l (%sp)+,%d2 # restore old d2
				18934	rts
				18935
				18936	###########################
				18937	# Absolute short: (XXX).W #
				18938	###########################
				18939	fabs_short:
				# reads the 16-bit address word at EXC_EXTWPTR, advances EXC_EXTWPTR by
				# 2, and returns the word sign-extended to 32 bits in a0. a nonzero d1
				# after the read sends control to iea_iacc.
				18940	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the short address word
				18941	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18942	bsr.l _imem_read_word # fetch short address
				18943
				18944	tst.l %d1 # did ifetch fail?
				18945	bne.l iea_iacc # yes
				18946
				18947	mov.w %d0,%a0 # return <ea> in a0 (word move to An sign-extends)
				18948	rts
				18949
				18950	##########################
				18951	# Absolute long: (XXX).L #
				18952	##########################
				18953	fabs_long:
				# reads the 32-bit address at EXC_EXTWPTR, advances EXC_EXTWPTR by 4,
				# and returns it in a0. a nonzero d1 after the read sends control to
				# iea_iacc.
				18954	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the long address
				18955	addq.l &0x4,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18956	bsr.l _imem_read_long # fetch long address
				18957
				18958	tst.l %d1 # did ifetch fail?
				18959	bne.l iea_iacc # yes
				18960
				18961	mov.l %d0,%a0 # return <ea> in a0
				18962	rts
				18963
				18964	#######################################################
				18965	# Program counter indirect w/ displacement: (d16, PC) #
				18966	#######################################################
				18967	fpc_ind:
				# returns <ea> = (address of the displacement word) + sign-extended d16
				# in a0. a nonzero d1 after the read sends control to iea_iacc.
				18968	mov.l EXC_EXTWPTR(%a6),%a0 # a0 = address of the d16 word
				18969	addq.l &0x2,EXC_EXTWPTR(%a6) # step the instruction ptr past it
				18970	bsr.l _imem_read_word # fetch word displacement
				18971
				18972	tst.l %d1 # did ifetch fail?
				18973	bne.l iea_iacc # yes
				18974
				18975	mov.w %d0,%a0 # sign extend displacement
				18976
				18977	add.l EXC_EXTWPTR(%a6),%a0 # pc + d16
				18978
				18979	# EXC_EXTWPTR was already advanced by 2 above, so back up by 2 here.
				18980	subq.l &0x2,%a0 # adjust <ea>
				18981	rts
				18982
				18983	##########################################################
				18984	# PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
				18985	# " " w/ " (base displacement): (bd, PC, Xn) #
				18986	# PC memory indirect postindexed: ([bd, PC], Xn, od) #
				18987	# PC memory indirect preindexed: ([bd, PC, Xn], od) #
				18988	##########################################################
				18989	fpc_ind_ext:
				# fetches the extension word at EXC_EXTWPTR and uses the address of that
				# word as the base. if bit 8 of the word is set, control passes to
				# fcalc_mem_ind with a0 = base and d0 = extension word; otherwise the
				# 8-bit displacement form is computed here:
				# <ea> = base + (index << scale) + d8.
				18990	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				18991	addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
				18992	bsr.l _imem_read_word # fetch ext word
				18993
				18994	tst.l %d1 # did ifetch fail?
				18995	bne.l iea_iacc # yes
				18996
				18997	mov.l EXC_EXTWPTR(%a6),%a0 # put base in a0
				18998	subq.l &0x2,%a0 # adjust base (undo the +2 applied above)
				18999
				19000	btst &0x8,%d0 # is disp only 8 bits? (bit 8 clear = yes)
				19001	bne.w fcalc_mem_ind # no: base displacement / memory indirect
				19002
				19003	mov.l %d0,L_SCR1(%a6) # store extension word
				19004
				19005	mov.l %d0,%d1 # make extword copy
				19006	rol.w &0x4,%d1 # rotate reg num into place
				19007	andi.w &0xf,%d1 # extract register number (extword bits 15-12)
				19008
				19009	# count on fetch_dreg() not to alter a0...
				19010	bsr.l fetch_dreg # fetch index
				19011
				19012	mov.l %d2,-(%sp) # save d2
				19013	mov.l L_SCR1(%a6),%d2 # fetch extension word
				19014
				19015	btst &0xb,%d2 # is index word or long?
				19016	bne.b fpii8_long # long
				19017	ext.l %d0 # sign extend word index
				19018	fpii8_long:
				19019	mov.l %d2,%d1
				19020	rol.w &0x7,%d1 # rotate scale value into place
				19021	andi.l &0x3,%d1 # extract scale value (extword bits 10-9)
				19022
				19023	lsl.l %d1,%d0 # shift index by scale
				19024
				19025	extb.l %d2 # sign extend displacement (low byte of extword)
				19026	add.l %d2,%d0 # disp + index
				19027	add.l %d0,%a0 # base (pc) + (index + disp)
				19028
				19029	mov.l (%sp)+,%d2 # restore temp register
				19030	rts
				19031
				19032	# d2 = index
				19033	# d3 = base
				19034	# d4 = od
				19035	# d5 = extword
				# entry (from faddr_ind_ext / fpc_ind_ext): a0 = base, d0 = extension
				# word with bit 8 set. exit: a0 = <ea>; d2-d5 are saved on entry and
				# restored on every exit from this routine.
				19036	fcalc_mem_ind:
				19037	btst &0x6,%d0 # is the index suppressed? (bit 6 set = yes)
				19038	beq.b fcalc_index
				19039
				19040	movm.l &0x3c00,-(%sp) # save d2-d5
				19041
				19042	mov.l %d0,%d5 # put extword in d5
				19043	mov.l %a0,%d3 # put base in d3
				19044
				19045	clr.l %d2 # yes, so index = 0
				19046	bra.b fbase_supp_ck
				19047
				19048	# index:
				19049	fcalc_index:
				19050	mov.l %d0,L_SCR1(%a6) # save d0 (extension word) across fetch_dreg()
				19051	bfextu %d0{&16:&4},%d1 # index regno = extword bits 15-12
				19052	bsr.l fetch_dreg
				19053
				19054	movm.l &0x3c00,-(%sp) # save d2-d5
				19055	mov.l %d0,%d2 # put index in d2
				19056	mov.l L_SCR1(%a6),%d5
				19057	mov.l %a0,%d3
				19058
				19059	btst &0xb,%d5 # is index word or long? (bit 11 set = long)
				19060	bne.b fno_ext
				19061	ext.l %d2
				19062
				# scale: shift the index left by extword bits 10-9.
				19063	fno_ext:
				19064	bfextu %d5{&21:&2},%d0
				19065	lsl.l %d0,%d2
				19066
				19067	# base address (passed as parameter in d3):
				19068	# we clear the value here if it should actually be suppressed.
				19069	fbase_supp_ck:
				19070	btst &0x7,%d5 # is the base suppressed? (bit 7 set = yes)
				19071	beq.b fno_base_sup
				19072	clr.l %d3
				19073
				19074	# base displacement:
				# size field = extword bits 5-4: below 2 = no bd, 2 = word, else long.
				19075	fno_base_sup:
				19076	bfextu %d5{&26:&2},%d0 # get bd size
				19077	# beq.l fmovm_error # if (size == 0) it's reserved
				19078
				19079	cmpi.b %d0,&0x2
				19080	blt.b fno_bd
				19081	beq.b fget_word_bd
				19082
				19083	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19084	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19085	bsr.l _imem_read_long
				19086
				19087	tst.l %d1 # did ifetch fail?
				19088	bne.l fcea_iacc # yes
				19089
				19090	bra.b fchk_ind
				19091
				19092	fget_word_bd:
				19093	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19094	addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
				19095	bsr.l _imem_read_word
				19096
				19097	tst.l %d1 # did ifetch fail?
				19098	bne.l fcea_iacc # yes
				19099
				19100	ext.l %d0 # sign extend bd
				19101
				19102	fchk_ind:
				19103	add.l %d0,%d3 # base += bd
				19104
				19105	# outer displacement:
				# selector = extword bits 1-0: 0 = no memory indirection (faii_bd),
				# 1 = null od, 2 = word od, 3 = long od.
				19106	fno_bd:
				19107	bfextu %d5{&30:&2},%d0 # is od suppressed?
				19108	beq.w faii_bd
				19109
				19110	cmpi.b %d0,&0x2
				19111	blt.b fnull_od
				19112	beq.b fword_od
				19113
				19114	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19115	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19116	bsr.l _imem_read_long
				19117
				19118	tst.l %d1 # did ifetch fail?
				19119	bne.l fcea_iacc # yes
				19120
				19121	bra.b fadd_them
				19122
				19123	fword_od:
				19124	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19125	addq.l &0x2,EXC_EXTWPTR(%a6) # incr instruction ptr
				19126	bsr.l _imem_read_word
				19127
				19128	tst.l %d1 # did ifetch fail?
				19129	bne.l fcea_iacc # yes
				19130
				19131	ext.l %d0 # sign extend od
				19132	bra.b fadd_them
				19133
				19134	fnull_od:
				19135	clr.l %d0
				19136
				# d4 = od. bit 2 of the extword selects post- (set) or pre- (clear)
				# indexing around the _dmem_read_long() of the intermediate pointer.
				19137	fadd_them:
				19138	mov.l %d0,%d4
				19139
				19140	btst &0x2,%d5 # pre or post indexing?
				19141	beq.b fpre_indexed
				19142
				19143	mov.l %d3,%a0
				19144	bsr.l _dmem_read_long
				19145
				19146	tst.l %d1 # did dfetch fail?
				19147	bne.w fcea_err # yes
				19148
				19149	add.l %d2,%d0 # <ea> += index
				19150	add.l %d4,%d0 # <ea> += od
				19151	bra.b fdone_ea
				19152
				19153	fpre_indexed:
				19154	add.l %d2,%d3 # preindexing
				19155	mov.l %d3,%a0
				19156	bsr.l _dmem_read_long
				19157
				19158	tst.l %d1 # did dfetch fail?
				19159	bne.w fcea_err # yes
				19160
				19161	add.l %d4,%d0 # ea += od
				19162	bra.b fdone_ea
				19163
				19164	faii_bd:
				19165	add.l %d2,%d3 # ea = (base + bd) + index
				19166	mov.l %d3,%d0
				19167	fdone_ea:
				19168	mov.l %d0,%a0
				19169
				19170	movm.l (%sp)+,&0x003c # restore d2-d5
				19171	rts
				19172
				19173	#########################################################
				19174	fcea_err:
				# reached from fcalc_mem_ind when _dmem_read_long() fails. d3 still holds
				# the address that was passed to the read, so it is handed on in a0.
				# NOTE(review): 0x0101 in d0 is presumably a fault status code for
				# iea_dacc -- confirm against that routine.
				19175	mov.l %d3,%a0
				19176
				19177	movm.l (%sp)+,&0x003c # restore d2-d5
				19178	mov.w &0x0101,%d0
				19179	bra.l iea_dacc
				19180
				# reached from fcalc_mem_ind when an _imem_read_*() fetch fails.
				19181	fcea_iacc:
				19182	movm.l (%sp)+,&0x003c # restore d2-d5
				19183	bra.l iea_iacc
				19184
				19185	fmovm_out_err:
				# reached from fmovm_out_ctrl_done when _dmem_write() reports failure.
				# NOTE(review): restore() is defined elsewhere and 0x00e1 / 0x0161 are
				# presumably fault status codes for iea_dacc (write vs. read) -- confirm.
				19186	bsr.l restore
				19187	mov.w &0x00e1,%d0
				19188	bra.b fmovm_err
				19189
				# reached from fmovm_data_in when _dmem_read() reports failure.
				19190	fmovm_in_err:
				19191	bsr.l restore
				19192	mov.w &0x0161,%d0
				19193
				# common tail: a0 = the <ea> that the move routines stashed in L_SCR1.
				19194	fmovm_err:
				19195	mov.l L_SCR1(%a6),%a0
				19196	bra.l iea_dacc
				19197
				19198	#########################################################################
				19199	# XDEF **************************************************************** #
				19200	# fmovm_ctrl(): emulate fmovm.l of control registers instr #
				19201	# #
				19202	# XREF **************************************************************** #
				19203	# _imem_read_long() - read longword from memory #
				19204	# iea_iacc() - _imem_read_long() failed; error recovery #
				19205	# #
				19206	# INPUT *************************************************************** #
				19207	# None #
				19208	# #
				19209	# OUTPUT ************************************************************** #
				19210	# If _imem_read_long() doesn't fail: #
				19211	# USER_FPCR(a6) = new FPCR value #
				19212	# USER_FPSR(a6) = new FPSR value #
				19213	# USER_FPIAR(a6) = new FPIAR value #
				19214	# #
				19215	# ALGORITHM *********************************************************** #
				19216	# Decode the instruction type by looking at the extension word #
				19217	# in order to see how many control registers to fetch from memory. #
				19218	# Fetch them using _imem_read_long(). If this fetch fails, exit through #
				19219	# the special access error exit handler iea_iacc(). #
				19220	# #
				19221	# Instruction word decoding: #
				19222	# #
				19223	# fmovem.l #<data>, {FPIAR&|FPCR&|FPSR} #
				19224	# #
				19225	# WORD1 WORD2 #
				19226	# 1111 0010 00 111100 100$ $$00 0000 0000 #
				19227	# #
				19228	# $$$ (100): FPCR #
				19229	# (010): FPSR #
				19230	# (001): FPIAR #
				19231	# (000): FPIAR #
				19232	# #
				19233	#########################################################################
				19234
				19235	global fmovm_ctrl
				19236	fmovm_ctrl:
				19237	mov.b EXC_EXTWORD(%a6),%d0 # fetch reg select bits
				19238	cmpi.b %d0,&0x9c # fpcr & fpsr & fpiar ?
				19239	beq.w fctrl_in_7 # yes
				19240	cmpi.b %d0,&0x98 # fpcr & fpsr ?
				19241	beq.w fctrl_in_6 # yes
				19242	cmpi.b %d0,&0x94 # fpcr & fpiar ?
				19243	beq.b fctrl_in_5 # yes
				19244
				19245	# fmovem.l #<data>, fpsr/fpiar
				19246	fctrl_in_3:
				19247	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19248	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19249	bsr.l _imem_read_long # fetch FPSR from mem
				19250
				19251	tst.l %d1 # did ifetch fail?
				19252	bne.l iea_iacc # yes
				19253
				19254	mov.l %d0,USER_FPSR(%a6) # store new FPSR to stack
				19255	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19256	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19257	bsr.l _imem_read_long # fetch FPIAR from mem
				19258
				19259	tst.l %d1 # did ifetch fail?
				19260	bne.l iea_iacc # yes
				19261
				19262	mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
				19263	rts
				19264
				19265	# fmovem.l #<data>, fpcr/fpiar
				19266	fctrl_in_5:
				19267	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19268	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19269	bsr.l _imem_read_long # fetch FPCR from mem
				19270
				19271	tst.l %d1 # did ifetch fail?
				19272	bne.l iea_iacc # yes
				19273
				19274	mov.l %d0,USER_FPCR(%a6) # store new FPCR to stack
				19275	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19276	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19277	bsr.l _imem_read_long # fetch FPIAR from mem
				19278
				19279	tst.l %d1 # did ifetch fail?
				19280	bne.l iea_iacc # yes
				19281
				19282	mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to stack
				19283	rts
				19284
				19285	# fmovem.l #<data>, fpcr/fpsr
				19286	fctrl_in_6:
				19287	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19288	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19289	bsr.l _imem_read_long # fetch FPCR from mem
				19290
				19291	tst.l %d1 # did ifetch fail?
				19292	bne.l iea_iacc # yes
				19293
				19294	mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
				19295	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19296	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19297	bsr.l _imem_read_long # fetch FPSR from mem
				19298
				19299	tst.l %d1 # did ifetch fail?
				19300	bne.l iea_iacc # yes
				19301
				19302	mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
				19303	rts
				19304
				19305	# fmovem.l #<data>, fpcr/fpsr/fpiar
				19306	fctrl_in_7:
				19307	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19308	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19309	bsr.l _imem_read_long # fetch FPCR from mem
				19310
				19311	tst.l %d1 # did ifetch fail?
				19312	bne.l iea_iacc # yes
				19313
				19314	mov.l %d0,USER_FPCR(%a6) # store new FPCR to mem
				19315	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19316	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19317	bsr.l _imem_read_long # fetch FPSR from mem
				19318
				19319	tst.l %d1 # did ifetch fail?
				19320	bne.l iea_iacc # yes
				19321
				19322	mov.l %d0,USER_FPSR(%a6) # store new FPSR to mem
				19323	mov.l EXC_EXTWPTR(%a6),%a0 # fetch instruction addr
				19324	addq.l &0x4,EXC_EXTWPTR(%a6) # incr instruction ptr
				19325	bsr.l _imem_read_long # fetch FPIAR from mem
				19326
				19327	tst.l %d1 # did ifetch fail?
				19328	bne.l iea_iacc # yes
				19329
				19330	mov.l %d0,USER_FPIAR(%a6) # store new FPIAR to mem
				19331	rts
				19332
				19333	#########################################################################
				19334	# XDEF **************************************************************** #
				19335	# _dcalc_ea(): calc correct <ea> from <ea> stacked on exception #
				19336	# #
				19337	# XREF **************************************************************** #
				19338	# inc_areg() - increment an address register #
				19339	# dec_areg() - decrement an address register #
				19340	# #
				19341	# INPUT *************************************************************** #
				19342	# d0 = number of bytes to adjust <ea> by #
				19343	# #
				19344	# OUTPUT ************************************************************** #
				19345	# a0 = <ea> to fetch the operand from; for #<data>, SPCOND_FLG(a6) = immed_flg and a0 = (longword at USER_FPIAR(a6)) + 4 #
				19346	# #
				19347	# ALGORITHM *********************************************************** #
				19348	# "Dummy" CALCulate Effective Address: #
				19349	# The stacked <ea> for FP unimplemented instructions and opclass #
				19350	# two packed instructions is correct with the exception of... #
				19351	# #
				19352	# 1) -(An) : The register is not updated regardless of size. #
				19353	# Also, for extended precision and packed, the #
				19354	# stacked <ea> value is 8 bytes too big. #
				19355	# 2) (An)+ : The register is not updated. #
				19356	# 3) #<data> : The upper longword of the immediate operand is #
				19357	# stacked. b,w,l and s sizes are completely stacked; #
				19358	# d,x, and p are not. #
				19359	# #
				19360	#########################################################################
				19361
				19362	global _dcalc_ea
				19363	_dcalc_ea:
				19364	mov.l %d0, %a0 # park byte count in %a0; %d0 is reused below
				19365
				19366	mov.b 1+EXC_OPWORD(%a6), %d0 # fetch low byte of opcode word (mode,reg)
				19367	mov.l %d0, %d1 # make a copy
				19368
				19369	andi.w &0x38, %d0 # extract mode field
				19370	andi.l &0x7, %d1 # extract reg field
				19371
				19372	cmpi.b %d0,&0x18 # is mode (An)+ ?
				19373	beq.b dcea_pi # yes
				19374
				19375	cmpi.b %d0,&0x20 # is mode -(An) ?
				19376	beq.b dcea_pd # yes
				19377
				19378	or.w %d1,%d0 # concat mode,reg
				19379	cmpi.b %d0,&0x3c # is mode #<data>?
				19380
				19381	beq.b dcea_imm # yes
				19382
				19383	mov.l EXC_EA(%a6),%a0 # any other mode: return stacked <ea> unchanged
				19384	rts
				19385
				19386	# need to set immediate data flag here since we'll need to do
				19387	# an imem_read to fetch this later.
				19388	dcea_imm:
				19389	mov.b &immed_flg,SPCOND_FLG(%a6) # tell caller the operand is immediate data
				19390	lea ([USER_FPIAR,%a6],0x4),%a0 # return <ea> = (longword at USER_FPIAR(a6)) + 4
				19391	rts
				19392
				19393	# here, the <ea> is stacked correctly. however, we must update the
				19394	# address register...
				19395	dcea_pi:
				19396	mov.l %a0,%d0 # pass amt to inc by
				19397	bsr.l inc_areg # inc addr register
				19398
				19399	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				19400	rts
				19401
				19402	# the <ea> is stacked correctly for all but extended and packed which
				19403	# the <ea>s are 8 bytes too large.
				19404	# it would make no sense to have a pre-decrement to a7 in supervisor
				19405	# mode so we don't even worry about this tricky case here : )
				19406	dcea_pd:
				19407	mov.l %a0,%d0 # pass amt to dec by
				19408	bsr.l dec_areg # dec addr register
				19409
				19410	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				19411
				19412	cmpi.b %d0,&0xc # is opsize ext or packed? NOTE(review): relies on dec_areg leaving the byte count in %d0 - confirm
				19413	beq.b dcea_pd2 # yes
				19414	rts
				19415	dcea_pd2:
				19416	sub.l &0x8,%a0 # correct <ea> (stacked value is 8 too big)
				19417	mov.l %a0,EXC_EA(%a6) # put correct <ea> on stack
				19418	rts
				19419
				19420	#########################################################################
				19421	# XDEF **************************************************************** #
				19422	# _calc_ea_fout(): calculate correct stacked <ea> for extended #
				19423	# and packed data opclass 3 operations. #
				19424	# #
				19425	# XREF **************************************************************** #
				19426	# None #
				19427	# #
				19428	# INPUT *************************************************************** #
				19429	# None #
				19430	# #
				19431	# OUTPUT ************************************************************** #
				19432	# a0 = return correct effective address #
				19433	# #
				19434	# ALGORITHM *********************************************************** #
				19435	# For opclass 3 extended and packed data operations, the <ea> #
				19436	# stacked for the exception is incorrect for -(an) and (an)+ addressing #
				19437	# modes. Also, while we're at it, the address register itself must get #
				19438	# updated. #
				19439	# So, for -(an), we must subtract 8 off of the stacked <ea> value #
				19440	# and return that value as the correct <ea> and store that value in An. #
				19441	# For (an)+, the stacked <ea> is correct but we must adjust An by +12. #
				19442	# #
				19443	#########################################################################
				19444
				19445	# This calc_ea is currently used to retrieve the correct <ea>
				19446	# for fmove outs of type extended and packed.
				19447	global _calc_ea_fout
				19448	_calc_ea_fout:
				19449	mov.b 1+EXC_OPWORD(%a6),%d0 # fetch low byte of opcode word (mode,reg)
				19450	mov.l %d0,%d1 # make a copy
				19451
				19452	andi.w &0x38,%d0 # extract mode field
				19453	andi.l &0x7,%d1 # extract reg field
				19454
				19455	cmpi.b %d0,&0x18 # is mode (An)+ ?
				19456	beq.b ceaf_pi # yes
				19457
				19458	cmpi.b %d0,&0x20 # is mode -(An) ?
				19459	beq.w ceaf_pd # yes
				19460
				19461	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				19462	rts
				19463
				19464	# (An)+ : extended and packed fmove out
				19465	# : stacked <ea> is correct
				19466	# : "An" not updated
				19467	ceaf_pi:
				19468	mov.w (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # d1 = table offset for register n
				19469	mov.l EXC_EA(%a6),%a0 # return the stacked <ea> as is
				19470	jmp (tbl_ceaf_pi.b,%pc,%d1.w*1) # go add 12 to An
				19471
				19472	swbeg &0x8
				19473	tbl_ceaf_pi:
				19474	short ceaf_pi0 - tbl_ceaf_pi
				19475	short ceaf_pi1 - tbl_ceaf_pi
				19476	short ceaf_pi2 - tbl_ceaf_pi
				19477	short ceaf_pi3 - tbl_ceaf_pi
				19478	short ceaf_pi4 - tbl_ceaf_pi
				19479	short ceaf_pi5 - tbl_ceaf_pi
				19480	short ceaf_pi6 - tbl_ceaf_pi
				19481	short ceaf_pi7 - tbl_ceaf_pi
				19482
				19483	ceaf_pi0:
				19484	addi.l &0xc,EXC_DREGS+0x8(%a6) # reg 0: bump frame slot (presumably the saved a0 - confirm)
				19485	rts
				19486	ceaf_pi1:
				19487	addi.l &0xc,EXC_DREGS+0xc(%a6) # reg 1: bump frame slot (presumably the saved a1 - confirm)
				19488	rts
				19489	ceaf_pi2:
				19490	add.l &0xc,%a2 # reg 2: bump the live register
				19491	rts
				19492	ceaf_pi3:
				19493	add.l &0xc,%a3 # reg 3: bump the live register
				19494	rts
				19495	ceaf_pi4:
				19496	add.l &0xc,%a4 # reg 4: bump the live register
				19497	rts
				19498	ceaf_pi5:
				19499	add.l &0xc,%a5 # reg 5: bump the live register
				19500	rts
				19501	ceaf_pi6:
				19502	addi.l &0xc,EXC_A6(%a6) # reg 6: bump the copy at EXC_A6
				19503	rts
				19504	ceaf_pi7:
				19505	mov.b &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case for the caller
				19506	addi.l &0xc,EXC_A7(%a6) # reg 7: bump the copy at EXC_A7
				19507	rts
				19508
				19509	# -(An) : extended and packed fmove out
				19510	# : stacked <ea> = actual <ea> + 8
				19511	# : "An" not updated
				19512	ceaf_pd:
				19513	mov.w (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # d1 = table offset for register n
				19514	mov.l EXC_EA(%a6),%a0 # fetch stacked <ea>
				19515	sub.l &0x8,%a0 # a0 = corrected <ea>
				19516	sub.l &0x8,EXC_EA(%a6) # correct the stacked copy too
				19517	jmp (tbl_ceaf_pd.b,%pc,%d1.w*1) # go store corrected <ea> in An
				19518
				19519	swbeg &0x8
				19520	tbl_ceaf_pd:
				19521	short ceaf_pd0 - tbl_ceaf_pd
				19522	short ceaf_pd1 - tbl_ceaf_pd
				19523	short ceaf_pd2 - tbl_ceaf_pd
				19524	short ceaf_pd3 - tbl_ceaf_pd
				19525	short ceaf_pd4 - tbl_ceaf_pd
				19526	short ceaf_pd5 - tbl_ceaf_pd
				19527	short ceaf_pd6 - tbl_ceaf_pd
				19528	short ceaf_pd7 - tbl_ceaf_pd
				19529
				19530	ceaf_pd0:
				19531	mov.l %a0,EXC_DREGS+0x8(%a6) # reg 0: update frame slot (presumably the saved a0 - confirm)
				19532	rts
				19533	ceaf_pd1:
				19534	mov.l %a0,EXC_DREGS+0xc(%a6) # reg 1: update frame slot (presumably the saved a1 - confirm)
				19535	rts
				19536	ceaf_pd2:
				19537	mov.l %a0,%a2 # reg 2: update the live register
				19538	rts
				19539	ceaf_pd3:
				19540	mov.l %a0,%a3 # reg 3: update the live register
				19541	rts
				19542	ceaf_pd4:
				19543	mov.l %a0,%a4 # reg 4: update the live register
				19544	rts
				19545	ceaf_pd5:
				19546	mov.l %a0,%a5 # reg 5: update the live register
				19547	rts
				19548	ceaf_pd6:
				19549	mov.l %a0,EXC_A6(%a6) # reg 6: update the copy at EXC_A6
				19550	rts
				19551	ceaf_pd7:
				19552	mov.l %a0,EXC_A7(%a6) # reg 7: update the copy at EXC_A7
				19553	mov.b &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case for the caller
				19554	rts
				19555
				19556	#########################################################################
				19557	# XDEF **************************************************************** #
				19558	# _load_fop(): load operand for unimplemented FP exception #
				19559	# #
				19560	# XREF **************************************************************** #
				19561	# set_tag_x() - determine ext prec optype tag #
				19562	# set_tag_s() - determine sgl prec optype tag #
				19563	# set_tag_d() - determine dbl prec optype tag #
				19564	# unnorm_fix() - convert normalized number to denorm or zero #
				19565	# norm() - normalize a denormalized number #
				19566	# get_packed() - fetch a packed operand from memory #
				19567	# _dcalc_ea() - calculate <ea>, fixing An in process #
				19568	# #
				19569	# _imem_read_{word,long}() - read from instruction memory #
				19570	# _dmem_read() - read from data memory #
				19571	# _dmem_read_{byte,word,long}() - read from data memory #
				19572	# #
				19573	# facc_in_{b,w,l,d,x}() - mem read failed; special exit point #
				19574	# #
				19575	# INPUT *************************************************************** #
				19576	# None #
				19577	# #
				19578	# OUTPUT ************************************************************** #
				19579	# If memory access doesn't fail: #
				19580	# FP_SRC(a6) = source operand in extended precision #
				19581	# FP_DST(a6) = destination operand in extended precision #
				19582	# #
				19583	# ALGORITHM *********************************************************** #
				19584	# This is called from the Unimplemented FP exception handler in #
				19585	# order to load the source and maybe destination operand into #
				19586	# FP_SRC(a6) and FP_DST(a6). If the instruction was opclass zero, load #
				19587	# the source and destination from the FP register file. Set the optype #
				19588	# tags for both if dyadic, one for monadic. If a number is an UNNORM, #
				19589	# convert it to a DENORM or a ZERO. #
				19590	# If the instruction is opclass two (memory->reg), then fetch #
				19591	# the destination from the register file and the source operand from #
				19592	# memory. Tag and fix both as above w/ opclass zero instructions. #
				19593	# If the source operand is byte,word,long, or single, it may be #
				19594	# in the data register file. If it's actually out in memory, use one of #
				19595	# the mem_read() routines to fetch it. If the mem_read() access returns #
				19596	# a failing value, exit through the special facc_in() routine which #
				19597	# will create an access error exception frame from the current exception #
				19598	# frame. #
				19599	# Immediate data and regular data accesses are separated because #
				19600	# if an immediate data access fails, the resulting fault status #
				19601	# longword stacked for the access error exception must have the #
				19602	# instruction bit set. #
				19603	# #
				19604	#########################################################################
				19605
				19606	global _load_fop
				19607	_load_fop:
				19608
				19609	# 15 13 12 10 9 7 6 0
				19610	# / \ / \ / \ / \
				19611	# ---------------------------------
				19612	# | opclass | RX | RY | EXTENSION | (2nd word of general FP instruction)
				19613	# ---------------------------------
				19614	#
				19615
				19616	# bfextu EXC_CMDREG(%a6){&0:&3}, %d0 # extract opclass
				19617	# cmpi.b %d0, &0x2 # which class is it? ('000,'010,'011)
				19618	# beq.w op010 # handle <ea> -> fpn
				19619	# bgt.w op011 # handle fpn -> <ea>
				19620
				19621	# we're not using op011 for now...
				19622	btst &0x6,EXC_CMDREG(%a6) # bit 6 of first cmd byte (word bit 14) set?
				19623	bne.b op010 # yes; opclass '010
				19624
				19625	############################
				19626	# OPCLASS '000: reg -> reg #
				19627	############################
				19628	op000:
				19629	mov.b 1+EXC_CMDREG(%a6),%d0 # fetch extension word lo
				19630	btst &0x5,%d0 # testing extension bits
				19631	beq.b op000_src # (bit 5 == 0) => monadic
				19632	btst &0x4,%d0 # (bit 5 == 1)
				19633	beq.b op000_dst # (bit 4 == 0) => dyadic
				19634	and.w &0x007f,%d0 # extract extension bits {6:0}
				19635	cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
				19636	bne.b op000_src # no; load the src only
				19637
				19638	op000_dst:
				19639	bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
				19640	bsr.l load_fpn2 # fetch dst fpreg into FP_DST
				19641
				19642	bsr.l set_tag_x # get dst optype tag
				19643
				19644	cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
				19645	beq.b op000_dst_unnorm # yes
				19646	op000_dst_cont:
				19647	mov.b %d0, DTAG(%a6) # store the dst optype tag
				19648
				19649	op000_src:
				19650	bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src field
				19651	bsr.l load_fpn1 # fetch src fpreg into FP_SRC
				19652
				19653	bsr.l set_tag_x # get src optype tag
				19654
				19655	cmpi.b %d0, &UNNORM # is src fpreg an UNNORM?
				19656	beq.b op000_src_unnorm # yes
				19657	op000_src_cont:
				19658	mov.b %d0, STAG(%a6) # store the src optype tag
				19659	rts
				19660
				19661	op000_dst_unnorm:
				19662	bsr.l unnorm_fix # fix the dst UNNORM (d0 is stored as the tag afterwards)
				19663	bra.b op000_dst_cont
				19664	op000_src_unnorm:
				19665	bsr.l unnorm_fix # fix the src UNNORM (d0 is stored as the tag afterwards)
				19666	bra.b op000_src_cont
				19667
				19668	#############################
				19669	# OPCLASS '010: <ea> -> reg #
				19670	#############################
				19671	op010:
				19672	mov.w EXC_CMDREG(%a6),%d0 # fetch extension word
				19673	btst &0x5,%d0 # testing extension bits
				19674	beq.b op010_src # (bit 5 == 0) => monadic
				19675	btst &0x4,%d0 # (bit 5 == 1)
				19676	beq.b op010_dst # (bit 4 == 0) => dyadic
				19677	and.w &0x007f,%d0 # extract extension bits {6:0}
				19678	cmpi.w %d0,&0x0038 # is it an fcmp (dyadic) ?
				19679	bne.b op010_src # no; load the src only
				19680
				19681	op010_dst:
				19682	bfextu EXC_CMDREG(%a6){&6:&3}, %d0 # extract dst field
				19683	bsr.l load_fpn2 # fetch dst fpreg ptr
				19684
				19685	bsr.l set_tag_x # get dst type tag
				19686
				19687	cmpi.b %d0, &UNNORM # is dst fpreg an UNNORM?
				19688	beq.b op010_dst_unnorm # yes
				19689	op010_dst_cont:
				19690	mov.b %d0, DTAG(%a6) # store the dst optype tag
				19691
				19692	op010_src:
				19693	bfextu EXC_CMDREG(%a6){&3:&3}, %d0 # extract src type field
				19694
				19695	bfextu EXC_OPWORD(%a6){&10:&3}, %d1 # extract <ea> mode field
				19696	bne.w fetch_from_mem # mode != 0: src op is in memory
				19697
				19698	op010_dreg:
				19699	clr.b STAG(%a6) # either NORM or ZERO
				19700	bfextu EXC_OPWORD(%a6){&13:&3}, %d1 # extract src reg field
				19701
				19702	mov.w (tbl_op010_dreg.b,%pc,%d0.w*2), %d0 # jmp based on optype
				19703	jmp (tbl_op010_dreg.b,%pc,%d0.w*1) # fetch src from dreg
				19704
				19705	op010_dst_unnorm:
				19706	bsr.l unnorm_fix # fix the dst UNNORM (d0 is stored as the tag afterwards)
				19707	bra.b op010_dst_cont
				19708
				19709	swbeg &0x8
				19710	tbl_op010_dreg:
				19711	short opd_long - tbl_op010_dreg
				19712	short opd_sgl - tbl_op010_dreg
				19713	short tbl_op010_dreg - tbl_op010_dreg
				19714	short tbl_op010_dreg - tbl_op010_dreg
				19715	short opd_word - tbl_op010_dreg
				19716	short tbl_op010_dreg - tbl_op010_dreg
				19717	short opd_byte - tbl_op010_dreg
				19718	short tbl_op010_dreg - tbl_op010_dreg
				19719
				19720	#
				19721	# LONG: source is a longword held in a data register; can be either NORM or ZERO...
				19722	#
				19723	opd_long:
				19724	bsr.l fetch_dreg # fetch long in d0
				19725	fmov.l %d0, %fp0 # load a long; convert to xprec
				19726	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19727	fbeq.w opd_long_zero # long is a ZERO
				19728	rts
				19729	opd_long_zero:
				19730	mov.b &ZERO, STAG(%a6) # set ZERO optype flag
				19731	rts
				19732
				19733	#
				19734	# WORD: source is a word held in a data register; can be either NORM or ZERO...
				19735	#
				19736	opd_word:
				19737	bsr.l fetch_dreg # fetch word in d0
				19738	fmov.w %d0, %fp0 # load a word; convert to xprec
				19739	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19740	fbeq.w opd_word_zero # WORD is a ZERO
				19741	rts
				19742	opd_word_zero:
				19743	mov.b &ZERO, STAG(%a6) # set ZERO optype flag
				19744	rts
				19745
				19746	#
				19747	# BYTE: source is a byte held in a data register; can be either NORM or ZERO...
				19748	#
				19749	opd_byte:
				19750	bsr.l fetch_dreg # fetch byte in d0
				19751	fmov.b %d0, %fp0 # load a byte; convert to xprec
				19752	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19753	fbeq.w opd_byte_zero # byte is a ZERO
				19754	rts
				19755	opd_byte_zero:
				19756	mov.b &ZERO, STAG(%a6) # set ZERO optype flag
				19757	rts
				19758
				19759	#
				19760	# SGL: can be either NORM, DENORM, ZERO, INF, QNAN or SNAN but not UNNORM
				19761	#
				19762	# separate SNANs and DENORMs so they can be loaded w/ special care.
				19763	# all others can simply be moved "in" using fmove.
				19764	#
				19765	opd_sgl:
				19766	bsr.l fetch_dreg # fetch sgl in d0
				19767	mov.l %d0,L_SCR1(%a6) # park it in memory so it can be passed by pointer
				19768
				19769	lea L_SCR1(%a6), %a0 # pass: ptr to the sgl
				19770	bsr.l set_tag_s # determine sgl type
				19771	mov.b %d0, STAG(%a6) # save the src tag
				19772
				19773	cmpi.b %d0, &SNAN # is it an SNAN?
				19774	beq.w get_sgl_snan # yes
				19775
				19776	cmpi.b %d0, &DENORM # is it a DENORM?
				19777	beq.w get_sgl_denorm # yes
				19778
				19779	fmov.s (%a0), %fp0 # no, so can load it regular (assumes set_tag_s left %a0 -> L_SCR1 - confirm)
				19780	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19781	rts
				19782
				19783	##############################################################################
				19784
				19785	#########################################################################
				19786	# fetch_from_mem(): #
				19787	# - src is out in memory. must: #
				19788	# (1) calc ea - must read AFTER you know the src type since #
				19789	# if the ea is -() or ()+, need to know # of bytes. #
				19790	# (2) read it in from either user or supervisor space #
				19791	# (3) if (b || w || l) then simply read in #
				19792	# if (s || d || x) then check for SNAN,UNNORM,DENORM #
				19793	# if (packed) then hand off to get_packed() via load_packed #
				19794	# INPUT: #
				19795	# %d0 : src type field #
				19796	#########################################################################
				19797	fetch_from_mem:
				19798	clr.b STAG(%a6) # either NORM or ZERO
				19799
				19800	mov.w (tbl_fp_type.b,%pc,%d0.w*2), %d0 # index by src type field
				19801	jmp (tbl_fp_type.b,%pc,%d0.w*1) # dispatch to the per-format loader
				19802
				19803	swbeg &0x8
				19804	tbl_fp_type:
				19805	short load_long - tbl_fp_type
				19806	short load_sgl - tbl_fp_type
				19807	short load_ext - tbl_fp_type
				19808	short load_packed - tbl_fp_type
				19809	short load_word - tbl_fp_type
				19810	short load_dbl - tbl_fp_type
				19811	short load_byte - tbl_fp_type
				19812	short tbl_fp_type - tbl_fp_type
				19813
				19814	#########################################
				19815	# load a LONG into %fp0: #
				19816	# -number can't fault #
				19817	# (1) calc ea #
				19818	# (2) read 4 bytes into %d0 #
				19819	# (3) fmov.l into %fp0 #
				19820	#########################################
				19821	load_long:
				19822	movq.l &0x4, %d0 # pass: 4 (bytes)
				19823	bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
				19824
				19825	cmpi.b SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag immediate data?
				19826	beq.b load_long_immed # yes; fetch via instruction read
				19827
				19828	bsr.l _dmem_read_long # fetch src operand from memory
				19829
				19830	tst.l %d1 # did dfetch fail?
				19831	bne.l facc_in_l # yes
				19832
				19833	load_long_cont:
				19834	fmov.l %d0, %fp0 # read into %fp0;convert to xprec
				19835	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19836
				19837	fbeq.w load_long_zero # src op is a ZERO
				19838	rts
				19839	load_long_zero:
				19840	mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
				19841	rts
				19842
				19843	load_long_immed:
				19844	bsr.l _imem_read_long # fetch src operand immed data
				19845
				19846	tst.l %d1 # did ifetch fail?
				19847	bne.l funimp_iacc # yes
				19848	bra.b load_long_cont
				19849
				19850	#########################################
				19851	# load a WORD into %fp0: #
				19852	# -number can't fault #
				19853	# (1) calc ea #
				19854	# (2) read 2 bytes into %d0 #
				19855	# (3) fmov.w into %fp0 #
				19856	#########################################
				19857	load_word:
				19858	movq.l &0x2, %d0 # pass: 2 (bytes)
				19859	bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
				19860
				19861	cmpi.b SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag immediate data?
				19862	beq.b load_word_immed # yes; fetch via instruction read
				19863
				19864	bsr.l _dmem_read_word # fetch src operand from memory
				19865
				19866	tst.l %d1 # did dfetch fail?
				19867	bne.l facc_in_w # yes
				19868
				19869	load_word_cont:
				19870	fmov.w %d0, %fp0 # read into %fp0;convert to xprec
				19871	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19872
				19873	fbeq.w load_word_zero # src op is a ZERO
				19874	rts
				19875	load_word_zero:
				19876	mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
				19877	rts
				19878
				19879	load_word_immed:
				19880	bsr.l _imem_read_word # fetch src operand immed data
				19881
				19882	tst.l %d1 # did ifetch fail?
				19883	bne.l funimp_iacc # yes
				19884	bra.b load_word_cont
				19885
				19886	#########################################
				19887	# load a BYTE into %fp0: #
				19888	# -number can't fault #
				19889	# (1) calc ea #
				19890	# (2) read 1 byte into %d0 #
				19891	# (3) fmov.b into %fp0 #
				19892	#########################################
				19893	load_byte:
				19894	movq.l &0x1, %d0 # pass: 1 (byte)
				19895	bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
				19896
				19897	cmpi.b SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag immediate data?
				19898	beq.b load_byte_immed # yes; fetch via instruction read
				19899
				19900	bsr.l _dmem_read_byte # fetch src operand from memory
				19901
				19902	tst.l %d1 # did dfetch fail?
				19903	bne.l facc_in_b # yes
				19904
				19905	load_byte_cont:
				19906	fmov.b %d0, %fp0 # read into %fp0;convert to xprec
				19907	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19908
				19909	fbeq.w load_byte_zero # src op is a ZERO
				19910	rts
				19911	load_byte_zero:
				19912	mov.b &ZERO, STAG(%a6) # set optype tag to ZERO
				19913	rts
				19914
				19915	load_byte_immed:
				19916	bsr.l _imem_read_word # fetch src operand immed data as a word; fmov.b above uses only its low byte
				19917
				19918	tst.l %d1 # did ifetch fail?
				19919	bne.l funimp_iacc # yes
				19920	bra.b load_byte_cont
				19921
				19922	#########################################
				19923	# load a SGL into %fp0: #
				19924	# -number can't fault #
				19925	# (1) calc ea #
				19926	# (2) read 4 bytes into L_SCR1 #
				19927	# (3) tag it; DENORM/SNAN get special handling, else fmov.s into %fp0 #
				19928	#########################################
				19929	load_sgl:
				19930	movq.l &0x4, %d0 # pass: 4 (bytes)
				19931	bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
				19932
				19933	cmpi.b SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag immediate data?
				19934	beq.b load_sgl_immed # yes; fetch via instruction read
				19935
				19936	bsr.l _dmem_read_long # fetch src operand from memory
				19937	mov.l %d0, L_SCR1(%a6) # store src op on stack
				19938
				19939	tst.l %d1 # did dfetch fail?
				19940	bne.l facc_in_l # yes
				19941
				19942	load_sgl_cont:
				19943	lea L_SCR1(%a6), %a0 # pass: ptr to sgl src op
				19944	bsr.l set_tag_s # determine src type tag
				19945	mov.b %d0, STAG(%a6) # save src optype tag on stack
				19946
				19947	cmpi.b %d0, &DENORM # is it a sgl DENORM?
				19948	beq.w get_sgl_denorm # yes
				19949
				19950	cmpi.b %d0, &SNAN # is it a sgl SNAN?
				19951	beq.w get_sgl_snan # yes
				19952
				19953	fmov.s L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
				19954	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				19955	rts
				19956
				19957	load_sgl_immed:
				19958	bsr.l _imem_read_long # fetch src operand immed data
				19959
				19960	tst.l %d1 # did ifetch fail?
				19961	bne.l funimp_iacc # yes
					mov.l %d0, L_SCR1(%a6) # store src op on stack; load_sgl_cont tags and loads it from L_SCR1
				19962	bra.b load_sgl_cont
				19963
				19964	# must convert sgl denorm format to an Xprec denorm fmt suitable for
				19965	# normalization...
				19966	# %a0 : points to sgl denorm
				19967	get_sgl_denorm:
				19968	clr.w FP_SRC_EX(%a6) # start with sign = 0, exponent = 0
				19969	bfextu (%a0){&9:&23}, %d0 # fetch sgl hi(_mantissa): the 23 bits after bit offset 9
				19970	lsl.l &0x8, %d0 # move them up to bits 30..8; bit 31 stays clear
				19971	mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
				19972	clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
				19973
				19974	clr.w FP_SRC_EX(%a6) # repeats the clear at the top of the routine
				19975	btst &0x7, (%a0) # is sgn bit set?
				19976	beq.b sgl_dnrm_norm # no; leave sign clear
				19977	bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
				19978
				19979	sgl_dnrm_norm:
				19980	lea FP_SRC(%a6), %a0 # pass: ptr to the value to normalize
				19981	bsr.l norm # normalize number; shift amount comes back in %d0
				19982	mov.w &0x3f81, %d1 # xprec exp = 0x3f81
				19983	sub.w %d0, %d1 # exp = 0x3f81 - shft amt.
				19984	or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
				19985
				19986	mov.b &NORM, STAG(%a6) # fix src type tag
				19987	rts
				19988
				19989	# convert sgl to ext SNAN; result is written to FP_SRC
				19990	# %a0 : points to sgl SNAN
				19991	get_sgl_snan:
				19992	mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN (sign clear for now)
				19993	bfextu (%a0){&9:&23}, %d0 # fetch the 23 mantissa bits after bit offset 9
				19994	lsl.l &0x8, %d0 # extract and insert hi(man)
				19995	mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
				19996	clr.l FP_SRC_LO(%a6) # set ext lo(_mantissa)
				19997
				19998	btst &0x7, (%a0) # see if sign of SNAN is set
				19999	beq.b no_sgl_snan_sgn # no; leave sign clear
				20000	bset &0x7, FP_SRC_EX(%a6) # copy the sign into the xprec value
				20001	no_sgl_snan_sgn:
				20002	rts
				20003
				20004	#########################################
				20005	# load a DBL into %fp0: #
				20006	# -number can't fault #
				20007	# (1) calc ea #
				20008	# (2) read 8 bytes into L_SCR(1,2)#
				20009	# (3) tag it; DENORM/SNAN get special handling, else fmov.d into %fp0 #
				20010	#########################################
				20011	load_dbl:
				20012	movq.l &0x8, %d0 # pass: 8 (bytes)
				20013	bsr.l _dcalc_ea # calc <ea>; <ea> in %a0
				20014
				20015	cmpi.b SPCOND_FLG(%a6),&immed_flg # did _dcalc_ea flag immediate data?
				20016	beq.b load_dbl_immed # yes; fetch via instruction read
				20017
				20018	lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
				20019	movq.l &0x8, %d0 # pass: # bytes to read
				20020	bsr.l _dmem_read # fetch src operand from memory
				20021
				20022	tst.l %d1 # did dfetch fail?
				20023	bne.l facc_in_d # yes
				20024
				20025	load_dbl_cont:
				20026	lea L_SCR1(%a6), %a0 # pass: ptr to input dbl
				20027	bsr.l set_tag_d # determine src type tag
				20028	mov.b %d0, STAG(%a6) # set src optype tag
				20029
				20030	cmpi.b %d0, &DENORM # is it a dbl DENORM?
				20031	beq.w get_dbl_denorm # yes
				20032
				20033	cmpi.b %d0, &SNAN # is it a dbl SNAN?
				20034	beq.w get_dbl_snan # yes
				20035
				20036	fmov.d L_SCR1(%a6), %fp0 # read into %fp0;convert to xprec
				20037	fmovm.x &0x80, FP_SRC(%a6) # return src op in FP_SRC
				20038	rts
				20039
				20040	load_dbl_immed:
				20041	lea L_SCR1(%a6), %a1 # pass: ptr to input dbl tmp space
				20042	movq.l &0x8, %d0 # pass: # bytes to read
				20043	bsr.l _imem_read # fetch src operand immed data
				20044
				20045	tst.l %d1 # did ifetch fail?
				20046	bne.l funimp_iacc # yes
				20047	bra.b load_dbl_cont
				20048
				20049	# must convert dbl denorm format to an Xprec denorm fmt suitable for
				20050	# normalization...
				20051	# %a0 : loc. of dbl denorm
				20052	get_dbl_denorm:
				20053	clr.w FP_SRC_EX(%a6) # start with sign = 0, exponent = 0
				20054	bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa): 31 bits from bit offset 12
				20055	mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa); bit 31 stays clear
				20056	bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa): last 21 bits of 2nd longword
				20057	mov.l &0xb, %d1 # shift count = 11
				20058	lsl.l %d1, %d0 # left-align the 21 bits in the longword
				20059	mov.l %d0, FP_SRC_LO(%a6) # set ext lo(_mantissa)
				20060
				20061	btst &0x7, (%a0) # is sgn bit set?
				20062	beq.b dbl_dnrm_norm # no; leave sign clear
				20063	bset &0x7, FP_SRC_EX(%a6) # set sgn of xprec value
				20064
				20065	dbl_dnrm_norm:
				20066	lea FP_SRC(%a6), %a0 # pass: ptr to the value to normalize
				20067	bsr.l norm # normalize number; shift amount comes back in %d0
				20068	mov.w &0x3c01, %d1 # xprec exp = 0x3c01
				20069	sub.w %d0, %d1 # exp = 0x3c01 - shft amt.
				20070	or.w %d1, FP_SRC_EX(%a6) # {sgn,exp}
				20071
				20072	mov.b &NORM, STAG(%a6) # fix src type tag
				20073	rts
				20074
				20075	# convert dbl to ext SNAN; result is written to FP_SRC
				20076	# %a0 : points to dbl SNAN
				20077	get_dbl_snan:
				20078	mov.w &0x7fff, FP_SRC_EX(%a6) # set exp of SNAN (sign clear for now)
				20079
				20080	bfextu (%a0){&12:&31}, %d0 # fetch hi(_mantissa): 31 bits from bit offset 12
				20081	mov.l %d0, FP_SRC_HI(%a6) # set ext hi(_mantissa)
				20082	bfextu 4(%a0){&11:&21}, %d0 # fetch lo(_mantissa): last 21 bits of 2nd longword
				20083	mov.l &0xb, %d1 # shift count = 11
				20084	lsl.l %d1, %d0 # left-align the 21 bits in the longword
				20085	mov.l %d0, FP_SRC_LO(%a6) # set ext lo(_mantissa)
				20086
				20087	btst &0x7, (%a0) # see if sign of SNAN is set
				20088	beq.b no_dbl_snan_sgn # no; leave sign clear
				20089	bset &0x7, FP_SRC_EX(%a6) # copy the sign into the xprec value
				20090	no_dbl_snan_sgn:
				20091	rts
				20092
				20093	#################################################
				20094	# load an Xprec operand into FP_SRC: #
				20095	# -number can't fault #
				20096	# (1) calc ea #
				20097	# (2) read 12 bytes into FP_SRC #
				20098	# (3) set STAG; fix the operand first if it's an UNNORM #
				20099	#################################################
				20100	load_ext:
				20101	mov.l &0xc, %d0 # pass: 12 (bytes)
				20102	bsr.l _dcalc_ea # calc <ea>
				20103
				20104	lea FP_SRC(%a6), %a1 # pass: ptr to input ext tmp space
				20105	mov.l &0xc, %d0 # pass: # of bytes to read
				20106	bsr.l _dmem_read # fetch src operand from memory
				20107
				20108	tst.l %d1 # did dfetch fail?
				20109	bne.l facc_in_x # yes
				20110
				20111	lea FP_SRC(%a6), %a0 # pass: ptr to src op
				20112	bsr.l set_tag_x # determine src type tag; returned in d0
				20113
				20114	cmpi.b %d0, &UNNORM # is the src op an UNNORM?
				20115	beq.b load_ext_unnorm # yes
				20116
				20117	mov.b %d0, STAG(%a6) # store the src optype tag
				20118	rts
				20119
				20120	load_ext_unnorm:
				20121	bsr.l unnorm_fix # fix the src UNNORM; d0 is used as the new tag
				20122	mov.b %d0, STAG(%a6) # store the src optype tag
				20123	rts
				20124
				20125	#################################################
				20126	# load a packed into %fp0: #
				20127	# -number can't fault #
				20128	# (1) calc ea #
				20129	# (2) read 12 bytes into L_SCR(1,2,3) #
				20130	# (3) fmov.x into %fp0 #
				20131	#################################################
				20132	load_packed:
				20133	bsr.l get_packed # fetch packed src op (presumably left in FP_SRC, which is tagged below)
				20134
				20135	lea FP_SRC(%a6),%a0 # pass ptr to src op
				20136	bsr.l set_tag_x # determine src type tag; returned in d0
				20137	cmpi.b %d0,&UNNORM # is the src op an UNNORM ZERO?
				20138	beq.b load_packed_unnorm # yes
				20139
				20140	mov.b %d0,STAG(%a6) # store the src optype tag
				20141	rts
				20142
				20143	load_packed_unnorm:
				20144	bsr.l unnorm_fix # fix the UNNORM ZERO; d0 is used as the new tag
				20145	mov.b %d0,STAG(%a6) # store the src optype tag
				20146	rts
				20147
				20148	#########################################################################
				20149	# XDEF **************************************************************** #
				20150	# fout(): move from fp register to memory or data register #
				20151	# #
				20152	# XREF **************************************************************** #
				20153	# _round() - needed to create EXOP for sgl/dbl precision #
				20154	# norm() - needed to create EXOP for extended precision #
				20155	# ovf_res() - create default overflow result for sgl/dbl precision#
				20156	# unf_res() - create default underflow result for sgl/dbl prec. #
				20157	# dst_dbl() - create rounded dbl precision result. #
				20158	# dst_sgl() - create rounded sgl precision result. #
				20159	# fetch_dreg() - fetch dynamic k-factor reg for packed. #
				20160	# bindec() - convert FP binary number to packed number. #
				20161	# _mem_write() - write data to memory. #
				20162	# _mem_write2() - write data to memory unless supv mode -(a7) exc.#
				20163	# _dmem_write_{byte,word,long}() - write data to memory. #
				20164	# store_dreg_{b,w,l}() - store data to data register file. #
				20165	# facc_out_{b,w,l,d,x}() - data access error occurred. #
				20166	# #
				20167	# INPUT *************************************************************** #
				20168	# a0 = pointer to extended precision source operand #
				20169	# d0 = round prec,mode #
				20170	# #
				20171	# OUTPUT ************************************************************** #
				20172	# fp0 : intermediate underflow or overflow result if #
				20173	# OVFL/UNFL occurred for a sgl or dbl operand #
				20174	# #
				20175	# ALGORITHM *********************************************************** #
				20176	# This routine is accessed by many handlers that need to do an #
				20177	# opclass three move of an operand out to memory. #
				20178	# Decode an fmove out (opclass 3) instruction to determine if #
				20179	# it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data #
				20180	# register or memory. The algorithm uses a standard "fmove" to create #
				20181	# the rounded result. Also, since exceptions are disabled, this also #
				20182	# creates the correct OPERR default result if appropriate. #
				20183	# For sgl or dbl precision, overflow or underflow can occur. If #
				20184	# either occurs and is enabled, the EXOP is created and returned in fp1. #
				20185	# For extended precision, the stacked <ea> must be fixed along #
				20186	# w/ the address index register as appropriate w/ _calc_ea_fout(). If #
				20187	# the source is a denorm and if underflow is enabled, an EXOP must be #
				20188	# created. #
				20189	# For packed, the k-factor must be fetched from the instruction #
				20190	# word or a data register. The <ea> must be fixed as w/ extended #
				20191	# precision. Then, bindec() is called to create the appropriate #
				20192	# packed result. #
				20193	# If at any time an access error is flagged by one of the move- #
				20194	# to-memory routines, then a special exit must be made so that the #
				20195	# access error can be handled properly. #
				20196	# #
				20197	#########################################################################
				20198
				20199	global fout
				20200	fout:
				20201	bfextu EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt (3 bits: 0-7)
				20202	mov.w (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index; fetch offset from tbl_fout
				20203	jmp (tbl_fout.b,%pc,%a1) # jump to routine
				20204
				20205	swbeg &0x8 # 8 table entries follow
				20206	tbl_fout:
				20207	short fout_long - tbl_fout # fmt 0: long
				20208	short fout_sgl - tbl_fout # fmt 1: single
				20209	short fout_ext - tbl_fout # fmt 2: extended
				20210	short fout_pack - tbl_fout # fmt 3: packed
				20211	short fout_word - tbl_fout # fmt 4: word
				20212	short fout_dbl - tbl_fout # fmt 5: double
				20213	short fout_byte - tbl_fout # fmt 6: byte
				20214	short fout_pack - tbl_fout # fmt 7: packed
				20215
				20216	#################################################################
				20217	# fmove.b out ###################################################
				20218	#################################################################
				20219
				20220	# Only "Unimplemented Data Type" exceptions enter here. The operand
				20221	# is either a DENORM or a NORM. a0 = ptr to src op; d0 = rnd prec,mode.
				20222	fout_byte:
				20223	tst.b STAG(%a6) # is operand normalized?
				20224	bne.b fout_byte_denorm # no
				20225
				20226	fmovm.x SRC(%a0),&0x80 # load value into fp0
				20227
				20228	fout_byte_norm:
				20229	fmov.l %d0,%fpcr # insert rnd prec,mode
				20230
				20231	fmov.b %fp0,%d0 # exec move out w/ correct rnd mode
				20232
				20233	fmov.l &0x0,%fpcr # clear FPCR
				20234	fmov.l %fpsr,%d1 # fetch FPSR
				20235	or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
				20236
				20237	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20238	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20239	beq.b fout_byte_dn # must save to integer regfile
				20240
				20241	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20242	bsr.l _dmem_write_byte # write byte
				20243
				20244	tst.l %d1 # did dstore fail?
				20245	bne.l facc_out_b # yes
				20246
				20247	rts
				20248
				20249	fout_byte_dn:
				20250	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20251	andi.w &0x7,%d1 # keep only the register number bits
				20252	bsr.l store_dreg_b # store byte result (d0) to Dn
				20253	rts
				20254
				20255	fout_byte_denorm:
				20256	mov.l SRC_EX(%a0),%d1
				20257	andi.l &0x80000000,%d1 # keep DENORM sign
				20258	ori.l &0x00800000,%d1 # make smallest sgl
				20259	fmov.s %d1,%fp0 # use it in fp0 in place of the DENORM
				20260	bra.b fout_byte_norm
				20261
				20262	#################################################################
				20263	# fmove.w out ###################################################
				20264	#################################################################
				20265
				20266	# Only "Unimplemented Data Type" exceptions enter here. The operand
				20267	# is either a DENORM or a NORM. a0 = ptr to src op; d0 = rnd prec,mode.
				20268	fout_word:
				20269	tst.b STAG(%a6) # is operand normalized?
				20270	bne.b fout_word_denorm # no
				20271
				20272	fmovm.x SRC(%a0),&0x80 # load value into fp0
				20273
				20274	fout_word_norm:
				20275	fmov.l %d0,%fpcr # insert rnd prec,mode
				20276
				20277	fmov.w %fp0,%d0 # exec move out w/ correct rnd mode
				20278
				20279	fmov.l &0x0,%fpcr # clear FPCR
				20280	fmov.l %fpsr,%d1 # fetch FPSR
				20281	or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
				20282
				20283	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20284	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20285	beq.b fout_word_dn # must save to integer regfile
				20286
				20287	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20288	bsr.l _dmem_write_word # write word
				20289
				20290	tst.l %d1 # did dstore fail?
				20291	bne.l facc_out_w # yes
				20292
				20293	rts
				20294
				20295	fout_word_dn:
				20296	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20297	andi.w &0x7,%d1 # keep only the register number bits
				20298	bsr.l store_dreg_w # store word result (d0) to Dn
				20299	rts
				20300
				20301	fout_word_denorm:
				20302	mov.l SRC_EX(%a0),%d1
				20303	andi.l &0x80000000,%d1 # keep DENORM sign
				20304	ori.l &0x00800000,%d1 # make smallest sgl
				20305	fmov.s %d1,%fp0 # use it in fp0 in place of the DENORM
				20306	bra.b fout_word_norm
				20307
				20308	#################################################################
				20309	# fmove.l out ###################################################
				20310	#################################################################
				20311
				20312	# Only "Unimplemented Data Type" exceptions enter here. The operand
				20313	# is either a DENORM or a NORM. a0 = ptr to src op; d0 = rnd prec,mode.
				20314	fout_long:
				20315	tst.b STAG(%a6) # is operand normalized?
				20316	bne.b fout_long_denorm # no
				20317
				20318	fmovm.x SRC(%a0),&0x80 # load value into fp0
				20319
				20320	fout_long_norm:
				20321	fmov.l %d0,%fpcr # insert rnd prec,mode
				20322
				20323	fmov.l %fp0,%d0 # exec move out w/ correct rnd mode
				20324
				20325	fmov.l &0x0,%fpcr # clear FPCR
				20326	fmov.l %fpsr,%d1 # fetch FPSR
				20327	or.w %d1,2+USER_FPSR(%a6) # save new exc,accrued bits
				20328
				20329	fout_long_write:
				20330	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20331	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20332	beq.b fout_long_dn # must save to integer regfile
				20333
				20334	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20335	bsr.l _dmem_write_long # write long
				20336
				20337	tst.l %d1 # did dstore fail?
				20338	bne.l facc_out_l # yes
				20339
				20340	rts
				20341
				20342	fout_long_dn:
				20343	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20344	andi.w &0x7,%d1 # keep only the register number bits
				20345	bsr.l store_dreg_l # store long result (d0) to Dn
				20346	rts
				20347
				20348	fout_long_denorm:
				20349	mov.l SRC_EX(%a0),%d1
				20350	andi.l &0x80000000,%d1 # keep DENORM sign
				20351	ori.l &0x00800000,%d1 # make smallest sgl
				20352	fmov.s %d1,%fp0 # use it in fp0 in place of the DENORM
				20353	bra.b fout_long_norm
				20354
				20355	#################################################################
				20356	# fmove.x out ###################################################
				20357	#################################################################
				20358
				20359	# Only "Unimplemented Data Type" exceptions enter here. The operand
				20360	# is either a DENORM or a NORM.
				20361	# The DENORM causes an Underflow exception.
				20362	fout_ext:
				20363
				20364	# we copy the extended precision result to FP_SCR0 so that the reserved
				20365	# 16-bit field gets zeroed. we do this since we promise not to disturb
				20366	# what's at SRC(a0).
				20367	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				20368	clr.w 2+FP_SCR0_EX(%a6) # clear reserved field
				20369	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				20370	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				20371
				20372	fmovm.x SRC(%a0),&0x80 # return result in fp0
				20373
				20374	bsr.l _calc_ea_fout # fix stacked <ea>; a0 = dst addr
				20375
				20376	mov.l %a0,%a1 # pass: dst addr
				20377	lea FP_SCR0(%a6),%a0 # pass: src addr
				20378	mov.l &0xc,%d0 # pass: opsize is 12 bytes
				20379
				20380	# we must not yet write the extended precision data to the stack
				20381	# in the pre-decrement case from supervisor mode or else we'll corrupt
				20382	# the stack frame. so, leave it in FP_SRC for now and deal with it later...
				20383	cmpi.b SPCOND_FLG(%a6),&mda7_flg
				20384	beq.b fout_ext_a7
				20385
				20386	bsr.l _dmem_write # write ext prec number to memory
				20387
				20388	tst.l %d1 # did dstore fail?
				20389	bne.w fout_ext_err # yes
				20390
				20391	tst.b STAG(%a6) # is operand normalized?
				20392	bne.b fout_ext_denorm # no
				20393	rts
				20394
				20395	# the number is a DENORM. must set the underflow exception bit
				20396	fout_ext_denorm:
				20397	bset &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
				20398
				20399	mov.b FPCR_ENABLE(%a6),%d0
				20400	andi.b &0x0a,%d0 # is UNFL or INEX enabled?
				20401	bne.b fout_ext_exc # yes; must create the EXOP
				20402	rts
				20403
				20404	# we don't want to do the write if the exception occurred in supervisor mode
				20405	# so _mem_write2() handles this for us.
				20406	fout_ext_a7:
				20407	bsr.l _mem_write2 # write ext prec number to memory
				20408
				20409	tst.l %d1 # did dstore fail?
				20410	bne.w fout_ext_err # yes
				20411
				20412	tst.b STAG(%a6) # is operand normalized?
				20413	bne.b fout_ext_denorm # no
				20414	rts
				20415
				20416	fout_ext_exc:
				20417	lea FP_SCR0(%a6),%a0 # pass: ptr to copy of the DENORM
				20418	bsr.l norm # normalize the mantissa; d0 = shft amt
				20419	neg.w %d0 # new exp = -(shft amt)
				20420	andi.w &0x7fff,%d0 # keep 15 exponent bits
				20421	andi.w &0x8000,FP_SCR0_EX(%a6) # keep only old sign
				20422	or.w %d0,FP_SCR0_EX(%a6) # insert new exponent
				20423	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				20424	rts
				20425
				20426	fout_ext_err:
				20427	mov.l EXC_A6(%a6),(%a6) # fix stacked a6
				20428	bra.l facc_out_x
				20429
				20430	#########################################################################
				20431	# fmove.s out ###########################################################
				20432	#########################################################################
				20433	fout_sgl:
				20434	andi.b &0x30,%d0 # clear rnd prec (only bits 4-5 are kept)
				20435	ori.b &s_mode*0x10,%d0 # insert sgl prec
				20436	mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
				20437
				20438	#
				20439	# operand is a normalized number. first, we check to see if the move out
				20440	# would cause either an underflow or overflow. these cases are handled
				20441	# separately. otherwise, set the FPCR to the proper rounding mode and
				20442	# execute the move.
				20443	#
				20444	mov.w SRC_EX(%a0),%d0 # extract exponent
				20445	andi.w &0x7fff,%d0 # strip sign
				20446
				20447	cmpi.w %d0,&SGL_HI # will operand overflow?
				20448	bgt.w fout_sgl_ovfl # yes; go handle OVFL
				20449	beq.w fout_sgl_may_ovfl # maybe; go handle possible OVFL
				20450	cmpi.w %d0,&SGL_LO # will operand underflow?
				20451	blt.w fout_sgl_unfl # yes; go handle underflow
				20452
				20453	#
				20454	# NORMs(in range) can be stored out by a simple "fmov.s"
				20455	# Unnormalized inputs can come through this point.
				20456	#
				20457	fout_sgl_exg:
				20458	fmovm.x SRC(%a0),&0x80 # fetch fop from stack into fp0
				20459
				20460	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				20461	fmov.l &0x0,%fpsr # clear FPSR
				20462
				20463	fmov.s %fp0,%d0 # store does convert and round
				20464
				20465	fmov.l &0x0,%fpcr # clear FPCR
				20466	fmov.l %fpsr,%d1 # save FPSR
				20467
				20468	or.w %d1,2+USER_FPSR(%a6) # set possible inex2/ainex
				20469
				20470	fout_sgl_exg_write:
				20471	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20472	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20473	beq.b fout_sgl_exg_write_dn # must save to integer regfile
				20474
				20475	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20476	bsr.l _dmem_write_long # write long
				20477
				20478	tst.l %d1 # did dstore fail?
				20479	bne.l facc_out_l # yes
				20480
				20481	rts
				20482
				20483	fout_sgl_exg_write_dn:
				20484	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20485	andi.w &0x7,%d1 # keep only the register number bits
				20486	bsr.l store_dreg_l # store sgl result (d0) to Dn
				20487	rts
				20488
				20489	#
				20490	# here, we know that the operand would UNFL if moved out to single prec,
				20491	# so, denorm and round and then use generic store single routine to
				20492	# write the value to memory.
				20493	#
				20494	fout_sgl_unfl:
				20495	bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
				20496
				20497	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # work on a copy of the src op in FP_SCR0
				20498	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				20499	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				20500	mov.l %a0,-(%sp) # save src ptr; popped by fout_sd_exc_unfl or dropped below
				20501
				20502	clr.l %d0 # pass: S.F. = 0
				20503
				20504	cmpi.b STAG(%a6),&DENORM # fetch src optype tag
				20505	bne.b fout_sgl_unfl_cont # let DENORMs fall through
				20506
				20507	lea FP_SCR0(%a6),%a0
				20508	bsr.l norm # normalize the DENORM
				20509
				20510	fout_sgl_unfl_cont:
				20511	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				20512	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				20513	bsr.l unf_res # calc default underflow result
				20514
				20515	lea FP_SCR0(%a6),%a0 # pass: ptr to fop
				20516	bsr.l dst_sgl # convert to single prec
				20517
				20518	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20519	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20520	beq.b fout_sgl_unfl_dn # must save to integer regfile
				20521
				20522	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20523	bsr.l _dmem_write_long # write long
				20524
				20525	tst.l %d1 # did dstore fail?
				20526	bne.l facc_out_l # yes
				20527
				20528	bra.b fout_sgl_unfl_chkexc
				20529
				20530	fout_sgl_unfl_dn:
				20531	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20532	andi.w &0x7,%d1 # keep only the register number bits
				20533	bsr.l store_dreg_l # store sgl result (d0) to Dn
				20534
				20535	fout_sgl_unfl_chkexc:
				20536	mov.b FPCR_ENABLE(%a6),%d1
				20537	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
				20538	bne.w fout_sd_exc_unfl # yes; go create the EXOP
				20539	addq.l &0x4,%sp # no; discard the saved src ptr
				20540	rts
				20541
				20542	# fmove.s out, definite overflow. a0 = ptr to src op; L_SCR3 = sgl prec,rnd mode.
				20543	# it's definitely an overflow so call ovf_res to get the correct answer
				20544	# the src ptr is pushed so fout_sd_exc_ovfl can build the EXOP if OVFL or INEX2 is enabled.
				20545	fout_sgl_ovfl:
				20546	tst.b 3+SRC_HI(%a0) # is result inexact?
				20547	bne.b fout_sgl_ovfl_inex2
				20548	tst.l SRC_LO(%a0) # is result inexact?
				20549	bne.b fout_sgl_ovfl_inex2
				20550	ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
				20551	bra.b fout_sgl_ovfl_cont
				20552	fout_sgl_ovfl_inex2:
				20553	ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
				20554
				20555	fout_sgl_ovfl_cont:
				20556	mov.l %a0,-(%sp) # save src ptr; popped by fout_sd_exc_ovfl or dropped below
				20557
				20558	# call ovf_res() w/ sgl prec and the correct rnd mode to create the default
				20559	# overflow result. DON'T save the returned ccodes from ovf_res() since
				20560	# fmove out doesn't alter them.
				20561	tst.b SRC_EX(%a0) # is operand negative?
				20562	smi %d1 # set if so
				20563	mov.l L_SCR3(%a6),%d0 # pass: sgl prec,rnd mode
				20564	bsr.l ovf_res # calc OVFL result
				20565	fmovm.x (%a0),&0x80 # load default overflow result
				20566	fmov.s %fp0,%d0 # store to single
				20567
				20568	mov.b 1+EXC_OPWORD(%a6),%d1 # extract dst mode
				20569	andi.b &0x38,%d1 # is mode == 0? (Dreg dst)
				20570	beq.b fout_sgl_ovfl_dn # must save to integer regfile
				20571
				20572	mov.l EXC_EA(%a6),%a0 # stacked <ea> is correct
				20573	bsr.l _dmem_write_long # write long
				20574
				20575	tst.l %d1 # did dstore fail?
				20576	bne.l facc_out_l # yes
				20577
				20578	bra.b fout_sgl_ovfl_chkexc
				20579
				20580	fout_sgl_ovfl_dn:
				20581	mov.b 1+EXC_OPWORD(%a6),%d1 # extract Dn
				20582	andi.w &0x7,%d1 # keep only the register number bits
				20583	bsr.l store_dreg_l # store sgl result (d0) to Dn
				20584
				20585	fout_sgl_ovfl_chkexc:
				20586	mov.b FPCR_ENABLE(%a6),%d1
				20587	andi.b &0x12,%d1 # is OVFL or INEX2 enabled? (was 0x0a = UNFL|INEX2, copied from the unfl path)
				20588	bne.w fout_sd_exc_ovfl # yes; go create the EXOP
				20589	addq.l &0x4,%sp # no; discard the saved src ptr
				20590	rts
				20591
				20592	#
				20593	# move out MAY overflow (exponent == SGL_HI):
				20594	# (1) copy the operand w/ the exp forced to 0x3fff (unbiased exp = 0)
				20595	# (2) do a move w/ appropriate rnd mode
				20596	# (3) if abs(rounded value) < 2, the exponent did not increase, so
				20597	# finish the original operand as an in-range NORM (fout_sgl_exg).
				20598	# otherwise rounding bumped the exponent, so handle it as an overflow.
				20599	#
				20600	fout_sgl_may_ovfl:
				20601	mov.w SRC_EX(%a0),%d1 # fetch current sign
				20602	andi.w &0x8000,%d1 # keep it,clear exp
				20603	ori.w &0x3fff,%d1 # insert exp = 0
				20604	mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
				20605	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
				20606	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
				20607
				20608	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				20609
				20610	fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
				20611	fmov.l &0x0,%fpcr # clear FPCR
				20612
				20613	fabs.x %fp0 # need absolute value
				20614	fcmp.b %fp0,&0x2 # did exponent increase?
				20615	fblt.w fout_sgl_exg # no; go finish NORM
				20616	bra.w fout_sgl_ovfl # yes; go handle overflow
				20617
				20618	################
				20619
				20620	fout_sd_exc_unfl: # sgl/dbl UNFL: create EXOP in fp1; src ptr is on top of the stack
				20621	mov.l (%sp)+,%a0 # restore a0 (ptr to src op)
				20622
				20623	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # start from a fresh copy of the src op
				20624	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				20625	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				20626
				20627	cmpi.b STAG(%a6),&DENORM # was src a DENORM?
				20628	bne.b fout_sd_exc_cont # no
				20629
				20630	lea FP_SCR0(%a6),%a0
				20631	bsr.l norm # normalize the DENORM; d0 = shft amt
				20632	neg.l %d0 # new exp = -(shft amt)
				20633	andi.w &0x7fff,%d0 # keep 15 exponent bits
				20634	bfins %d0,FP_SCR0_EX(%a6){&1:&15} # insert new exp; sign bit is untouched
				20635	bra.b fout_sd_exc_cont
				20636
				20637	fout_sd_exc:
				20638	fout_sd_exc_ovfl: # sgl/dbl OVFL: create EXOP in fp1; src ptr is on top of the stack
				20639	mov.l (%sp)+,%a0 # restore a0
				20640
				20641	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6)
				20642	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				20643	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				20644
				20645	fout_sd_exc_cont:
				20646	bclr &0x7,FP_SCR0_EX(%a6) # clear sign bit
				20647	sne.b 2+FP_SCR0_EX(%a6) # set internal sign bit
				20648	lea FP_SCR0(%a6),%a0 # pass: ptr to DENORM
				20649
				20650	mov.b 3+L_SCR3(%a6),%d1 # fetch saved rnd prec,mode byte
				20651	lsr.b &0x4,%d1
				20652	andi.w &0x0c,%d1 # keep bits 6-7 of the byte (rnd prec)
				20653	swap %d1 # hi word of d1 = rnd prec
				20654	mov.b 3+L_SCR3(%a6),%d1
				20655	lsr.b &0x4,%d1
				20656	andi.w &0x03,%d1 # lo word of d1 = bits 4-5 of the byte (rnd mode)
				20657	clr.l %d0 # pass: zero g,r,s
				20658	bsr.l _round # round the DENORM
				20659
				20660	tst.b 2+FP_SCR0_EX(%a6) # is EXOP negative?
				20661	beq.b fout_sd_exc_done # no
				20662	bset &0x7,FP_SCR0_EX(%a6) # yes
				20663
				20664	fout_sd_exc_done:
				20665	fmovm.x FP_SCR0(%a6),&0x40 # return EXOP in fp1
				20666	rts
				20667
				20668	#################################################################
				20669	# fmove.d out ###################################################
				20670	#################################################################
				20671	fout_dbl:
				20672	andi.b &0x30,%d0 # clear rnd prec (only bits 4-5 are kept)
				20673	ori.b &d_mode*0x10,%d0 # insert dbl prec
				20674	mov.l %d0,L_SCR3(%a6) # save rnd prec,mode on stack
				20675
				20676	#
				20677	# operand is a normalized number. first, we check to see if the move out
				20678	# would cause either an underflow or overflow. these cases are handled
				20679	# separately. otherwise, set the FPCR to the proper rounding mode and
				20680	# execute the move.
				20681	#
				20682	mov.w SRC_EX(%a0),%d0 # extract exponent
				20683	andi.w &0x7fff,%d0 # strip sign
				20684
				20685	cmpi.w %d0,&DBL_HI # will operand overflow?
				20686	bgt.w fout_dbl_ovfl # yes; go handle OVFL
				20687	beq.w fout_dbl_may_ovfl # maybe; go handle possible OVFL
				20688	cmpi.w %d0,&DBL_LO # will operand underflow?
				20689	blt.w fout_dbl_unfl # yes; go handle underflow
				20690
				20691	#
				20692	# NORMs(in range) can be stored out by a simple "fmov.d"
				20693	# Unnormalized inputs can come through this point.
				20694	#
				20695	fout_dbl_exg:
				20696	fmovm.x SRC(%a0),&0x80 # fetch fop from stack into fp0
				20697
				20698	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				20699	fmov.l &0x0,%fpsr # clear FPSR
				20700
				20701	fmov.d %fp0,L_SCR1(%a6) # store does convert and round
				20702
				20703	fmov.l &0x0,%fpcr # clear FPCR
				20704	fmov.l %fpsr,%d0 # save FPSR
				20705
				20706	or.w %d0,2+USER_FPSR(%a6) # set possible inex2/ainex
				20707
				20708	mov.l EXC_EA(%a6),%a1 # pass: dst addr
				20709	lea L_SCR1(%a6),%a0 # pass: src addr
				20710	movq.l &0x8,%d0 # pass: opsize is 8 bytes
				20711	bsr.l _dmem_write # store dbl fop to memory
				20712
				20713	tst.l %d1 # did dstore fail?
				20714	bne.l facc_out_d # yes
				20715
				20716	rts # no; so we're finished
				20717
				20718	#
				20719	# here, we know that the operand would UNFL if moved out to double prec,
				20720	# so, denorm and round and then use generic store double routine to
				20721	# write the value to memory.
				20722	#
				20723	fout_dbl_unfl:
				20724	bset &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
				20725
				20726	mov.w SRC_EX(%a0),FP_SCR0_EX(%a6) # work on a copy of the src op in FP_SCR0
				20727	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6)
				20728	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6)
				20729	mov.l %a0,-(%sp) # save src ptr; popped by fout_sd_exc_unfl or dropped below
				20730
				20731	clr.l %d0 # pass: S.F. = 0
				20732
				20733	cmpi.b STAG(%a6),&DENORM # fetch src optype tag
				20734	bne.b fout_dbl_unfl_cont # let DENORMs fall through
				20735
				20736	lea FP_SCR0(%a6),%a0
				20737	bsr.l norm # normalize the DENORM
				20738
				20739	fout_dbl_unfl_cont:
				20740	lea FP_SCR0(%a6),%a0 # pass: ptr to operand
				20741	mov.l L_SCR3(%a6),%d1 # pass: rnd prec,mode
				20742	bsr.l unf_res # calc default underflow result
				20743
				20744	lea FP_SCR0(%a6),%a0 # pass: ptr to fop
				20745	bsr.l dst_dbl # convert to double prec; d0 = hi, d1 = lo
				20746	mov.l %d0,L_SCR1(%a6) # build the 8-byte dbl in L_SCR1:L_SCR2
				20747	mov.l %d1,L_SCR2(%a6)
				20748
				20749	mov.l EXC_EA(%a6),%a1 # pass: dst addr
				20750	lea L_SCR1(%a6),%a0 # pass: src addr
				20751	movq.l &0x8,%d0 # pass: opsize is 8 bytes
				20752	bsr.l _dmem_write # store dbl fop to memory
				20753
				20754	tst.l %d1 # did dstore fail?
				20755	bne.l facc_out_d # yes
				20756
				20757	mov.b FPCR_ENABLE(%a6),%d1
				20758	andi.b &0x0a,%d1 # is UNFL or INEX enabled?
				20759	bne.w fout_sd_exc_unfl # yes; go create the EXOP
				20760	addq.l &0x4,%sp # no; discard the saved src ptr
				20761	rts
				20762
				20763	# fmove.d out, definite overflow. a0 = ptr to src op; L_SCR3 = dbl prec,rnd mode.
				20764	# it's definitely an overflow so call ovf_res to get the correct answer
				20765	# the src ptr is pushed so fout_sd_exc_ovfl can build the EXOP if OVFL or INEX2 is enabled.
				20766	fout_dbl_ovfl:
				20767	mov.w 2+SRC_LO(%a0),%d0 # fetch lowest word of the mantissa
				20768	andi.w &0x7ff,%d0 # are any of the low 11 mantissa bits set?
				20769	bne.b fout_dbl_ovfl_inex2 # yes; result is inexact
				20770
				20771	ori.w &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
				20772	bra.b fout_dbl_ovfl_cont
				20773	fout_dbl_ovfl_inex2:
				20774	ori.w &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
				20775
				20776	fout_dbl_ovfl_cont:
				20777	mov.l %a0,-(%sp) # save src ptr; popped by fout_sd_exc_ovfl or dropped below
				20778
				20779	# call ovf_res() w/ dbl prec and the correct rnd mode to create the default
				20780	# overflow result. DON'T save the returned ccodes from ovf_res() since
				20781	# fmove out doesn't alter them.
				20782	tst.b SRC_EX(%a0) # is operand negative?
				20783	smi %d1 # set if so
				20784	mov.l L_SCR3(%a6),%d0 # pass: dbl prec,rnd mode
				20785	bsr.l ovf_res # calc OVFL result
				20786	fmovm.x (%a0),&0x80 # load default overflow result
				20787	fmov.d %fp0,L_SCR1(%a6) # store to double
				20788
				20789	mov.l EXC_EA(%a6),%a1 # pass: dst addr
				20790	lea L_SCR1(%a6),%a0 # pass: src addr
				20791	movq.l &0x8,%d0 # pass: opsize is 8 bytes
				20792	bsr.l _dmem_write # store dbl fop to memory
				20793
				20794	tst.l %d1 # did dstore fail?
				20795	bne.l facc_out_d # yes
				20796
				20797	mov.b FPCR_ENABLE(%a6),%d1
				20798	andi.b &0x12,%d1 # is OVFL or INEX2 enabled? (was 0x0a = UNFL|INEX2, copied from the unfl path)
				20799	bne.w fout_sd_exc_ovfl # yes; go create the EXOP
				20800	addq.l &0x4,%sp # no; discard the saved src ptr
				20801	rts
				20802
				20803	#
				20804	# move out MAY overflow (exponent == DBL_HI):
				20805	# (1) copy the operand w/ the exp forced to 0x3fff (unbiased exp = 0)
				20806	# (2) do a move w/ appropriate rnd mode
				20807	# (3) if abs(rounded value) < 2, the exponent did not increase, so
				20808	# finish the original operand as an in-range NORM (fout_dbl_exg).
				20809	# otherwise rounding bumped the exponent, so handle it as an overflow.
				20810	#
				20811	fout_dbl_may_ovfl:
				20812	mov.w SRC_EX(%a0),%d1 # fetch current sign
				20813	andi.w &0x8000,%d1 # keep it,clear exp
				20814	ori.w &0x3fff,%d1 # insert exp = 0
				20815	mov.w %d1,FP_SCR0_EX(%a6) # insert scaled exp
				20816	mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
				20817	mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
				20818
				20819	fmov.l L_SCR3(%a6),%fpcr # set FPCR
				20820
				20821	fmov.x FP_SCR0(%a6),%fp0 # force fop to be rounded
				20822	fmov.l &0x0,%fpcr # clear FPCR
				20823
				20824	fabs.x %fp0 # need absolute value
				20825	fcmp.b %fp0,&0x2 # did exponent increase?
				20826	fblt.w fout_dbl_exg # no; go finish NORM
				20827	bra.w fout_dbl_ovfl # yes; go handle overflow
				20828
				20829	#########################################################################
				20830	# XDEF **************************************************************** #
				20831	# dst_dbl(): create double precision value from extended prec. #
				20832	# #
				20833	# XREF **************************************************************** #
				20834	# None #
				20835	# #
				20836	# INPUT *************************************************************** #
				20837	# a0 = pointer to source operand in extended precision #
				20838	# #
				20839	# OUTPUT ************************************************************** #
				20840	# d0 = hi(double precision result) #
				20841	# d1 = lo(double precision result) #
				20842	# #
				20843	# ALGORITHM *********************************************************** #
				20844	# #
				20845	# Changes extended precision to double precision. #
				20846	# Note: no attempt is made to round the extended value to double. #
				20847	# dbl_sign = ext_sign #
				20848	# dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias) #
				20849	# get rid of ext integer bit #
				20850	# dbl_mant = ext_mant{62:11} #
				20851	# #
				20852	#               ---------------   ---------------   ---------------    #
				20853	#  extended ->  |s|    exp    |   |1| ms mant   |   |   ls mant   |    #
				20854	#               ---------------   ---------------   ---------------    #
				20855	#                95         64    63 62       32    31    11     0     #
				20856	#                                    |                    |            #
				20857	#                                    |                    |            #
				20858	#                                    |                    |            #
				20859	#                                    v                    v            #
				20860	#                               ---------------   ---------------      #
				20861	#  double   ->                  |s|exp| mant  |   |    mant     |      #
				20862	#                               ---------------   ---------------      #
				20863	#                               63     51    32   31            0      #
				20864	# #
				20865	#########################################################################
				20866
				20867	dst_dbl:
				20868	clr.l %d0 # clear d0
				20869	mov.w FTEMP_EX(%a0),%d0 # get exponent
				20870	subi.w &EXT_BIAS,%d0 # subtract extended precision bias
				20871	addi.w &DBL_BIAS,%d0 # add double precision bias
				20872	tst.b FTEMP_HI(%a0) # is number a denorm?
				20873	bmi.b dst_get_dupper # no
				20874	subq.w &0x1,%d0 # yes; denorm bias = DBL_BIAS - 1
				20875	dst_get_dupper:
				20876	swap %d0 # d0 now in upper word
				20877	lsl.l &0x4,%d0 # d0 in proper place for dbl prec exp
				20878	tst.b FTEMP_EX(%a0) # test sign
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	20879	bpl.b dst_get_dman # if positive, go process mantissa
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20880	bset &0x1f,%d0 # if negative, set sign
				20881	dst_get_dman:
				20882	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
				20883	bfextu %d1{&1:&20},%d1 # get upper 20 bits of ms (skips the integer bit)
				20884	or.l %d1,%d0 # put these bits in ms word of double
				20885	mov.l %d0,L_SCR1(%a6) # save hi lword of the dbl result in L_SCR1
				20886	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
				20887	mov.l &21,%d0 # load shift count
				20888	lsl.l %d0,%d1 # put lower 11 bits in upper bits
				20889	mov.l %d1,L_SCR2(%a6) # build lower lword in memory
				20890	mov.l FTEMP_LO(%a0),%d1 # get ls mantissa
				20891	bfextu %d1{&0:&21},%d0 # get ls 21 bits of double
				20892	mov.l L_SCR2(%a6),%d1
				20893	or.l %d0,%d1 # put them in double result; d1 = lo(result)
				20894	mov.l L_SCR1(%a6),%d0 # d0 = hi(result)
				20895	rts
				20896
				20897	#########################################################################
				20898	# XDEF **************************************************************** #
				20899	# dst_sgl(): create single precision value from extended prec #
				20900	# #
				20901	# XREF **************************************************************** #
				20902	# #
				20903	# INPUT *************************************************************** #
				20904	# a0 = pointer to source operand in extended precision #
				20905	# #
				20906	# OUTPUT ************************************************************** #
				20907	# d0 = single precision result #
				20908	# #
				20909	# ALGORITHM *********************************************************** #
				20910	# #
				20911	# Changes extended precision to single precision. #
				20912	# sgl_sign = ext_sign #
				20913	# sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias) #
				20914	# get rid of ext integer bit #
				20915	# sgl_mant = ext_mant{62:40} #
				20916	# #
				20917	# --------------- --------------- --------------- #
				20918	# extended -> \|s\| exp \| \|1\| ms mant \| \| ls mant \| #
				20919	# --------------- --------------- --------------- #
				20920	# 95 64 63 62 40 32 31 12 0 #
				20921	# \| \| #
				20922	# \| \| #
				20923	# \| \| #
				20924	# v v #
				20925	# --------------- #
				20926	# single -> \|s\|exp\| mant \| #
				20927	# --------------- #
				20928	# 31 22 0 #
				20929	# #
				20930	#########################################################################
				20931
				20932	dst_sgl: # in: a0 -> ext operand; out: d0 = single precision value (d1 is scratch)
				20933	clr.l %d0 # clear d0
				20934	mov.w FTEMP_EX(%a0),%d0 # get exponent
				20935	subi.w &EXT_BIAS,%d0 # subtract extended precision bias
				20936	addi.w &SGL_BIAS,%d0 # add single precision bias
				20937	tst.b FTEMP_HI(%a0) # is number a denorm? (top mantissa bit clear)
				20938	bmi.b dst_get_supper # no
				20939	subq.w &0x1,%d0 # yes; denorm bias = SGL_BIAS - 1
				20940	dst_get_supper:
				20941	swap %d0 # put exp in upper word of d0
				20942	lsl.l &0x7,%d0 # exponent word bits 7:0 now sit in d0 bits 30:23
				20943	tst.b FTEMP_EX(%a0) # test sign (msb of the sign/exponent word)
				20944	bpl.b dst_get_sman # if positive, continue
				20945	bset &0x1f,%d0 # if negative, put in sign first
				20946	dst_get_sman:
				20947	mov.l FTEMP_HI(%a0),%d1 # get ms mantissa
				20948	andi.l &0x7fffff00,%d1 # drop the top bit, keep the next 23 bits of ms
				20949	lsr.l &0x8,%d1 # and put them flush right
				20950	or.l %d1,%d0 # put these bits in ms word of single
				20951	rts
				20952
				20953	##############################################################################
				20954	fout_pack:
				20955	bsr.l _calc_ea_fout # fetch the <ea>
				20956	mov.l %a0,-(%sp) # save dst <ea>; popped into a1 at fout_pack_write
				20957
				20958	mov.b STAG(%a6),%d0 # fetch input type
				20959	bne.w fout_pack_not_norm # input is not NORM
				20960
				20961	fout_pack_norm:
				20962	btst &0x4,EXC_CMDREG(%a6) # static or dynamic?
				20963	beq.b fout_pack_s # static
				20964
				20965	fout_pack_d:
				20966	mov.b 1+EXC_CMDREG(%a6),%d1 # fetch dynamic reg
				20967	lsr.b &0x4,%d1 # move the register field down to the low bits
				20968	andi.w &0x7,%d1 # keep a 3-bit data register index
				20969
				20970	bsr.l fetch_dreg # fetch Dn w/ k-factor
				20971
				20972	bra.b fout_pack_type
				20973	fout_pack_s:
				20974	mov.b 1+EXC_CMDREG(%a6),%d0 # fetch static field
				20975
				20976	fout_pack_type:
				20977	bfexts %d0{&25:&7},%d0 # extract k-factor (low 7 bits, sign-extended)
				20978	mov.l %d0,-(%sp) # save k-factor; reloaded after bindec
				20979
				20980	lea FP_SRC(%a6),%a0 # pass: ptr to input
				20981
				20982	# bindec is currently scrambling FP_SRC for denorm inputs.
				20983	# we'll have to change this, but for now, tough luck!!!
				20984	bsr.l bindec # convert xprec to packed
				20985
				20986	# andi.l &0xcfff000f,FP_SCR0(%a6) # clear unused fields
				20987	andi.l &0xcffff00f,FP_SCR0(%a6) # clear unused fields
				20988
				20989	mov.l (%sp)+,%d0 # restore k-factor
				20990
				20991	tst.b 3+FP_SCR0_EX(%a6) # byte at offset 3 from FP_SCR0_EX non-zero?
				20992	bne.b fout_pack_set # yes; result is not zero
				20993	tst.l FP_SCR0_HI(%a6) # any non-zero bits in the middle lword?
				20994	bne.b fout_pack_set # yes; result is not zero
				20995	tst.l FP_SCR0_LO(%a6) # any non-zero bits in the last lword?
				20996	bne.b fout_pack_set # yes; result is not zero
				20997
				20998	# add the extra condition that only if the k-factor was zero, too, should
				20999	# we zero the exponent
				21000	tst.l %d0 # was the k-factor zero?
				21001	bne.b fout_pack_set # no; leave the exponent alone
				21002	# "mantissa" is all zero which means that the answer is zero. but, the '040
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	21003	# algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21004	# if the mantissa is zero, I will zero the exponent, too.
				21005	# the question now is whether the exponents sign bit is allowed to be non-zero
				21006	# for a zero, also...
				21007	andi.w &0xf000,FP_SCR0(%a6) # keep top nibble of first word, clear its low 12 bits
				21008
				21009	fout_pack_set:
				21010
				21011	lea FP_SCR0(%a6),%a0 # pass: src addr
				21012
				21013	fout_pack_write:
				21014	mov.l (%sp)+,%a1 # pass: dst addr
				21015	mov.l &0xc,%d0 # pass: opsize is 12 bytes
				21016
				21017	cmpi.b SPCOND_FLG(%a6),&mda7_flg # was a7 pre-decremented (see dec_areg)?
				21018	beq.b fout_pack_a7 # yes; use the special write path
				21019
				21020	bsr.l _dmem_write # write the 12-byte result to memory
				21021
				21022	tst.l %d1 # did dstore fail?
				21023	bne.w fout_ext_err # yes
				21024
				21025	rts
				21026
				21027	# we don't want to do the write if the exception occurred in supervisor mode
				21028	# so _mem_write2() handles this for us.
				21029	fout_pack_a7:
				21030	bsr.l _mem_write2 # write the 12-byte result to memory
				21031
				21032	tst.l %d1 # did dstore fail?
				21033	bne.w fout_ext_err # yes
				21034
				21035	rts
				21036
				21037	fout_pack_not_norm:
				21038	cmpi.b %d0,&DENORM # is it a DENORM?
				21039	beq.w fout_pack_norm # yes
				21040	lea FP_SRC(%a6),%a0 # pass: src addr is the operand itself (no bindec)
				21041	clr.w 2+FP_SRC_EX(%a6) # zero the word that follows the sign/exponent word
				21042	cmpi.b %d0,&SNAN # is it an SNAN?
				21043	beq.b fout_pack_snan # yes
				21044	bra.b fout_pack_write # no; write FP_SRC as it stands
				21045
				21046	fout_pack_snan:
				21047	ori.w &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
				21048	bset &0x6,FP_SRC_HI(%a6) # set snan bit
				21049	bra.b fout_pack_write
				21050
				21051	#########################################################################
				21052	# XDEF **************************************************************** #
				21053	# fetch_dreg(): fetch register according to index in d1 #
				21054	# #
				21055	# XREF **************************************************************** #
				21056	# None #
				21057	# #
				21058	# INPUT *************************************************************** #
				21059	# d1 = index of register to fetch from #
				21060	# #
				21061	# OUTPUT ************************************************************** #
				21062	# d0 = value of register fetched #
				21063	# #
				21064	# ALGORITHM *********************************************************** #
				21065	# According to the index value in d1 which can range from zero #
				21066	# to fifteen, load the corresponding register file value (where #
				21067	# address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the #
				21068	# stack. The rest should still be in their original places. #
				21069	# #
				21070	#########################################################################
				21071
				21072	# this routine leaves d1 intact for subsequent store_dreg calls.
				21073	global fetch_dreg
				21074	fetch_dreg:
				21075	mov.w (tbl_fdreg.b,%pc,%d1.w*2),%d0 # d0 = handler offset for index d1
				21076	jmp (tbl_fdreg.b,%pc,%d0.w*1) # dispatch; every handler returns with rts
				21077
				21078	tbl_fdreg: # 16 word offsets, each relative to tbl_fdreg
				21079	short fdreg0 - tbl_fdreg
				21080	short fdreg1 - tbl_fdreg
				21081	short fdreg2 - tbl_fdreg
				21082	short fdreg3 - tbl_fdreg
				21083	short fdreg4 - tbl_fdreg
				21084	short fdreg5 - tbl_fdreg
				21085	short fdreg6 - tbl_fdreg
				21086	short fdreg7 - tbl_fdreg
				21087	short fdreg8 - tbl_fdreg
				21088	short fdreg9 - tbl_fdreg
				21089	short fdrega - tbl_fdreg
				21090	short fdregb - tbl_fdreg
				21091	short fdregc - tbl_fdreg
				21092	short fdregd - tbl_fdreg
				21093	short fdrege - tbl_fdreg
				21094	short fdregf - tbl_fdreg
				21095
				21096	fdreg0: # index 0 = d0: read the copy saved at EXC_DREGS
				21097	mov.l EXC_DREGS+0x0(%a6),%d0
				21098	rts
				21099	fdreg1: # index 1 = d1: read the saved copy
				21100	mov.l EXC_DREGS+0x4(%a6),%d0
				21101	rts
				21102	fdreg2: # indexes 2-7 = d2-d7: read the live registers
				21103	mov.l %d2,%d0
				21104	rts
				21105	fdreg3:
				21106	mov.l %d3,%d0
				21107	rts
				21108	fdreg4:
				21109	mov.l %d4,%d0
				21110	rts
				21111	fdreg5:
				21112	mov.l %d5,%d0
				21113	rts
				21114	fdreg6:
				21115	mov.l %d6,%d0
				21116	rts
				21117	fdreg7:
				21118	mov.l %d7,%d0
				21119	rts
				21120	fdreg8: # index 8 = a0: read the saved copy
				21121	mov.l EXC_DREGS+0x8(%a6),%d0
				21122	rts
				21123	fdreg9: # index 9 = a1: read the saved copy
				21124	mov.l EXC_DREGS+0xc(%a6),%d0
				21125	rts
				21126	fdrega: # indexes 10-13 = a2-a5: read the live registers
				21127	mov.l %a2,%d0
				21128	rts
				21129	fdregb:
				21130	mov.l %a3,%d0
				21131	rts
				21132	fdregc:
				21133	mov.l %a4,%d0
				21134	rts
				21135	fdregd:
				21136	mov.l %a5,%d0
				21137	rts
				21138	fdrege: # index 14 = a6: read the longword a6 currently points at
				21139	mov.l (%a6),%d0
				21140	rts
				21141	fdregf: # index 15 = a7: read the copy kept at EXC_A7
				21142	mov.l EXC_A7(%a6),%d0
				21143	rts
				21144
				21145	#########################################################################
				21146	# XDEF **************************************************************** #
				21147	# store_dreg_l(): store longword to data register specified by d1 #
				21148	# #
				21149	# XREF **************************************************************** #
				21150	# None #
				21151	# #
				21152	# INPUT *************************************************************** #
				21153	# d0 = longword value to store #
				21154	# d1 = index of register to store to #
				21155	# #
				21156	# OUTPUT ************************************************************** #
				21157	# (data register is updated) #
				21158	# #
				21159	# ALGORITHM *********************************************************** #
				21160	# According to the index value in d1, store the longword value #
				21161	# in d0 to the corresponding data register. D0/D1 are on the stack #
				21162	# while the rest are in their initial places. #
				21163	# #
				21164	#########################################################################
				21165
				21166	global store_dreg_l
				21167	store_dreg_l:
				21168	mov.w (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = handler offset (the index in d1 is overwritten)
				21169	jmp (tbl_sdregl.b,%pc,%d1.w*1) # dispatch; every handler returns with rts
				21170
				21171	tbl_sdregl: # 8 word offsets, each relative to tbl_sdregl
				21172	short sdregl0 - tbl_sdregl
				21173	short sdregl1 - tbl_sdregl
				21174	short sdregl2 - tbl_sdregl
				21175	short sdregl3 - tbl_sdregl
				21176	short sdregl4 - tbl_sdregl
				21177	short sdregl5 - tbl_sdregl
				21178	short sdregl6 - tbl_sdregl
				21179	short sdregl7 - tbl_sdregl
				21180
				21181	sdregl0: # d0: update the copy saved at EXC_DREGS
				21182	mov.l %d0,EXC_DREGS+0x0(%a6)
				21183	rts
				21184	sdregl1: # d1: update the saved copy
				21185	mov.l %d0,EXC_DREGS+0x4(%a6)
				21186	rts
				21187	sdregl2: # d2-d7: write the live registers directly
				21188	mov.l %d0,%d2
				21189	rts
				21190	sdregl3:
				21191	mov.l %d0,%d3
				21192	rts
				21193	sdregl4:
				21194	mov.l %d0,%d4
				21195	rts
				21196	sdregl5:
				21197	mov.l %d0,%d5
				21198	rts
				21199	sdregl6:
				21200	mov.l %d0,%d6
				21201	rts
				21202	sdregl7:
				21203	mov.l %d0,%d7
				21204	rts
				21205
				21206	#########################################################################
				21207	# XDEF **************************************************************** #
				21208	# store_dreg_w(): store word to data register specified by d1 #
				21209	# #
				21210	# XREF **************************************************************** #
				21211	# None #
				21212	# #
				21213	# INPUT *************************************************************** #
				21214	# d0 = word value to store #
				21215	# d1 = index of register to store to #
				21216	# #
				21217	# OUTPUT ************************************************************** #
				21218	# (data register is updated) #
				21219	# #
				21220	# ALGORITHM *********************************************************** #
				21221	# According to the index value in d1, store the word value #
				21222	# in d0 to the corresponding data register. D0/D1 are on the stack #
				21223	# while the rest are in their initial places. #
				21224	# #
				21225	#########################################################################
				21226
				21227	global store_dreg_w
				21228	store_dreg_w:
				21229	mov.w (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = handler offset (the index in d1 is overwritten)
				21230	jmp (tbl_sdregw.b,%pc,%d1.w*1) # dispatch; every handler returns with rts
				21231
				21232	tbl_sdregw: # 8 word offsets, each relative to tbl_sdregw
				21233	short sdregw0 - tbl_sdregw
				21234	short sdregw1 - tbl_sdregw
				21235	short sdregw2 - tbl_sdregw
				21236	short sdregw3 - tbl_sdregw
				21237	short sdregw4 - tbl_sdregw
				21238	short sdregw5 - tbl_sdregw
				21239	short sdregw6 - tbl_sdregw
				21240	short sdregw7 - tbl_sdregw
				21241
				21242	sdregw0: # d0: update only the low word (offset 2) of the saved copy
				21243	mov.w %d0,2+EXC_DREGS+0x0(%a6)
				21244	rts
				21245	sdregw1: # d1: update only the low word (offset 2) of the saved copy
				21246	mov.w %d0,2+EXC_DREGS+0x4(%a6)
				21247	rts
				21248	sdregw2: # d2-d7: word move into the live register; upper word untouched
				21249	mov.w %d0,%d2
				21250	rts
				21251	sdregw3:
				21252	mov.w %d0,%d3
				21253	rts
				21254	sdregw4:
				21255	mov.w %d0,%d4
				21256	rts
				21257	sdregw5:
				21258	mov.w %d0,%d5
				21259	rts
				21260	sdregw6:
				21261	mov.w %d0,%d6
				21262	rts
				21263	sdregw7:
				21264	mov.w %d0,%d7
				21265	rts
				21266
				21267	#########################################################################
				21268	# XDEF **************************************************************** #
				21269	# store_dreg_b(): store byte to data register specified by d1 #
				21270	# #
				21271	# XREF **************************************************************** #
				21272	# None #
				21273	# #
				21274	# INPUT *************************************************************** #
				21275	# d0 = byte value to store #
				21276	# d1 = index of register to store to #
				21277	# #
				21278	# OUTPUT ************************************************************** #
				21279	# (data register is updated) #
				21280	# #
				21281	# ALGORITHM *********************************************************** #
				21282	# According to the index value in d1, store the byte value #
				21283	# in d0 to the corresponding data register. D0/D1 are on the stack #
				21284	# while the rest are in their initial places. #
				21285	# #
				21286	#########################################################################
				21287
				21288	global store_dreg_b
				21289	store_dreg_b:
				21290	mov.w (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = handler offset (the index in d1 is overwritten)
				21291	jmp (tbl_sdregb.b,%pc,%d1.w*1) # dispatch; every handler returns with rts
				21292
				21293	tbl_sdregb: # 8 word offsets, each relative to tbl_sdregb
				21294	short sdregb0 - tbl_sdregb
				21295	short sdregb1 - tbl_sdregb
				21296	short sdregb2 - tbl_sdregb
				21297	short sdregb3 - tbl_sdregb
				21298	short sdregb4 - tbl_sdregb
				21299	short sdregb5 - tbl_sdregb
				21300	short sdregb6 - tbl_sdregb
				21301	short sdregb7 - tbl_sdregb
				21302
				21303	sdregb0: # d0: update only the low byte (offset 3) of the saved copy
				21304	mov.b %d0,3+EXC_DREGS+0x0(%a6)
				21305	rts
				21306	sdregb1: # d1: update only the low byte (offset 3) of the saved copy
				21307	mov.b %d0,3+EXC_DREGS+0x4(%a6)
				21308	rts
				21309	sdregb2: # d2-d7: byte move into the live register; upper bytes untouched
				21310	mov.b %d0,%d2
				21311	rts
				21312	sdregb3:
				21313	mov.b %d0,%d3
				21314	rts
				21315	sdregb4:
				21316	mov.b %d0,%d4
				21317	rts
				21318	sdregb5:
				21319	mov.b %d0,%d5
				21320	rts
				21321	sdregb6:
				21322	mov.b %d0,%d6
				21323	rts
				21324	sdregb7:
				21325	mov.b %d0,%d7
				21326	rts
				21327
				21328	#########################################################################
				21329	# XDEF **************************************************************** #
				21330	# inc_areg(): increment an address register by the value in d0 #
				21331	# #
				21332	# XREF **************************************************************** #
				21333	# None #
				21334	# #
				21335	# INPUT *************************************************************** #
				21336	# d0 = amount to increment by #
				21337	# d1 = index of address register to increment #
				21338	# #
				21339	# OUTPUT ************************************************************** #
				21340	# (address register is updated) #
				21341	# #
				21342	# ALGORITHM *********************************************************** #
				21343	# Typically used for an instruction w/ a post-increment <ea>, #
				21344	# this routine adds the increment value in d0 to the address register #
				21345	# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
				21346	# in their original places. #
				21347	# For a7, if the increment amount is one, then we have to #
				21348	# increment by two. For any a7 update, set the mia7_flag so that if #
				21349	# an access error exception occurs later in emulation, this address #
				21350	# register update can be undone. #
				21351	# #
				21352	#########################################################################
				21353
				21354	global inc_areg
				21355	inc_areg:
				21356	mov.w (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset (the index in d1 is overwritten)
				21357	jmp (tbl_iareg.b,%pc,%d1.w*1) # dispatch; every handler returns with rts
				21358
				21359	tbl_iareg: # 8 word offsets, each relative to tbl_iareg
				21360	short iareg0 - tbl_iareg
				21361	short iareg1 - tbl_iareg
				21362	short iareg2 - tbl_iareg
				21363	short iareg3 - tbl_iareg
				21364	short iareg4 - tbl_iareg
				21365	short iareg5 - tbl_iareg
				21366	short iareg6 - tbl_iareg
				21367	short iareg7 - tbl_iareg
				21368
				21369	iareg0: add.l %d0,EXC_DREGS+0x8(%a6) # a0: bump the saved copy
				21370	rts
				21371	iareg1: add.l %d0,EXC_DREGS+0xc(%a6) # a1: bump the saved copy
				21372	rts
				21373	iareg2: add.l %d0,%a2 # a2-a5: bump the live register
				21374	rts
				21375	iareg3: add.l %d0,%a3
				21376	rts
				21377	iareg4: add.l %d0,%a4
				21378	rts
				21379	iareg5: add.l %d0,%a5
				21380	rts
				21381	iareg6: add.l %d0,(%a6) # a6: bump the longword a6 currently points at
				21382	rts
				21383	iareg7: mov.b &mia7_flg,SPCOND_FLG(%a6) # a7: record the update so it can be undone
				21384	cmpi.b %d0,&0x1 # increment of one? (only the low byte of d0 is compared)
				21385	beq.b iareg7b # yes; a7 moves by two instead
				21386	add.l %d0,EXC_A7(%a6) # no; add d0 to the saved a7
				21387	rts
				21388	iareg7b:
				21389	addq.l &0x2,EXC_A7(%a6) # add two to the saved a7
				21390	rts
				21391
				21392	#########################################################################
				21393	# XDEF **************************************************************** #
				21394	# dec_areg(): decrement an address register by the value in d0 #
				21395	# #
				21396	# XREF **************************************************************** #
				21397	# None #
				21398	# #
				21399	# INPUT *************************************************************** #
				21400	# d0 = amount to decrement by #
				21401	# d1 = index of address register to decrement #
				21402	# #
				21403	# OUTPUT ************************************************************** #
				21404	# (address register is updated) #
				21405	# #
				21406	# ALGORITHM *********************************************************** #
				21407	# Typically used for an instruction w/ a pre-decrement <ea>, #
				21408	# this routine subtracts the decrement value in d0 from the address register #
				21409	# specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside #
				21410	# in their original places. #
				21411	# For a7, if the decrement amount is one, then we have to #
				21412	# decrement by two. For any a7 update, set the mda7_flag so that if #
				21413	# an access error exception occurs later in emulation, this address #
				21414	# register update can be undone. #
				21415	# #
				21416	#########################################################################
				21417
				21418	global dec_areg
				21419	dec_areg:
				21420	mov.w (tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset (the index in d1 is overwritten)
				21421	jmp (tbl_dareg.b,%pc,%d1.w*1) # dispatch; every handler returns with rts
				21422
				21423	tbl_dareg: # 8 word offsets, each relative to tbl_dareg
				21424	short dareg0 - tbl_dareg
				21425	short dareg1 - tbl_dareg
				21426	short dareg2 - tbl_dareg
				21427	short dareg3 - tbl_dareg
				21428	short dareg4 - tbl_dareg
				21429	short dareg5 - tbl_dareg
				21430	short dareg6 - tbl_dareg
				21431	short dareg7 - tbl_dareg
				21432
				21433	dareg0: sub.l %d0,EXC_DREGS+0x8(%a6) # a0: lower the saved copy
				21434	rts
				21435	dareg1: sub.l %d0,EXC_DREGS+0xc(%a6) # a1: lower the saved copy
				21436	rts
				21437	dareg2: sub.l %d0,%a2 # a2-a5: lower the live register
				21438	rts
				21439	dareg3: sub.l %d0,%a3
				21440	rts
				21441	dareg4: sub.l %d0,%a4
				21442	rts
				21443	dareg5: sub.l %d0,%a5
				21444	rts
				21445	dareg6: sub.l %d0,(%a6) # a6: lower the longword a6 currently points at
				21446	rts
				21447	dareg7: mov.b &mda7_flg,SPCOND_FLG(%a6) # a7: record the update so it can be undone
				21448	cmpi.b %d0,&0x1 # decrement of one? (only the low byte of d0 is compared)
				21449	beq.b dareg7b # yes; a7 moves by two instead
				21450	sub.l %d0,EXC_A7(%a6) # no; subtract d0 from the saved a7
				21451	rts
				21452	dareg7b:
				21453	subq.l &0x2,EXC_A7(%a6) # subtract two from the saved a7
				21454	rts
				21455
				21456	##############################################################################
				21457
				21458	#########################################################################
				21459	# XDEF **************************************************************** #
				21460	# load_fpn1(): load FP register value into FP_SRC(a6). #
				21461	# #
				21462	# XREF **************************************************************** #
				21463	# None #
				21464	# #
				21465	# INPUT *************************************************************** #
				21466	# d0 = index of FP register to load #
				21467	# #
				21468	# OUTPUT ************************************************************** #
				21469	# FP_SRC(a6) = value loaded from FP register file #
				21470	# #
				21471	# ALGORITHM *********************************************************** #
				21472	# Using the index in d0, load FP_SRC(a6) with a number from the #
				21473	# FP register file. #
				21474	# #
				21475	#########################################################################
				21476
				21477	global load_fpn1
				21478	load_fpn1:
				21479	mov.w (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = handler offset (the index in d0 is overwritten)
				21480	jmp (tbl_load_fpn1.b,%pc,%d0.w*1) # dispatch; every handler returns with rts
				21481
				21482	tbl_load_fpn1: # 8 word offsets, each relative to tbl_load_fpn1
				21483	short load_fpn1_0 - tbl_load_fpn1
				21484	short load_fpn1_1 - tbl_load_fpn1
				21485	short load_fpn1_2 - tbl_load_fpn1
				21486	short load_fpn1_3 - tbl_load_fpn1
				21487	short load_fpn1_4 - tbl_load_fpn1
				21488	short load_fpn1_5 - tbl_load_fpn1
				21489	short load_fpn1_6 - tbl_load_fpn1
				21490	short load_fpn1_7 - tbl_load_fpn1
				21491
				21492	load_fpn1_0: # fp0: copy the 12-byte image kept at EXC_FP0
				21493	mov.l 0+EXC_FP0(%a6), 0+FP_SRC(%a6)
				21494	mov.l 4+EXC_FP0(%a6), 4+FP_SRC(%a6)
				21495	mov.l 8+EXC_FP0(%a6), 8+FP_SRC(%a6)
				21496	lea FP_SRC(%a6), %a0 # a0 = ptr to the loaded operand
				21497	rts
				21498	load_fpn1_1: # fp1: copy the 12-byte image kept at EXC_FP1
				21499	mov.l 0+EXC_FP1(%a6), 0+FP_SRC(%a6)
				21500	mov.l 4+EXC_FP1(%a6), 4+FP_SRC(%a6)
				21501	mov.l 8+EXC_FP1(%a6), 8+FP_SRC(%a6)
				21502	lea FP_SRC(%a6), %a0 # a0 = ptr to the loaded operand
				21503	rts
				21504	load_fpn1_2: # fp2-fp7: store the register itself with fmovm
				21505	fmovm.x &0x20, FP_SRC(%a6) # register-list mask selects one register (fp2 here)
				21506	lea FP_SRC(%a6), %a0 # a0 = ptr to the loaded operand
				21507	rts
				21508	load_fpn1_3:
				21509	fmovm.x &0x10, FP_SRC(%a6)
				21510	lea FP_SRC(%a6), %a0
				21511	rts
				21512	load_fpn1_4:
				21513	fmovm.x &0x08, FP_SRC(%a6)
				21514	lea FP_SRC(%a6), %a0
				21515	rts
				21516	load_fpn1_5:
				21517	fmovm.x &0x04, FP_SRC(%a6)
				21518	lea FP_SRC(%a6), %a0
				21519	rts
				21520	load_fpn1_6:
				21521	fmovm.x &0x02, FP_SRC(%a6)
				21522	lea FP_SRC(%a6), %a0
				21523	rts
				21524	load_fpn1_7:
				21525	fmovm.x &0x01, FP_SRC(%a6)
				21526	lea FP_SRC(%a6), %a0
				21527	rts
				21528
				21529	#############################################################################
				21530
				21531	#########################################################################
				21532	# XDEF **************************************************************** #
				21533	# load_fpn2(): load FP register value into FP_DST(a6). #
				21534	# #
				21535	# XREF **************************************************************** #
				21536	# None #
				21537	# #
				21538	# INPUT *************************************************************** #
				21539	# d0 = index of FP register to load #
				21540	# #
				21541	# OUTPUT ************************************************************** #
				21542	# FP_DST(a6) = value loaded from FP register file #
				21543	# #
				21544	# ALGORITHM *********************************************************** #
				21545	# Using the index in d0, load FP_DST(a6) with a number from the #
				21546	# FP register file. #
				21547	# #
				21548	#########################################################################
				21549
				21550	global load_fpn2
				21551	load_fpn2:
				21552	mov.w (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = handler offset (the index in d0 is overwritten)
				21553	jmp (tbl_load_fpn2.b,%pc,%d0.w*1) # dispatch; every handler returns with rts
				21554
				21555	tbl_load_fpn2: # 8 word offsets, each relative to tbl_load_fpn2
				21556	short load_fpn2_0 - tbl_load_fpn2
				21557	short load_fpn2_1 - tbl_load_fpn2
				21558	short load_fpn2_2 - tbl_load_fpn2
				21559	short load_fpn2_3 - tbl_load_fpn2
				21560	short load_fpn2_4 - tbl_load_fpn2
				21561	short load_fpn2_5 - tbl_load_fpn2
				21562	short load_fpn2_6 - tbl_load_fpn2
				21563	short load_fpn2_7 - tbl_load_fpn2
				21564
				21565	load_fpn2_0: # fp0: copy the 12-byte image kept at EXC_FP0
				21566	mov.l 0+EXC_FP0(%a6), 0+FP_DST(%a6)
				21567	mov.l 4+EXC_FP0(%a6), 4+FP_DST(%a6)
				21568	mov.l 8+EXC_FP0(%a6), 8+FP_DST(%a6)
				21569	lea FP_DST(%a6), %a0 # a0 = ptr to the loaded operand
				21570	rts
				21571	load_fpn2_1: # fp1: copy the 12-byte image kept at EXC_FP1
				21572	mov.l 0+EXC_FP1(%a6), 0+FP_DST(%a6)
				21573	mov.l 4+EXC_FP1(%a6), 4+FP_DST(%a6)
				21574	mov.l 8+EXC_FP1(%a6), 8+FP_DST(%a6)
				21575	lea FP_DST(%a6), %a0 # a0 = ptr to the loaded operand
				21576	rts
				21577	load_fpn2_2: # fp2-fp7: store the register itself with fmovm
				21578	fmovm.x &0x20, FP_DST(%a6) # register-list mask selects one register (fp2 here)
				21579	lea FP_DST(%a6), %a0 # a0 = ptr to the loaded operand
				21580	rts
				21581	load_fpn2_3:
				21582	fmovm.x &0x10, FP_DST(%a6)
				21583	lea FP_DST(%a6), %a0
				21584	rts
				21585	load_fpn2_4:
				21586	fmovm.x &0x08, FP_DST(%a6)
				21587	lea FP_DST(%a6), %a0
				21588	rts
				21589	load_fpn2_5:
				21590	fmovm.x &0x04, FP_DST(%a6)
				21591	lea FP_DST(%a6), %a0
				21592	rts
				21593	load_fpn2_6:
				21594	fmovm.x &0x02, FP_DST(%a6)
				21595	lea FP_DST(%a6), %a0
				21596	rts
				21597	load_fpn2_7:
				21598	fmovm.x &0x01, FP_DST(%a6)
				21599	lea FP_DST(%a6), %a0
				21600	rts
				21601
				21602	#############################################################################
				21603
				21604	#########################################################################
				21605	# XDEF **************************************************************** #
				21606	# store_fpreg(): store an fp value to the fpreg designated d0. #
				21607	# #
				21608	# XREF **************************************************************** #
				21609	# None #
				21610	# #
				21611	# INPUT *************************************************************** #
				21612	# fp0 = extended precision value to store #
				21613	# d0 = index of floating-point register #
				21614	# #
				21615	# OUTPUT ************************************************************** #
				21616	# None #
				21617	# #
				21618	# ALGORITHM *********************************************************** #
				21619	# Store the value in fp0 to the FP register designated by the #
				21620	# value in d0. The FP number can be DENORM or SNAN so we have to be #
				21621	# careful that we don't take an exception here. #
				21622	# #
				21623	#########################################################################
				21624
				21625	global store_fpreg
				21626	store_fpreg:
				21627	mov.w (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = handler offset (the index in d0 is overwritten)
				21628	jmp (tbl_store_fpreg.b,%pc,%d0.w*1) # dispatch; every handler returns with rts
				21629
				21630	tbl_store_fpreg: # 8 word offsets, each relative to tbl_store_fpreg
				21631	short store_fpreg_0 - tbl_store_fpreg
				21632	short store_fpreg_1 - tbl_store_fpreg
				21633	short store_fpreg_2 - tbl_store_fpreg
				21634	short store_fpreg_3 - tbl_store_fpreg
				21635	short store_fpreg_4 - tbl_store_fpreg
				21636	short store_fpreg_5 - tbl_store_fpreg
				21637	short store_fpreg_6 - tbl_store_fpreg
				21638	short store_fpreg_7 - tbl_store_fpreg
				21639
				21640	store_fpreg_0: # dst fp0: write the value into the image kept at EXC_FP0
				21641	fmovm.x &0x80, EXC_FP0(%a6)
				21642	rts
				21643	store_fpreg_1: # dst fp1: write the value into the image kept at EXC_FP1
				21644	fmovm.x &0x80, EXC_FP1(%a6)
				21645	rts
				21646	store_fpreg_2: # dst fp2-fp7: bounce the value through the stack with fmovm
				21647	fmovm.x &0x01, -(%sp) # push the source (mask bit order differs for -(sp) -- see FMOVEM docs)
				21648	fmovm.x (%sp)+, &0x20 # pop it into the destination register (fp2 here)
				21649	rts
				21650	store_fpreg_3:
				21651	fmovm.x &0x01, -(%sp)
				21652	fmovm.x (%sp)+, &0x10
				21653	rts
				21654	store_fpreg_4:
				21655	fmovm.x &0x01, -(%sp)
				21656	fmovm.x (%sp)+, &0x08
				21657	rts
				21658	store_fpreg_5:
				21659	fmovm.x &0x01, -(%sp)
				21660	fmovm.x (%sp)+, &0x04
				21661	rts
				21662	store_fpreg_6:
				21663	fmovm.x &0x01, -(%sp)
				21664	fmovm.x (%sp)+, &0x02
				21665	rts
				21666	store_fpreg_7:
				21667	fmovm.x &0x01, -(%sp)
				21668	fmovm.x (%sp)+, &0x01
				21669	rts
				21670
				21671	#########################################################################
				21672	# XDEF **************************************************************** #
				21673	# _denorm(): denormalize an intermediate result #
				21674	# #
				21675	# XREF **************************************************************** #
				21676	# None #
				21677	# #
				21678	# INPUT *************************************************************** #
				21679	# a0 = points to the operand to be denormalized #
				21680	# (in the internal extended format) #
				21681	# #
				21682	# d0 = rounding precision #
				21683	# #
				21684	# OUTPUT ************************************************************** #
				21685	# a0 = pointer to the denormalized result #
				21686	# (in the internal extended format) #
				21687	# #
				21688	# d0 = guard,round,sticky #
				21689	# #
				21690	# ALGORITHM *********************************************************** #
				21691	# According to the exponent underflow threshold for the given #
				21692	# precision, shift the mantissa bits to the right in order to raise the #
				21693	# exponent of the operand to the threshold value. While shifting the #
				21694	# mantissa bits right, maintain the value of the guard, round, and #
				21695	# sticky bits. #
				21696	# other notes: #
				21697	# (1) _denorm() is called by the underflow routines #
				21698	# (2) _denorm() does NOT affect the status register #
				21699	# #
				21700	#########################################################################
				21701
				21702	#
				21703	# table of exponent threshold values for each precision
				21704	#
				21705	tbl_thresh: # indexed by (d0 >> 2) in _denorm
				21706	short 0x0
				21707	short sgl_thresh
				21708	short dbl_thresh
				21709
				21710	global _denorm
				21711	_denorm:
				21712	#
				21713	# Load the exponent threshold for the precision selected and check
				21714	# to see if (threshold - exponent) is > 65 in which case we can
				21715	# simply calculate the sticky bit and zero the mantissa. otherwise
				21716	# we have to call the denormalization routine.
				21717	#
				21718	lsr.b &0x2, %d0 # shift prec to lo bits (only the low byte of d0 is shifted)
				21719	mov.w (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
				21720	mov.w %d1, %d0 # copy d1 into d0
				21721	sub.w FTEMP_EX(%a0), %d0 # diff = threshold - exp
				21722	cmpi.w %d0, &66 # is diff > 65? (mant + g,r bits)
				21723	bpl.b denorm_set_stky # yes; just calc sticky
				21724
				21725	clr.l %d0 # clear g,r,s
				21726	btst &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 set?
				21727	beq.b denorm_call # no; don't change anything
				21728	bset &29, %d0 # yes; set sticky bit
				21729
				21730	denorm_call:
				21731	bsr.l dnrm_lp # denormalize the number (d1 still holds the threshold)
				21732	rts
				21733
				21734	#
				21735	# all bits would have been shifted off during the denorm so simply
				21736	# set the sticky bit and clear the entire mantissa.
				21737	#
				21738	denorm_set_stky:
				21739	mov.l &0x20000000, %d0 # set sticky bit in return value
				21740	mov.w %d1, FTEMP_EX(%a0) # load exp with threshold
				21741	clr.l FTEMP_HI(%a0) # clear ms mantissa
				21742	clr.l FTEMP_LO(%a0) # clear ls mantissa
				21743	rts
				21744
				21745	# #
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	21746	# dnrm_lp(): denormalize exponent/mantissa to specified threshold #
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21747	# #
				21748	# INPUT: #
				21749	# %a0 : points to the operand to be denormalized #
				21750	# %d0{31:29} : initial guard,round,sticky #
				21751	# %d1{15:0} : denormalization threshold #
				21752	# OUTPUT: #
				21753	# %a0 : points to the denormalized operand #
				21754	# %d0{31:29} : final guard,round,sticky #
				21755	# #
				21756
				21757	# * Local Equates * #
				21758	set GRS, L_SCR2 # g,r,s temp storage
				21759	set FTEMP_LO2, L_SCR1 # FTEMP_LO copy
				21760
				21761	global dnrm_lp
				21762	dnrm_lp: # in: %a0 = operand, %d0 = g,r,s, %d1.w = threshold
				21763
				21764	#
				21765	# make a copy of FTEMP_LO and place the g,r,s bits directly after it
				21766	# in memory so as to make the bitfield extraction for denormalization easier.
				21767	# (the bitfield reads in case_1/case_2 start in FTEMP_LO2 and run on into
				21768	# GRS, so this relies on GRS following FTEMP_LO2 in the frame.)
				21768	mov.l FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
				21769	mov.l %d0, GRS(%a6) # place g,r,s after it
				21770
				21771	#
				21772	# check to see how much less than the underflow threshold the operand
				21773	# exponent is, then dispatch on that shift amount.
				21774	#
				21775	mov.l %d1, %d0 # copy the denorm threshold
				21776	sub.w FTEMP_EX(%a0), %d1 # d1 = threshold - uns exponent
				21777	ble.b dnrm_no_lp # d1 <= 0; nothing to shift
				21778	cmpi.w %d1, &0x20 # is ( 0 < d1 < 32) ?
				21779	blt.b case_1 # yes
				21780	cmpi.w %d1, &0x40 # is (32 <= d1 < 64) ?
				21781	blt.b case_2 # yes
				21782	bra.w case_3 # (d1 >= 64)
				21783
				21784	#
				21785	# No denormalization necessary; operand is left untouched
				21786	#
				21787	dnrm_no_lp:
				21788	mov.l GRS(%a6), %d0 # restore original g,r,s
				21789	rts
				21790
				21791	#
				21792	# case (0<d1<32)
				21793	#
				21794	# %d0 = denorm threshold
				21795	# %d1 = "n" = amt to shift
				21796	#
				21797	# ---------------------------------------------------------
				21798	# \| FTEMP_HI \| FTEMP_LO \|grs000.........000\|
				21799	# ---------------------------------------------------------
				21800	# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
				21801	# \ \ \ \
				21802	# \ \ \ \
				21803	# \ \ \ \
				21804	# \ \ \ \
				21805	# \ \ \ \
				21806	# \ \ \ \
				21807	# \ \ \ \
				21808	# \ \ \ \
				21809	# <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
				21810	# ---------------------------------------------------------
				21811	# \|0.....0\| NEW_HI \| NEW_FTEMP_LO \|grs \|
				21812	# ---------------------------------------------------------
				21813	#
				21814	case_1: # shift amount n in %d1, 0 < n < 32
				21815	mov.l %d2, -(%sp) # create temp storage
				21816
				21817	mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
				21818	mov.l &32, %d0 # %d0 = 32
				21819	sub.w %d1, %d0 # %d0 = 32 - %d1
				21820
				21821	cmpi.w %d1, &29 # is shft amt >= 29
				21822	blt.b case1_extract # no; no fix needed
				21823	mov.b GRS(%a6), %d2 # yes; fetch byte holding the incoming g,r,s
				21824	or.b %d2, 3+FTEMP_LO2(%a6) # fold it into ls byte of the FTEMP_LO copy
				21825
				21826	case1_extract:
				21827	bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI (top 32-n bits)
				21828	bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
				21829	bfextu FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S plus shifted-off bits
				21830
				21831	mov.l %d2, FTEMP_HI(%a0) # store new FTEMP_HI
				21832	mov.l %d1, FTEMP_LO(%a0) # store new FTEMP_LO
				21833
				21834	bftst %d0{&2:&30} # any bits below G,R set?
				21835	beq.b case1_sticky_clear # no; go finish
				21836	bset &rnd_stky_bit, %d0 # yes; set sticky bit
				21837
				21838	case1_sticky_clear:
				21839	and.l &0xe0000000, %d0 # clear all but G,R,S
				21840	mov.l (%sp)+, %d2 # restore temp register
				21841	rts
				21842
				21843	#
				21844	# case (32<=d1<64)
				21845	#
				21846	# %d0 = denorm threshold
				21847	# %d1 = "n" = amt to shift
				21848	#
				21849	# ---------------------------------------------------------
				21850	# \| FTEMP_HI \| FTEMP_LO \|grs000.........000\|
				21851	# ---------------------------------------------------------
				21852	# <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
				21853	# \ \ \
				21854	# \ \ \
				21855	# \ \ -------------------
				21856	# \ -------------------- \
				21857	# ------------------- \ \
				21858	# \ \ \
				21859	# \ \ \
				21860	# \ \ \
				21861	# <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
				21862	# ---------------------------------------------------------
				21863	# \|0...............0\|0....0\| NEW_LO \|grs \|
				21864	# ---------------------------------------------------------
				21865	#
				21866	case_2: # shift amount n in %d1, 32 <= n < 64
				21867	mov.l %d2, -(%sp) # create temp storage
				21868
				21869	mov.w %d0, FTEMP_EX(%a0) # exponent = denorm threshold
				21870	subi.w &0x20, %d1 # %d1 now between 0 and 31
				21871	mov.l &0x20, %d0 # %d0 = 32
				21872	sub.w %d1, %d0 # %d0 = 32 - %d1
				21873
				21874	# subtle step here; or in the g,r,s at the bottom of FTEMP_LO to minimize
				21875	# the number of bits to check for the sticky detect.
				21876	# it only plays a role in shift amounts of 61-63.
				21877	mov.b GRS(%a6), %d2 # fetch byte holding the incoming g,r,s
				21878	or.b %d2, 3+FTEMP_LO2(%a6) # fold it into ls byte of the FTEMP_LO copy
				21879
				21880	bfextu FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
				21881	bfextu FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
				21882
				21883	bftst %d1{&2:&30} # any bits below G,R set?
				21884	bne.b case2_set_sticky # yes; set sticky bit
				21885	bftst FTEMP_LO2(%a6){%d0:&31} # any remaining lo/g,r,s bits set?
				21886	bne.b case2_set_sticky # yes; set sticky bit
				21887
				21888	mov.l %d1, %d0 # move new G,R,S to %d0
				21889	bra.b case2_end # nothing shifted off; no sticky
				21890
				21891	case2_set_sticky:
				21892	mov.l %d1, %d0 # move new G,R,S to %d0
				21893	bset &rnd_stky_bit, %d0 # set sticky bit
				21894
				21895	case2_end:
				21896	clr.l FTEMP_HI(%a0) # store FTEMP_HI = 0
				21897	mov.l %d2, FTEMP_LO(%a0) # store FTEMP_LO
				21898	and.l &0xe0000000, %d0 # clear all but G,R,S
				21899
				21900	mov.l (%sp)+,%d2 # restore temp register
				21901	rts
				21902
				21903	#
				21904	# case (d1>=64)
				21905	#
				21906	# %d0 = denorm threshold
				21907	# %d1 = amt to shift
				21908	#
				21909	case_3: # shift amount in %d1 is >= 64
				21910	mov.w %d0, FTEMP_EX(%a0) # insert denorm threshold
				21911
				21912	cmpi.w %d1, &65 # is shift amt > 65?
				21913	blt.b case3_64 # no; it's == 64
				21914	beq.b case3_65 # no; it's == 65
				21915
				21916	#
				21917	# case (d1>65)
				21918	#
				21919	# Shift value is > 65 and out of range. All bits are shifted off.
				21920	# Return a zero mantissa with only the sticky bit set in %d0.
				21921	#
				21922	clr.l FTEMP_HI(%a0) # clear hi(mantissa)
				21923	clr.l FTEMP_LO(%a0) # clear lo(mantissa)
				21924	mov.l &0x20000000, %d0 # return g,r,s = 0,0,1
				21925	rts
				21926
				21927	#
				21928	# case (d1 == 64)
				21929	#
				21930	# ---------------------------------------------------------
				21931	# \| FTEMP_HI \| FTEMP_LO \|grs000.........000\|
				21932	# ---------------------------------------------------------
				21933	# <-------(32)------>
				21934	# \ \
				21935	# \ \
				21936	# \ \
				21937	# \ ------------------------------
				21938	# ------------------------------- \
				21939	# \ \
				21940	# \ \
				21941	# \ \
				21942	# <-------(32)------>
				21943	# ---------------------------------------------------------
				21944	# \|0...............0\|0................0\|grs \|
				21945	# ---------------------------------------------------------
				21946	#
				21947	case3_64:
				21948	mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
				21949	mov.l %d0, %d1 # make a copy
				21950	and.l &0xc0000000, %d0 # top two bits of hi become G,R
				21951	and.l &0x3fffffff, %d1 # remaining hi bits; Z set if all zero
				21952
				21953	bra.b case3_complete # finish using the flags from the "and"
				21954
				21955	#
				21956	# case (d1 == 65)
				21957	#
				21958	# ---------------------------------------------------------
				21959	# \| FTEMP_HI \| FTEMP_LO \|grs000.........000\|
				21960	# ---------------------------------------------------------
				21961	# <-------(32)------>
				21962	# \ \
				21963	# \ \
				21964	# \ \
				21965	# \ ------------------------------
				21966	# -------------------------------- \
				21967	# \ \
				21968	# \ \
				21969	# \ \
				21970	# <-------(31)----->
				21971	# ---------------------------------------------------------
				21972	# \|0...............0\|0................0\|0rs \|
				21973	# ---------------------------------------------------------
				21974	#
				21975	case3_65:
					#
					# Shift of exactly 65: G = 0, R = msb of hi(mantissa), and every
					# other bit feeds the sticky test. %d1 still holds the shift count
					# on entry, so take a copy of hi(mantissa) before masking it;
					# otherwise the final "and" would test the count (never zero) and
					# case3_complete would set sticky unconditionally.
					#
				21976	mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
					mov.l %d0, %d1 # make a copy
				21977	and.l &0x80000000, %d0 # extract msb of hi(mantissa)
				21978	lsr.l &0x1, %d0 # shift high bit into R bit
				21979	and.l &0x7fffffff, %d1 # remaining hi bits; Z set if all zero
				21980
				21981	case3_complete:
				21982	# last operation done was an "and" of the bits shifted off so the condition
				21983	# codes are already set so branch accordingly.
				21984	bne.b case3_set_sticky # yes; go set new sticky
				21985	tst.l FTEMP_LO(%a0) # any bits set in lo(mantissa)?
				21986	bne.b case3_set_sticky # yes; go set new sticky
				21987	tst.b GRS(%a6) # any of the incoming g,r,s set?
				21988	bne.b case3_set_sticky # yes; go set new sticky
				21989
				21990	#
				21991	# no bits were shifted off so don't set the sticky bit.
				21992	# %d0 already holds the new guard and round bits and
				21993	# the entire mantissa is zero.
				21994	#
				21995	clr.l FTEMP_HI(%a0) # clear hi(mantissa)
				21996	clr.l FTEMP_LO(%a0) # clear lo(mantissa)
				21997	rts
				21998
				21999	#
				22000	# some bits were shifted off so set the sticky bit.
				22001	# the entire mantissa is zero.
				22002	#
				22003	case3_set_sticky:
				22004	bset &rnd_stky_bit,%d0 # set new sticky bit
				22005	clr.l FTEMP_HI(%a0) # clear hi(mantissa)
				22006	clr.l FTEMP_LO(%a0) # clear lo(mantissa)
				22007	rts
				22008
				22009	#########################################################################
				22010	# XDEF **************************************************************** #
				22011	# _round(): round result according to precision/mode #
				22012	# #
				22013	# XREF **************************************************************** #
				22014	# None #
				22015	# #
				22016	# INPUT *************************************************************** #
				22017	# a0 = ptr to input operand in internal extended format #
				22018	# d1(hi) = contains rounding precision: #
				22019	# ext = $0000xxxx #
				22020	# sgl = $0004xxxx #
				22021	# dbl = $0008xxxx #
				22022	# d1(lo) = contains rounding mode: #
				22023	# RN = $xxxx0000 #
				22024	# RZ = $xxxx0001 #
				22025	# RM = $xxxx0002 #
				22026	# RP = $xxxx0003 #
				22027	# d0{31:29} = contains the g,r,s bits (extended) #
				22028	# #
				22029	# OUTPUT ************************************************************** #
				22030	# a0 = pointer to rounded result #
				22031	# #
				22032	# ALGORITHM *********************************************************** #
				22033	# On return the value pointed to by a0 is correctly rounded, #
				22034	# a0 is preserved and the g-r-s bits in d0 are cleared. #
				22035	# The result is not typed - the tag field is invalid. The #
				22036	# result is still in the internal extended format. #
				22037	# #
				22038	# The INEX bit of USER_FPSR will be set if the rounded result was #
				22039	# inexact (i.e. if any of the g-r-s bits were set). #
				22040	# #
				22041	#########################################################################
				22042
				22043	global _round
				22044	_round:
				22045	#
				22046	# ext_grs() looks at the rounding precision and sets the appropriate
				22047	# G,R,S bits.
				22048	# If (G,R,S == 0) then result is exact and only truncation to the
				22049	# selected precision remains, else set the inex flag in status reg
				22050	# and continue.
				22051	bsr.l ext_grs # extract G,R,S
				22052
				22053	tst.l %d0 # are G,R,S zero?
				22054	beq.w truncate # yes; exact, just clear bits beyond prec
				22055
				22056	or.w &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
				22057
				22058	#
				22059	# Use rounding mode as an index into a jump table for these modes.
				22060	# All of the following assumes grs != 0.
				22061	#
				22062	mov.w (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
				22063	jmp (tbl_mode.b,%pc,%a1) # jmp to rnd mode handler
				22064
				22065	tbl_mode: # 16-bit offsets from tbl_mode, indexed by rnd mode
				22066	short rnd_near - tbl_mode # RN (mode 0)
				22067	short truncate - tbl_mode # RZ always truncates
				22068	short rnd_mnus - tbl_mode # RM (mode 2)
				22069	short rnd_plus - tbl_mode # RP (mode 3)
				22070
				22071	#################################################################
				22072	# ROUND PLUS INFINITY #
				22073	# #
				22074	# If sign of fp number = 0 (positive), then add 1 to l. #
				22075	#################################################################
				22076	rnd_plus:
				22077	tst.b FTEMP_SGN(%a0) # check for sign
				22078	bmi.w truncate # if negative then truncate
				22079
				22080	mov.l &0xffffffff, %d0 # force g,r,s non-zero so add_* keeps the l-bit
				22081	swap %d1 # set up d1 for round prec.
				22082
				22083	cmpi.b %d1, &s_mode # is prec = sgl?
				22084	beq.w add_sgl # yes
				22085	bgt.w add_dbl # no; it's dbl
				22086	bra.w add_ext # no; it's ext
				22087
				22088	#################################################################
				22089	# ROUND MINUS INFINITY #
				22090	# #
				22091	# If sign of fp number = 1 (negative), then add 1 to l. #
				22092	#################################################################
				22093	rnd_mnus:
				22094	tst.b FTEMP_SGN(%a0) # check for sign
				22095	bpl.w truncate # if positive then truncate
				22096
				22097	mov.l &0xffffffff, %d0 # force g,r,s non-zero so add_* keeps the l-bit
				22098	swap %d1 # set up d1 for round prec.
				22099
				22100	cmpi.b %d1, &s_mode # is prec = sgl?
				22101	beq.w add_sgl # yes
				22102	bgt.w add_dbl # no; it's dbl
				22103	bra.w add_ext # no; it's ext
				22104
				22105	#################################################################
				22106	# ROUND NEAREST #
				22107	# #
				22108	# If (g=1), then add 1 to l and if (r=s=0), then clear l #
				22109	# Note that this will round to even in case of a tie. #
				22110	#################################################################
				22111	rnd_near:
				22112	asl.l &0x1, %d0 # shift g-bit to c-bit; r,s stay in %d0
				22113	bcc.w truncate # g = 0; below halfway so truncate
				22114
				22115	swap %d1 # set up d1 for round prec.
				22116
				22117	cmpi.b %d1, &s_mode # is prec = sgl?
				22118	beq.w add_sgl # yes
				22119	bgt.w add_dbl # no; it's dbl
				22120	bra.w add_ext # no; it's ext
				22121
				22122	# * LOCAL EQUATES *
				22123	set ad_1_sgl, 0x00000100 # constant to add 1 to l-bit in sgl prec
				22124	set ad_1_dbl, 0x00000800 # constant to add 1 to l-bit in dbl prec
				22125
				22126	#########################
				22127	# ADD SINGLE #
				22128	#########################
				22129	add_sgl:
				22130	add.l &ad_1_sgl, FTEMP_HI(%a0) # add 1 to the sgl l-bit
				22131	bcc.b scc_clr # no mantissa overflow
				22132	roxr.w FTEMP_HI(%a0) # rotate the carry-out back in at the top
				22133	roxr.w FTEMP_HI+2(%a0) # and carry the shift on into the low word
				22134	add.w &0x1, FTEMP_EX(%a0) # and incr exponent
				22135	scc_clr:
				22136	tst.l %d0 # test for rs = 0
				22137	bne.b sgl_done # rs != 0; keep the l-bit as is
				22138	and.w &0xfe00, FTEMP_HI+2(%a0) # clear the l-bit
				22139	sgl_done: # also the sgl exit for truncate
				22140	and.l &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
				22141	clr.l FTEMP_LO(%a0) # clear lo(mantissa)
				22142	rts
				22143
				22144	#########################
				22145	# ADD EXTENDED #
				22146	#########################
				22147	add_ext:
				22148	addq.l &1,FTEMP_LO(%a0) # add 1 to l-bit
				22149	bcc.b xcc_clr # no carry out of lo(mantissa)
				22150	addq.l &1,FTEMP_HI(%a0) # propagate carry
				22151	bcc.b xcc_clr # no carry out of hi(mantissa)
				22152	roxr.w FTEMP_HI(%a0) # mant is 0 so rotate the carry-out back in
				22153	roxr.w FTEMP_HI+2(%a0) # continue the 1-bit right shift, word by word
				22154	roxr.w FTEMP_LO(%a0) # ...
				22155	roxr.w FTEMP_LO+2(%a0) # ... through the ls word
				22156	add.w &0x1,FTEMP_EX(%a0) # and inc exp
				22157	xcc_clr:
				22158	tst.l %d0 # test rs = 0
				22159	bne.b add_ext_done # rs != 0; keep the l-bit as is
				22160	and.b &0xfe,FTEMP_LO+3(%a0) # clear the l bit
				22161	add_ext_done:
				22162	rts
				22163
				22164	#########################
				22165	# ADD DOUBLE #
				22166	#########################
				22167	add_dbl:
				22168	add.l &ad_1_dbl, FTEMP_LO(%a0) # add 1 to the dbl l-bit
				22169	bcc.b dcc_clr # no carry
				22170	addq.l &0x1, FTEMP_HI(%a0) # propagate carry
				22171	bcc.b dcc_clr # no carry
				22172
				22173	roxr.w FTEMP_HI(%a0) # mant is 0 so rotate the carry-out back in
				22174	roxr.w FTEMP_HI+2(%a0) # continue the 1-bit right shift, word by word
				22175	roxr.w FTEMP_LO(%a0) # ...
				22176	roxr.w FTEMP_LO+2(%a0) # ... through the ls word
				22177	addq.w &0x1, FTEMP_EX(%a0) # incr exponent
				22178	dcc_clr:
				22179	tst.l %d0 # test for rs = 0
				22180	bne.b dbl_done # rs != 0; keep the l-bit as is
				22181	and.w &0xf000, FTEMP_LO+2(%a0) # clear the l-bit
				22182
				22183	dbl_done: # also the dbl exit for truncate
				22184	and.l &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
				22185	rts
				22186
				22187	###########################
				22188	# Truncate all other bits #
				22189	###########################
				22190	truncate:
				22191	swap %d1 # select rnd prec
				22192
				22193	cmpi.b %d1, &s_mode # is prec sgl?
				22194	beq.w sgl_done # yes; mask to sgl and clear lo(mantissa)
				22195	bgt.b dbl_done # no; it's dbl, mask lo(mantissa) to dbl
				22196	rts # no; it's ext, mantissa left as is
				22197
				22198
				22199	#
				22200	# ext_grs(): extract guard, round and sticky bits according to
				22201	# rounding precision.
				22202	#
				22203	# INPUT
				22204	# d0 = extended precision g,r,s (in d0{31:29})
				22205	# d1 = {PREC,ROUND}
				22206	# OUTPUT
				22207	# d0{31:29} = guard, round, sticky
				22208	#
				22209	# The ext_grs extract the guard/round/sticky bits according to the
				22210	# selected rounding precision. It is called by the round subroutine
				22211	# only. All registers except d0 are kept intact. d0 becomes an
				22212	# updated guard,round,sticky in d0{31:29}
				22213	#
				22214	# Notes: the ext_grs uses the round PREC, and therefore has to swap d1
				22215	# prior to usage, and needs to restore d1 to original. this
				22216	# routine is tightly tied to the round routine and not meant to
				22217	# uphold standard subroutine calling practices.
				22218	#
				22219
				22220	ext_grs:
				22221	swap %d1 # have d1.w point to round precision
				22222	tst.b %d1 # is rnd prec = extended?
				22223	bne.b ext_grs_not_ext # no; go handle sgl or dbl
				22224
				22225	#
				22226	# %d0 actually already holds g,r,s since _round() had it before calling
				22227	# this function. so, as long as we don't disturb it, we are "returning" it.
				22228	#
				22229	ext_grs_ext:
				22230	swap %d1 # yes; return to correct positions
				22231	rts
				22232
				22233	ext_grs_not_ext:
				22234	movm.l &0x3000, -(%sp) # make some temp registers {d2/d3}
				22235
				22236	cmpi.b %d1, &s_mode # is rnd prec = sgl?
				22237	bne.b ext_grs_dbl # no; go handle dbl (sgl falls through)
				22238
				22239	#
				22240	# sgl:
				22241	# 96 64 40 32 0
				22242	# -----------------------------------------------------
				22243	# \| EXP \|XXXXXXX\| \|xx \| \|grs\|
				22244	# -----------------------------------------------------
				22245	# <--(24)--->nn\ /
				22246	# ee ---------------------
				22247	# ww \|
				22248	# v
				22249	# gr new sticky
				22250	#
				22251	ext_grs_sgl:
				22252	bfextu FTEMP_HI(%a0){&24:&2}, %d3 # sgl prec. g-r are the 2 bits right
				22253	mov.l &30, %d2 # of the sgl prec. limits
				22254	lsl.l %d2, %d3 # shift g-r bits to MSB of d3
				22255	mov.l FTEMP_HI(%a0), %d2 # get hi(mantissa) for s-bit test
				22256	and.l &0x0000003f, %d2 # s bit is the or of all other
				22257	bne.b ext_grs_st_stky # bits to the right of g-r
				22258	tst.l FTEMP_LO(%a0) # test lower mantissa
				22259	bne.b ext_grs_st_stky # if any are set, set sticky
				22260	tst.l %d0 # test original g,r,s
				22261	bne.b ext_grs_st_stky # if any are set, set sticky
				22262	bra.b ext_grs_end_sd # nothing below g-r is set; exit
				22263
				22264	#
				22265	# dbl:
				22266	# 96 64 32 11 0
				22267	# -----------------------------------------------------
				22268	# \| EXP \|XXXXXXX\| \| \|xx \|grs\|
				22269	# -----------------------------------------------------
				22270	# nn\ /
				22271	# ee -------
				22272	# ww \|
				22273	# v
				22274	# gr new sticky
				22275	#
				22276	ext_grs_dbl:
				22277	bfextu FTEMP_LO(%a0){&21:&2}, %d3 # dbl-prec. g-r are the 2 bits right
				22278	mov.l &30, %d2 # of the dbl prec. limits
				22279	lsl.l %d2, %d3 # shift g-r bits to the MSB of d3
				22280	mov.l FTEMP_LO(%a0), %d2 # get lower mantissa for s-bit test
				22281	and.l &0x000001ff, %d2 # s bit is the or-ing of all
				22282	bne.b ext_grs_st_stky # other bits to the right of g-r
				22283	tst.l %d0 # test original g,r,s
				22284	bne.b ext_grs_st_stky # if any are set, set sticky
				22285	bra.b ext_grs_end_sd # if clear, exit
				22286
				22287	ext_grs_st_stky: # shared by the sgl and dbl paths
				22288	bset &rnd_stky_bit, %d3 # set sticky bit
				22289	ext_grs_end_sd:
				22290	mov.l %d3, %d0 # return grs to d0
				22291
				22292	movm.l (%sp)+, &0xc # restore scratch registers {d2/d3}
				22293
				22294	swap %d1 # restore d1 to original
				22295	rts
				22296
				22297	#########################################################################
				22298	# norm(): normalize the mantissa of an extended precision input. the #
				22299	# input operand should not be normalized already. #
				22300	# #
				22301	# XDEF **************************************************************** #
				22302	# norm() #
				22303	# #
				22304	# XREF **************************************************************** #
				22305	# none #
				22306	# #
				22307	# INPUT *************************************************************** #
				22308	# a0 = pointer fp extended precision operand to normalize #
				22309	# #
				22310	# OUTPUT ************************************************************** #
				22311	# d0 = number of bit positions the mantissa was shifted #
				22312	# a0 = the input operand's mantissa is normalized; the exponent #
				22313	# is unchanged. #
				22314	# #
				22315	#########################################################################
				22316	global norm
				22317	norm: # note: %d1 is used as scratch and not restored
				22318	mov.l %d2, -(%sp) # create some temp regs
				22319	mov.l %d3, -(%sp) # (%d2 = shift count, %d3 = bits moving lo -> hi)
				22320
				22321	mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa)
				22322	mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa)
				22323
				22324	bfffo %d0{&0:&32}, %d2 # how many places to shift?
				22325	beq.b norm_lo # hi(man) is all zeroes!
				22326
				22327	norm_hi:
				22328	lsl.l %d2, %d0 # left shift hi(man)
				22329	bfextu %d1{&0:%d2}, %d3 # extract top %d2 bits of lo(man)
				22330	# NOTE(review): a bitfield width of 0 selects 32 bits on the 68k, so a
				22331	# shift count of 0 looks unsafe here; see the header's precondition.
				22331	or.l %d3, %d0 # create hi(man)
				22332	lsl.l %d2, %d1 # create lo(man)
				22333
				22334	mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
				22335	mov.l %d1, FTEMP_LO(%a0) # store new lo(man)
				22336
				22337	mov.l %d2, %d0 # return shift amount
				22338
				22339	mov.l (%sp)+, %d3 # restore temp regs
				22340	mov.l (%sp)+, %d2
				22341
				22342	rts
				22343
				22344	norm_lo: # hi(man) == 0: result comes from lo(man) only
				22345	bfffo %d1{&0:&32}, %d2 # how many places to shift?
				22346	lsl.l %d2, %d1 # shift lo(man)
				22347	add.l &32, %d2 # add 32 to shft amount
				22348
				22349	mov.l %d1, FTEMP_HI(%a0) # shifted lo(man) becomes hi(man)
				22350	clr.l FTEMP_LO(%a0) # lo(man) is now zero
				22351
				22352	mov.l %d2, %d0 # return shift amount
				22353
				22354	mov.l (%sp)+, %d3 # restore temp regs
				22355	mov.l (%sp)+, %d2
				22356
				22357	rts
				22358
				22359	#########################################################################
				22360	# unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO #
				22361	# - returns corresponding optype tag #
				22362	# #
				22363	# XDEF **************************************************************** #
				22364	# unnorm_fix() #
				22365	# #
				22366	# XREF **************************************************************** #
				22367	# norm() - normalize the mantissa #
				22368	# #
				22369	# INPUT *************************************************************** #
				22370	# a0 = pointer to unnormalized extended precision number #
				22371	# #
				22372	# OUTPUT ************************************************************** #
				22373	# d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO #
				22374	# a0 = input operand has been converted to a norm, denorm, or #
				22375	# zero; both the exponent and mantissa are changed. #
				22376	# #
				22377	#########################################################################
				22378
				22379	global unnorm_fix
				22380	unnorm_fix:
				22381	bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed?
				22382	bne.b unnorm_shift # hi(man) is not all zeroes
				22383
				22384	#
				22385	# hi(man) is all zeroes so see if any bits in lo(man) are set
				22386	#
				22387	unnorm_chk_lo:
				22388	bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
				22389	beq.w unnorm_zero # yes
				22390
				22391	add.w &32, %d0 # no; fix shift distance
				22392
				22393	#
				22394	# d0 = # shifts needed for complete normalization
				22395	#
				22396	unnorm_shift:
				22397	clr.l %d1 # clear top word
				22398	mov.w FTEMP_EX(%a0), %d1 # extract exponent
				22399	and.w &0x7fff, %d1 # strip off sgn
				22400
				22401	cmp.w %d0, %d1 # will normalizing push exp < 0?
				22402	bgt.b unnorm_nrm_zero # yes; shift only until exp = 0
				22403
				22404	#
André Goddard Rosa	af901ca	2009-11-14 13:09:05 -0200	[diff] [blame]	22405	# exponent would not go < 0, so the number can be fully normalized
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22406	#
				22407	sub.w %d0, %d1 # shift exponent value
				22408	mov.w FTEMP_EX(%a0), %d0 # load old exponent
				22409	and.w &0x8000, %d0 # save old sign
				22410	or.w %d0, %d1 # {sgn,new exp}
				22411	mov.w %d1, FTEMP_EX(%a0) # insert new exponent
				22412
				22413	bsr.l norm # normalize UNNORM
				22414
				22415	mov.b &NORM, %d0 # return new optype tag
				22416	rts
				22417
				22418	#
				22419	# exponent would go < 0, so only shift the mantissa left until exp = 0;
				22420	# %d1 = exponent = number of places to shift. result is a DENORM.
				22421	unnorm_nrm_zero:
				22422	cmp.b %d1, &32 # is exp <= 32?
				22423	bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent
				22424
				22425	bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man)
				22426	mov.l %d0, FTEMP_HI(%a0) # save new hi(man)
				22427
				22428	mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
				22429	lsl.l %d1, %d0 # extract new lo(man)
				22430	mov.l %d0, FTEMP_LO(%a0) # save new lo(man)
				22431
				22432	and.w &0x8000, FTEMP_EX(%a0) # set exp = 0, keep sign
				22433
				22434	mov.b &DENORM, %d0 # return new optype tag
				22435	rts
				22436
				22437	#
				22438	# only mantissa bits set are in lo(man)
				22439	#
				22440	unnorm_nrm_zero_lrg:
				22441	sub.w &32, %d1 # adjust shft amt by 32
				22442
				22443	mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man)
				22444	lsl.l %d1, %d0 # left shift lo(man)
				22445
				22446	mov.l %d0, FTEMP_HI(%a0) # store new hi(man)
				22447	clr.l FTEMP_LO(%a0) # lo(man) = 0
				22448
				22449	and.w &0x8000, FTEMP_EX(%a0) # set exp = 0, keep sign
				22450
				22451	mov.b &DENORM, %d0 # return new optype tag
				22452	rts
				22453
				22454	#
				22455	# whole mantissa is zero so this UNNORM is actually a zero
				22456	#
				22457	unnorm_zero:
				22458	and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero, keep sign
				22459
				22460	mov.b &ZERO, %d0 # fix optype tag
				22461	rts
				22462
				22463	#########################################################################
				22464	# XDEF **************************************************************** #
				22465	# set_tag_x(): return the optype of the input ext fp number #
				22466	# #
				22467	# XREF **************************************************************** #
				22468	# None #
				22469	# #
				22470	# INPUT *************************************************************** #
				22471	# a0 = pointer to extended precision operand #
				22472	# #
				22473	# OUTPUT ************************************************************** #
				22474	# d0 = value of type tag #
				22475	# one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO #
				22476	# #
				22477	# ALGORITHM *********************************************************** #
				22478	# Simply test the exponent, j-bit, and mantissa values to #
				22479	# determine the type of operand. #
				22480	# If it's an unnormalized zero, alter the operand and force it #
				22481	# to be a normal zero. #
				22482	# #
				22483	#########################################################################
				22484
				22485	global set_tag_x
				22486	set_tag_x:
				22487	mov.w FTEMP_EX(%a0), %d0 # extract exponent
				22488	andi.w &0x7fff, %d0 # strip off sign
				22489	cmpi.w %d0, &0x7fff # is (EXP == MAX)?
				22490	beq.b inf_or_nan_x # yes; operand is an INF or a NAN
				22491	not_inf_or_nan_x:
				22492	btst &0x7,FTEMP_HI(%a0) # test msb of hi(mantissa) (the j-bit)
				22493	beq.b not_norm_x # clear; not a NORM
				22494	is_norm_x:
				22495	mov.b &NORM, %d0 # j-bit set and exp != max
				22496	rts
				22497	not_norm_x:
				22498	tst.w %d0 # is exponent = 0?
				22499	bne.b is_unnorm_x # no; j-bit clear with exp != 0
				22500	not_unnorm_x:
				22501	tst.l FTEMP_HI(%a0) # exp = 0: any hi(mantissa) bits set?
				22502	bne.b is_denorm_x # yes
				22503	tst.l FTEMP_LO(%a0) # any lo(mantissa) bits set?
				22504	bne.b is_denorm_x # yes
				22505	is_zero_x:
				22506	mov.b &ZERO, %d0 # exp = 0 and mantissa = 0
				22507	rts
				22508	is_denorm_x:
				22509	mov.b &DENORM, %d0 # exp = 0 and mantissa != 0
				22510	rts
				22511	# must distinguish now "Unnormalized zeroes" which we
				22512	# must convert to zero.
				22513	is_unnorm_x:
				22514	tst.l FTEMP_HI(%a0) # any hi(mantissa) bits set?
				22515	bne.b is_unnorm_reg_x # yes; a genuine UNNORM
				22516	tst.l FTEMP_LO(%a0) # any lo(mantissa) bits set?
				22517	bne.b is_unnorm_reg_x # yes; a genuine UNNORM
				22518	# it's an "unnormalized zero". let's convert it to an actual zero...
				22519	andi.w &0x8000,FTEMP_EX(%a0) # clear exponent, keep sign
				22520	mov.b &ZERO, %d0
				22521	rts
				22522	is_unnorm_reg_x:
				22523	mov.b &UNNORM, %d0
				22524	rts
				22525	inf_or_nan_x:
				22526	tst.l FTEMP_LO(%a0) # any lo(mantissa) bits set?
				22527	bne.b is_nan_x # yes; it's a NAN
				22528	mov.l FTEMP_HI(%a0), %d0 # fetch hi(mantissa)
				22529	and.l &0x7fffffff, %d0 # msb is a don't care!
				22530	bne.b is_nan_x # other hi bits set; it's a NAN
				22531	is_inf_x:
				22532	mov.b &INF, %d0 # exp = max and mantissa (less msb) = 0
				22533	rts
				22534	is_nan_x:
				22535	btst &0x6, FTEMP_HI(%a0) # test bit just below the mantissa msb
				22536	beq.b is_snan_x # clear; signalling NAN
				22537	mov.b &QNAN, %d0 # set; quiet NAN
				22538	rts
				22539	is_snan_x:
				22540	mov.b &SNAN, %d0
				22541	rts
				22542
				22543	#########################################################################
				22544	# XDEF **************************************************************** #
				22545	# set_tag_d(): return the optype of the input dbl fp number #
				22546	# #
				22547	# XREF **************************************************************** #
				22548	# None #
				22549	# #
				22550	# INPUT *************************************************************** #
				22551	# a0 = points to double precision operand #
				22552	# #
				22553	# OUTPUT ************************************************************** #
				22554	# d0 = value of type tag #
				22555	# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
				22556	# #
				22557	# ALGORITHM *********************************************************** #
				22558	# Simply test the exponent, j-bit, and mantissa values to #
				22559	# determine the type of operand. #
				22560	# #
				22561	#########################################################################
				22562
				22563	global set_tag_d
				22564	set_tag_d:
				22565	mov.l FTEMP(%a0), %d0 # fetch first longword of the dbl operand
				22566	mov.l %d0, %d1 # keep a copy for the fraction tests
				22567
				22568	andi.l &0x7ff00000, %d0 # isolate the exponent field
				22569	beq.b zero_or_denorm_d # exp = 0
				22570
				22571	cmpi.l %d0, &0x7ff00000 # is (EXP == MAX)?
				22572	beq.b inf_or_nan_d # yes
				22573
				22574	is_norm_d:
				22575	mov.b &NORM, %d0 # 0 < exp < max
				22576	rts
				22577	zero_or_denorm_d:
				22578	and.l &0x000fffff, %d1 # any fraction bits in first longword?
				22579	bne is_denorm_d # yes
				22580	tst.l 4+FTEMP(%a0) # any fraction bits in second longword?
				22581	bne is_denorm_d # yes
				22582	is_zero_d:
				22583	mov.b &ZERO, %d0 # exp = 0 and fraction = 0
				22584	rts
				22585	is_denorm_d:
				22586	mov.b &DENORM, %d0 # exp = 0 and fraction != 0
				22587	rts
				22588	inf_or_nan_d:
				22589	and.l &0x000fffff, %d1 # any fraction bits in first longword?
				22590	bne is_nan_d # yes
				22591	tst.l 4+FTEMP(%a0) # any fraction bits in second longword?
				22592	bne is_nan_d # yes
				22593	is_inf_d:
				22594	mov.b &INF, %d0 # exp = max and fraction = 0
				22595	rts
				22596	is_nan_d:
				22597	btst &19, %d1 # test ms bit of the fraction
				22598	bne is_qnan_d # set; quiet NAN
				22599	is_snan_d:
				22600	mov.b &SNAN, %d0 # clear; signalling NAN
				22601	rts
				22602	is_qnan_d:
				22603	mov.b &QNAN, %d0
				22604	rts
				22605
				22606	#########################################################################
				22607	# XDEF **************************************************************** #
				22608	# set_tag_s(): return the optype of the input sgl fp number #
				22609	# #
				22610	# XREF **************************************************************** #
				22611	# None #
				22612	# #
				22613	# INPUT *************************************************************** #
				22614	# a0 = pointer to single precision operand #
				22615	# #
				22616	# OUTPUT ************************************************************** #
				22617	# d0 = value of type tag #
				22618	# one of: NORM, INF, QNAN, SNAN, DENORM, ZERO #
				22619	# #
				22620	# ALGORITHM *********************************************************** #
				22621	# Simply test the exponent, j-bit, and mantissa values to #
				22622	# determine the type of operand. #
				22623	# #
				22624	#########################################################################
				22625	# set_tag_s: classify the single longword at FTEMP(%a0); the type tag is returned in %d0.b.
				22626	global set_tag_s
				22627	set_tag_s:
				22628	mov.l FTEMP(%a0), %d0 # d0 = the operand longword
				22629	mov.l %d0, %d1 # keep a copy in d1 for the fraction tests below
				22630
				22631	andi.l &0x7f800000, %d0 # isolate bits 30-23 (exponent field)
				22632	beq.b zero_or_denorm_s # exponent is all zeros
				22633
				22634	cmpi.l %d0, &0x7f800000 # is the exponent all ones?
				22635	beq.b inf_or_nan_s # yes
				22636	# any other exponent value falls through to NORM
				22637	is_norm_s:
				22638	mov.b &NORM, %d0
				22639	rts
				22640	zero_or_denorm_s:
				22641	and.l &0x007fffff, %d1 # any of bits 22-0 (fraction) set?
				22642	bne is_denorm_s # yes; not a zero
				22643	is_zero_s:
				22644	mov.b &ZERO, %d0 # exponent and fraction are both zero
				22645	rts
				22646	is_denorm_s:
				22647	mov.b &DENORM, %d0 # zero exponent, non-zero fraction
				22648	rts
				22649	inf_or_nan_s:
				22650	and.l &0x007fffff, %d1 # any of bits 22-0 (fraction) set?
				22651	bne is_nan_s # yes; not an infinity
				22652	is_inf_s:
				22653	mov.b &INF, %d0 # all-ones exponent, zero fraction
				22654	rts
				22655	is_nan_s:
				22656	btst &22, %d1 # bit 22 (highest fraction bit) picks QNAN vs SNAN
				22657	bne is_qnan_s # set => QNAN
				22658	is_snan_s:
				22659	mov.b &SNAN, %d0 # bit 22 clear
				22660	rts
				22661	is_qnan_s:
				22662	mov.b &QNAN, %d0
				22663	rts
				22664
				22665	#########################################################################
				22666	# XDEF **************************************************************** #
				22667	# unf_res(): routine to produce default underflow result of a #
				22668	# scaled extended precision number; this is used by #
				22669	# fadd/fdiv/fmul/etc. emulation routines. #
				22670	# unf_res4(): same as above but for fsglmul/fsgldiv which use #
				22671	# single round prec and extended prec mode. #
				22672	# #
				22673	# XREF **************************************************************** #
				22674	# _denorm() - denormalize according to scale factor #
				22675	# _round() - round denormalized number according to rnd prec #
				22676	# #
				22677	# INPUT *************************************************************** #
				22678	# a0 = pointer to extended precision operand #
				22679	# d0 = scale factor #
				22680	# d1 = rounding precision/mode #
				22681	# #
				22682	# OUTPUT ************************************************************** #
				22683	# a0 = pointer to default underflow result in extended precision #
				22684	# d0.b = result FPSR_cc which caller may or may not want to save #
				22685	# #
				22686	# ALGORITHM *********************************************************** #
				22687	# Convert the input operand to "internal format" which means the #
				22688	# exponent is extended to 16 bits and the sign is stored in the unused #
				22689	# portion of the extended precision operand. Denormalize the number #
				22690	# according to the scale factor passed in d0. Then, round the #
				22691	# denormalized result. #
				22692	# Set the FPSR_exc bits as appropriate but return the cc bits in #
				22693	# d0 in case the caller doesn't want to save them (as is the case for #
				22694	# fmove out). #
				22695	# unf_res4() for fsglmul/fsgldiv forces the denorm to extended #
				22696	# precision and the rounding mode to single. #
				22697	# #
				22698	#########################################################################
				22699	global unf_res
				22700	unf_res:
				22701	mov.l %d1, -(%sp) # save rnd prec,mode on stack
				22702
				22703	btst &0x7, FTEMP_EX(%a0) # make "internal" format: test the sign (top bit of FTEMP_EX)
				22704	sne FTEMP_SGN(%a0) # FTEMP_SGN = 0xff if the sign was set, else 0x00
				22705
				22706	mov.w FTEMP_EX(%a0), %d1 # extract exponent
				22707	and.w &0x7fff, %d1 # drop the sign bit
				22708	sub.w %d0, %d1 # subtract the scale factor passed in d0
				22709	mov.w %d1, FTEMP_EX(%a0) # insert 16 bit exponent
				22710
				22711	mov.l %a0, -(%sp) # save operand ptr during calls
				22712	# stack now: (sp) = operand ptr, 0x4(sp) = saved prec,mode longword (its low word is at 0x6(sp))
				22713	mov.l 0x4(%sp),%d0 # pass rnd prec.
				22714	andi.w &0x00c0,%d0 # keep bits 7-6 only
				22715	lsr.w &0x4,%d0 # and move them down to bits 3-2
				22716	bsr.l _denorm # denorm result
				22717
				22718	mov.l (%sp),%a0 # reload operand ptr; it stays on the stack
				22719	mov.w 0x6(%sp),%d1 # load prec:mode into %d1
				22720	andi.w &0xc0,%d1 # extract rnd prec
				22721	lsr.w &0x4,%d1 # rnd prec now in bits 3-2
				22722	swap %d1 # hi(d1) = rnd prec
				22723	mov.w 0x6(%sp),%d1 # load prec:mode again
				22724	andi.w &0x30,%d1 # keep bits 5-4 (rnd mode)
				22725	lsr.w &0x4,%d1 # lo(d1) = rnd mode in bits 1-0
				22726	bsr.l _round # round the denorm
				22727
				22728	mov.l (%sp)+, %a0 # pop operand ptr; only the prec,mode longword is left
				22729
				22730	# result is now rounded properly. convert back to normal format
				22731	bclr &0x7, FTEMP_EX(%a0) # clear sgn first; may have residue
				22732	tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
				22733	beq.b unf_res_chkifzero # no; result is positive
				22734	bset &0x7, FTEMP_EX(%a0) # set result sgn
				22735	clr.b FTEMP_SGN(%a0) # clear temp sign
				22736
				22737	# the number may have become zero after rounding. set ccodes accordingly.
				22738	unf_res_chkifzero:
				22739	clr.l %d0 # d0 = returned ccodes; start with none set
				22740	tst.l FTEMP_HI(%a0) # is value now a zero?
				22741	bne.b unf_res_cont # no
				22742	tst.l FTEMP_LO(%a0)
				22743	bne.b unf_res_cont # no
				22744	# bset &z_bit, FPSR_CC(%a6) # yes; set zero ccode bit
				22745	bset &z_bit, %d0 # yes; set zero ccode bit (in d0 only; FPSR_CC is left to the caller)
				22746
				22747	unf_res_cont:
				22748
				22749	#
				22750	# can inex1 also be set along with unfl and inex2???
				22751	#
				22752	# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
				22753	#
				22754	btst &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
				22755	beq.b unf_res_end # no
				22756	bset &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
				22757
				22758	unf_res_end:
				22759	add.l &0x4, %sp # clear stack (drop the saved prec,mode longword)
				22760	rts
				22761
				22762	# unf_res() for fsglmul() and fsgldiv().
				22763	global unf_res4
				22764	unf_res4:
				22765	mov.l %d1,-(%sp) # save rnd prec,mode on stack
				22766
				22767	btst &0x7,FTEMP_EX(%a0) # make "internal" format: test the sign (top bit of FTEMP_EX)
				22768	sne FTEMP_SGN(%a0) # FTEMP_SGN = 0xff if the sign was set, else 0x00
				22769
				22770	mov.w FTEMP_EX(%a0),%d1 # extract exponent
				22771	and.w &0x7fff,%d1 # drop the sign bit
				22772	sub.w %d0,%d1 # subtract the scale factor passed in d0
				22773	mov.w %d1,FTEMP_EX(%a0) # insert 16 bit exponent
				22774
				22775	mov.l %a0,-(%sp) # save operand ptr during calls
				22776	# stack now: (sp) = operand ptr, 0x4(sp) = saved prec,mode longword (its low word is at 0x6(sp))
				22777	clr.l %d0 # force rnd prec = ext
				22778	bsr.l _denorm # denorm result
				22779
				22780	mov.l (%sp),%a0 # reload operand ptr; it stays on the stack
				22781	mov.w &s_mode,%d1 # force rnd prec = sgl
				22782	swap %d1 # hi(d1) = forced rnd prec
				22783	mov.w 0x6(%sp),%d1 # load rnd mode
				22784	andi.w &0x30,%d1 # extract rnd mode (bits 5-4)
				22785	lsr.w &0x4,%d1 # lo(d1) = rnd mode in bits 1-0
				22786	bsr.l _round # round the denorm
				22787
				22788	mov.l (%sp)+,%a0 # pop operand ptr; only the prec,mode longword is left
				22789
				22790	# result is now rounded properly. convert back to normal format
				22791	bclr &0x7,FTEMP_EX(%a0) # clear sgn first; may have residue
				22792	tst.b FTEMP_SGN(%a0) # is "internal result" sign set?
				22793	beq.b unf_res4_chkifzero # no; result is positive
				22794	bset &0x7,FTEMP_EX(%a0) # set result sgn
				22795	clr.b FTEMP_SGN(%a0) # clear temp sign
				22796
				22797	# the number may have become zero after rounding. set ccodes accordingly.
				22798	unf_res4_chkifzero:
				22799	clr.l %d0 # d0 = returned ccodes; start with none set
				22800	tst.l FTEMP_HI(%a0) # is value now a zero?
				22801	bne.b unf_res4_cont # no
				22802	tst.l FTEMP_LO(%a0)
				22803	bne.b unf_res4_cont # no
				22804	# bset &z_bit,FPSR_CC(%a6) # yes; set zero ccode bit
				22805	bset &z_bit,%d0 # yes; set zero ccode bit (in d0 only; FPSR_CC is left to the caller)
				22806
				22807	unf_res4_cont:
				22808
				22809	#
				22810	# can inex1 also be set along with unfl and inex2???
				22811	#
				22812	# we know that underflow has occurred. aunfl should be set if INEX2 is also set.
				22813	#
				22814	btst &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
				22815	beq.b unf_res4_end # no
				22816	bset &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
				22817
				22818	unf_res4_end:
				22819	add.l &0x4,%sp # clear stack (drop the saved prec,mode longword)
				22820	rts
				22821
				22822	#########################################################################
				22823	# XDEF **************************************************************** #
				22824	# ovf_res(): routine to produce the default overflow result of #
				22825	# an overflowing number. #
				22826	# ovf_res2(): same as above but the rnd mode/prec are passed #
				22827	# differently. #
				22828	# #
				22829	# XREF **************************************************************** #
				22830	# none #
				22831	# #
				22832	# INPUT *************************************************************** #
				22833	# d1.b = '-1' => (-); '0' => (+) #
				22834	# ovf_res(): #
				22835	# d0 = rnd mode/prec #
				22836	# ovf_res2(): #
				22837	# hi(d0) = rnd prec #
				22838	# lo(d0) = rnd mode #
				22839	# #
				22840	# OUTPUT ************************************************************** #
				22841	# a0 = points to extended precision result #
				22842	# d0.b = condition code bits #
				22843	# #
				22844	# ALGORITHM *********************************************************** #
				22845	# The default overflow result can be determined by the sign of #
				22846	# the result and the rounding mode/prec in effect. These bits are #
				22847	# concatenated together to create an index into the default result #
				22848	# table. A pointer to the correct result is returned in a0. The #
				22849	# resulting condition codes are returned in d0 in case the caller #
				22850	# doesn't want FPSR_cc altered (as is the case for fmove out). #
				22851	# #
				22852	#########################################################################
				22853	# ovf_res/ovf_res2: build an index from result sign + rnd prec/mode, then look up the default overflow result and its ccodes.
				22854	global ovf_res
				22855	ovf_res:
				22856	andi.w &0x10,%d1 # keep result sign: d1.b = -1 leaves 0x10, d1.b = 0 leaves 0
				22857	lsr.b &0x4,%d0 # shift prec/mode down into the low nibble
				22858	or.b %d0,%d1 # concat the two: d1 = table index
				22859	mov.w %d1,%d0 # make a copy; d0 indexes the one-byte ccode table
				22860	lsl.b &0x1,%d1 # multiply d1 by 2; with the *8 scale below that is 16 bytes per entry
				22861	bra.b ovf_res_load
				22862
				22863	global ovf_res2
				22864	ovf_res2:
				22865	and.w &0x10, %d1 # keep result sign
				22866	or.b %d0, %d1 # insert rnd mode (lo(d0) is OR'ed in as passed; no shift here)
				22867	swap %d0 # bring hi(d0) into the low word
				22868	or.b %d0, %d1 # insert rnd prec (also OR'ed in as passed)
				22869	mov.w %d1, %d0 # make a copy; d0 indexes the one-byte ccode table
				22870	lsl.b &0x1, %d1 # shift left by 1; with the *8 scale below that is 16 bytes per entry
				22871
				22872	#
				22873	# use the rounding mode, precision, and result sign as an index into the
				22874	# two tables below to fetch the default result and the result ccodes.
				22875	#
				22876	ovf_res_load:
				22877	mov.b (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
				22878	lea (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
				22879
				22880	rts
				22881	# one ccode byte per index; in these tables 0x2 goes with the INF results and 0x8 with the negative results
				22882	tbl_ovfl_cc:
				22883	byte 0x2, 0x0, 0x0, 0x2 # +EXT: RN,RZ,RM,RP
				22884	byte 0x2, 0x0, 0x0, 0x2 # +SGL: RN,RZ,RM,RP
				22885	byte 0x2, 0x0, 0x0, 0x2 # +DBL: RN,RZ,RM,RP
				22886	byte 0x0, 0x0, 0x0, 0x0 # indices 12-15: zero filler (the matching result rows are zero too)
				22887	byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # -EXT: RN,RZ,RM,RP
				22888	byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # -SGL: RN,RZ,RM,RP
				22889	byte 0x2+0x8, 0x8, 0x2+0x8, 0x8 # -DBL: RN,RZ,RM,RP
				22890	# one 16-byte entry (4 longwords) per index, in the same order as tbl_ovfl_cc
				22891	tbl_ovfl_result:
				22892	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
				22893	long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
				22894	long 0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
				22895	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
				22896
				22897	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
				22898	long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
				22899	long 0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
				22900	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
				22901
				22902	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
				22903	long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
				22904	long 0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
				22905	long 0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
				22906	# indices 12-15: zero filler so the negative entries start at index 16 (sign bit 0x10)
				22907	long 0x00000000,0x00000000,0x00000000,0x00000000
				22908	long 0x00000000,0x00000000,0x00000000,0x00000000
				22909	long 0x00000000,0x00000000,0x00000000,0x00000000
				22910	long 0x00000000,0x00000000,0x00000000,0x00000000
				22911
				22912	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
				22913	long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
				22914	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
				22915	long 0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
				22916
				22917	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
				22918	long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
				22919	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
				22920	long 0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
				22921
				22922	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
				22923	long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
				22924	long 0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
				22925	long 0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
				22926
				22927	#########################################################################
				22928	# XDEF **************************************************************** #
				22929	# get_packed(): fetch a packed operand from memory and then #
				22930	# convert it to a floating-point binary number. #
				22931	# #
				22932	# XREF **************************************************************** #
				22933	# _dcalc_ea() - calculate the correct <ea> #
				22934	# _dmem_read() - fetch the packed operand from memory #
				22935	# facc_in_x() - the fetch failed so jump to special exit code #
				22936	# decbin() - convert packed to binary extended precision #
				22937	# #
				22938	# INPUT *************************************************************** #
				22939	# None #
				22940	# #
				22941	# OUTPUT ************************************************************** #
				22942	# If no failure on _dmem_read(): #
				22943	# FP_SRC(a6) = packed operand now as a binary FP number #
				22944	# #
				22945	# ALGORITHM *********************************************************** #
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	22946	# Get the correct <ea> which is the value on the exception stack #
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	22947	# frame w/ maybe a correction factor if the <ea> is -(an) or (an)+. #
				22948	# Then, fetch the operand from memory. If the fetch fails, exit #
				22949	# through facc_in_x(). #
				22950	# If the packed operand is a ZERO,NAN, or INF, convert it to #
				22951	# its binary representation here. Else, call decbin() which will #
				22952	# convert the packed value to an extended precision binary value. #
				22953	# #
				22954	#########################################################################
				22955
				22956	# the stacked <ea> for packed is correct except for -(An).
				22957	# the base reg must be updated for both -(An) and (An)+.
				22958	global get_packed
				22959	get_packed:
				22960	mov.l &0xc,%d0 # packed is 12 bytes
				22961	bsr.l _dcalc_ea # fetch <ea>; correct An
				22962	# NOTE(review): _dcalc_ea presumably leaves the <ea> where _dmem_read expects its source -- confirm
				22963	lea FP_SRC(%a6),%a1 # pass: ptr to super dst
				22964	mov.l &0xc,%d0 # pass: 12 bytes
				22965	bsr.l _dmem_read # read packed operand
				22966
				22967	tst.l %d1 # did dfetch fail? (non-zero d1 is treated as failure)
				22968	bne.l facc_in_x # yes
				22969
				22970	# The packed operand is an INF or a NAN if the exponent field is all ones.
				22971	bfextu FP_SRC(%a6){&1:&15},%d0 # get exp: the 15 bits that follow the top bit
				22972	cmpi.w %d0,&0x7fff # INF or NAN?
				22973	bne.b gp_try_zero # no
				22974	rts # operand is an INF or NAN; FP_SRC is left as read
				22975
				22976	# The packed operand is a zero if the mantissa is all zero, else it's
				22977	# a normal packed op.
				22978	gp_try_zero:
				22979	mov.b 3+FP_SRC(%a6),%d0 # get byte 4
				22980	andi.b &0x0f,%d0 # clear all but last nybble (the integer digit decbin reads)
				22981	bne.b gp_not_spec # not a zero
				22982	tst.l FP_SRC_HI(%a6) # is lw 2 zero?
				22983	bne.b gp_not_spec # not a zero
				22984	tst.l FP_SRC_LO(%a6) # is lw 3 zero?
				22985	bne.b gp_not_spec # not a zero
				22986	rts # operand is a ZERO; FP_SRC is left as read
				22987	gp_not_spec:
				22988	lea FP_SRC(%a6),%a0 # pass: ptr to packed op
				22989	bsr.l decbin # convert to extended
				22990	fmovm.x &0x80,FP_SRC(%a6) # make this the srcop: store the converted value decbin returns in fp0
				22991	rts
				22992
				22993	#########################################################################
				22994	# decbin(): Converts normalized packed bcd value pointed to by register #
				22995	# a0 to extended-precision value in fp0. #
				22996	# #
				22997	# INPUT *************************************************************** #
				22998	# a0 = pointer to normalized packed bcd value #
				22999	# #
				23000	# OUTPUT ************************************************************** #
				23001	# fp0 = exact fp representation of the packed bcd value. #
				23002	# #
				23003	# ALGORITHM *********************************************************** #
				23004	# Expected is a normal bcd (i.e. non-exceptional; all inf, zero, #
				23005	# and NaN operands are dispatched without entering this routine) #
				23006	# value in 68881/882 format at location (a0). #
				23007	# #
				23008	# A1. Convert the bcd exponent to binary by successive adds and #
				23009	# muls. Set the sign according to SE. Subtract 16 to compensate #
				23010	# for the mantissa which is to be interpreted as 17 integer #
				23011	# digits, rather than 1 integer and 16 fraction digits. #
				23012	# Note: this operation can never overflow. #
				23013	# #
				23014	# A2. Convert the bcd mantissa to binary by successive #
				23015	# adds and muls in FP0. Set the sign according to SM. #
				23016	# The mantissa digits will be converted with the decimal point #
				23017	# assumed following the least-significant digit. #
				23018	# Note: this operation can never overflow. #
				23019	# #
				23020	# A3. Count the number of leading/trailing zeros in the #
				23021	# bcd string. If SE is positive, count the leading zeros; #
				23022	# if negative, count the trailing zeros. Set the adjusted #
				23023	# exponent equal to the exponent from A1 and the zero count #
				23024	# added if SM = 1 and subtracted if SM = 0. Scale the #
				23025	# mantissa the equivalent of forcing in the bcd value: #
				23026	# #
				23027	# SM = 0 a non-zero digit in the integer position #
				23028	# SM = 1 a non-zero digit in Mant0, lsd of the fraction #
				23029	# #
				23030	# this will insure that any value, regardless of its #
				23031	# representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted #
				23032	# consistently. #
				23033	# #
				23034	# A4. Calculate the factor 10^exp in FP1 using a table of #
				23035	# 10^(2^n) values. To reduce the error in forming factors #
				23036	# greater than 10^27, a directed rounding scheme is used with #
				23037	# tables rounded to RN, RM, and RP, according to the table #
				23038	# in the comments of the pwrten section. #
				23039	# #
				23040	# A5. Form the final binary number by scaling the mantissa by #
				23041	# the exponent factor. This is done by multiplying the #
				23042	# mantissa in FP0 by the factor in FP1 if the adjusted #
				23043	# exponent sign is positive, and dividing FP0 by FP1 if #
				23044	# it is negative. #
				23045	# #
				23046	# Clean up and return. Check if the final mul or div was inexact. #
				23047	# If so, set INEX1 in USER_FPSR. #
				23048	# #
				23049	#########################################################################
				23050
				23051	#
				23052	# PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
				23053	# to nearest, minus, and plus, respectively. The tables include
				23054	# 10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}. No rounding
				23055	# is required until the power is greater than 27, however, all
				23056	# tables include the first 5 for ease of indexing.
				23057	# RTABLE: rounding bits used while forming the power of ten; pwrten indexes it with (FPCR rnd mode bits * 4) + {SM,SE}.
				23058	RTABLE:
				23059	byte 0,0,0,0 # rnd mode bits 0 (RN rows of the pwrten table): always 0 -> PTENRN
				23060	byte 2,3,2,3 # rnd mode bits 1 (RZ rows): 2 -> PTENRM, 3 -> PTENRP
				23061	byte 2,3,3,2 # rnd mode bits 2 (RM rows)
				23062	byte 3,2,2,3 # rnd mode bits 3 (RP rows)
				23063
				23064	set FNIBS,7 # dbf count for one mantissa longword (8 digits)
				23065	set FSTRT,0 # bit offset of the first digit within a mantissa longword
				23066
				23067	set ESTRT,4 # bit offset of the first exponent digit in the first bcd longword
				23068	set EDIGITS,2 # dbf count for the exponent digits (3 digits)
				23069
				23070	global decbin
				23071	decbin:
				23072	mov.l 0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
				23073	mov.l 0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
				23074	mov.l 0x8(%a0),FP_SCR0_LO(%a6)
				23075
				23076	lea FP_SCR0(%a6),%a0
				23077
				23078	movm.l &0x3c00,-(%sp) # save d2-d5
				23079	fmovm.x &0x1,-(%sp) # save fp1
				23080	#
				23081	# Calculate exponent:
				23082	# 1. Copy bcd value in memory for use as a working copy.
				23083	# 2. Calculate absolute value of exponent in d1 by mul and add.
				23084	# 3. Correct for exponent sign.
				23085	# 4. Subtract 16 to compensate for interpreting the mant as all integer digits.
				23086	# (i.e., all digits assumed left of the decimal point.)
				23087	#
				23088	# Register usage:
				23089	#
				23090	# calc_e:
				23091	# (*) d0: temp digit storage
				23092	# (*) d1: accumulator for binary exponent
				23093	# (*) d2: digit count
				23094	# (*) d3: offset pointer
				23095	# ( ) d4: first word of bcd
				23096	# ( ) a0: pointer to working bcd value
				23097	# ( ) a6: pointer to original bcd value
				23098	# (*) FP_SCR1: working copy of original bcd value
				23099	# (*) L_SCR1: copy of original exponent word
				23100	#
				23101	calc_e:
				23102	mov.l &EDIGITS,%d2 # # of nibbles (digits) in fraction part
				23103	mov.l &ESTRT,%d3 # counter to pick up digits
				23104	mov.l (%a0),%d4 # get first word of bcd
				23105	clr.l %d1 # zero d1 for accumulator
				23106	e_gd:
				23107	mulu.l &0xa,%d1 # mul partial product by one digit place
				23108	bfextu %d4{%d3:&4},%d0 # get the digit and zero extend into d0
				23109	add.l %d0,%d1 # d1 = d1 + d0
				23110	addq.b &4,%d3 # advance d3 to the next digit
				23111	dbf.w %d2,e_gd # if we have used all 3 digits, exit loop
				23112	btst &30,%d4 # get SE
				23113	beq.b e_pos # don't negate if pos
				23114	neg.l %d1 # negate before subtracting
				23115	e_pos:
				23116	sub.l &16,%d1 # sub to compensate for shift of mant
				23117	bge.b e_save # if still pos, do not neg
				23118	neg.l %d1 # now negative, make pos and set SE
				23119	or.l &0x40000000,%d4 # set SE in d4,
				23120	or.l &0x40000000,(%a0) # and in working bcd
				23121	e_save:
				23122	mov.l %d1,-(%sp) # save exp on stack
				23123	#
				23124	#
				23125	# Calculate mantissa:
				23126	# 1. Calculate absolute value of mantissa in fp0 by mul and add.
				23127	# 2. Correct for mantissa sign.
				23128	# (i.e., all digits assumed left of the decimal point.)
				23129	#
				23130	# Register usage:
				23131	#
				23132	# calc_m:
				23133	# (*) d0: temp digit storage
				23134	# (*) d1: lword counter
				23135	# (*) d2: digit count
				23136	# (*) d3: offset pointer
				23137	# ( ) d4: words 2 and 3 of bcd
				23138	# ( ) a0: pointer to working bcd value
				23139	# ( ) a6: pointer to original bcd value
				23140	# (*) fp0: mantissa accumulator
				23141	# ( ) FP_SCR1: working copy of original bcd value
				23142	# ( ) L_SCR1: copy of original exponent word
				23143	#
				23144	calc_m:
				23145	mov.l &1,%d1 # word counter, init to 1
				23146	fmov.s &0x00000000,%fp0 # accumulator
				23147	#
				23148	#
				23149	# Since the packed number has a long word between the first & second parts,
				23150	# get the integer digit then skip down & get the rest of the
				23151	# mantissa. We will unroll the loop once.
				23152	#
				23153	bfextu (%a0){&28:&4},%d0 # integer part is ls digit in long word
				23154	fadd.b %d0,%fp0 # add digit to sum in fp0
				23155	#
				23156	#
				23157	# Get the rest of the mantissa.
				23158	#
				23159	loadlw:
				23160	mov.l (%a0,%d1.L*4),%d4 # load mantissa lonqword into d4
				23161	mov.l &FSTRT,%d3 # counter to pick up digits
				23162	mov.l &FNIBS,%d2 # reset number of digits per a0 ptr
				23163	md2b:
				23164	fmul.s &0x41200000,%fp0 # fp0 = fp0 * 10
				23165	bfextu %d4{%d3:&4},%d0 # get the digit and zero extend
				23166	fadd.b %d0,%fp0 # fp0 = fp0 + digit
				23167	#
				23168	#
				23169	# If all the digits (8) in that long word have been converted (d2=0),
				23170	# then inc d1 (=2) to point to the next long word and reset d3 to 0
				23171	# to initialize the digit offset, and set d2 to 7 for the digit count;
				23172	# else continue with this long word.
				23173	#
				23174	addq.b &4,%d3 # advance d3 to the next digit
				23175	dbf.w %d2,md2b # check for last digit in this lw
				23176	nextlw:
				23177	addq.l &1,%d1 # inc lw pointer in mantissa
				23178	cmp.l %d1,&2 # test for last lw
				23179	ble.b loadlw # if not, get last one
				23180	#
				23181	# Check the sign of the mant and make the value in fp0 the same sign.
				23182	#
				23183	m_sign:
				23184	btst &31,(%a0) # test sign of the mantissa
				23185	beq.b ap_st_z # if clear, go to append/strip zeros
				23186	fneg.x %fp0 # if set, negate fp0
				23187	#
				23188	# Append/strip zeros:
				23189	#
				23190	# For adjusted exponents which have an absolute value greater than 27*,
				23191	# this routine calculates the amount needed to normalize the mantissa
				23192	# for the adjusted exponent. That number is subtracted from the exp
				23193	# if the exp was positive, and added if it was negative. The purpose
				23194	# of this is to reduce the value of the exponent and the possibility
				23195	# of error in calculation of pwrten.
				23196	#
				23197	# 1. Branch on the sign of the adjusted exponent.
				23198	# 2p.(positive exp)
Andrea Gelmini	86a8280	2016-05-21 13:57:20 +0200	[diff] [blame]	23199	# 2. Check M16 and the digits in lwords 2 and 3 in descending order.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	23200	# 3. Add one for each zero encountered until a non-zero digit.
				23201	# 4. Subtract the count from the exp.
				23202	# 5. Check if the exp has crossed zero in #3 above; make the exp abs
				23203	# and set SE.
				23204	# 6. Multiply the mantissa by 10**count.
				23205	# 2n.(negative exp)
Andrea Gelmini	86a8280	2016-05-21 13:57:20 +0200	[diff] [blame]	23206	# 2. Check the digits in lwords 3 and 2 in descending order.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	23207	# 3. Add one for each zero encountered until a non-zero digit.
				23208	# 4. Add the count to the exp.
				23209	# 5. Check if the exp has crossed zero in #3 above; clear SE.
				23210	# 6. Divide the mantissa by 10**count.
				23211	#
				23212	# *Why 27? If the adjusted exponent is within -28 < expA < 28, then
				23213	# any adjustment due to append/strip zeros will drive the resultant
				23214	# exponent towards zero. Since all pwrten constants with a power
				23215	# of 27 or less are exact, there is no need to use this routine to
				23216	# attempt to lessen the resultant exponent.
				23217	#
				23218	# Register usage:
				23219	#
				23220	# ap_st_z:
				23221	# (*) d0: temp digit storage
				23222	# (*) d1: zero count
				23223	# (*) d2: digit count
				23224	# (*) d3: offset pointer
				23225	# ( ) d4: first word of bcd
				23226	# (*) d5: lword counter
				23227	# ( ) a0: pointer to working bcd value
				23228	# ( ) FP_SCR1: working copy of original bcd value
				23229	# ( ) L_SCR1: copy of original exponent word
				23230	#
				23231	#
				23232	# First check the absolute value of the exponent to see if this
				23233	# routine is necessary. If so, then check the sign of the exponent
				23234	# and do append (+) or strip (-) zeros accordingly.
				23235	# This section handles a positive adjusted exponent.
				23236	#
				23237	ap_st_z:
				23238	mov.l (%sp),%d1 # load expA for range test
				23239	cmp.l %d1,&27 # test is with 27
				23240	ble.w pwrten # if abs(expA) <28, skip ap/st zeros
				23241	btst &30,(%a0) # check sign of exp
				23242	bne.b ap_st_n # if neg, go to neg side
				23243	clr.l %d1 # zero count reg
				23244	mov.l (%a0),%d4 # load lword 1 to d4
				23245	bfextu %d4{&28:&4},%d0 # get M16 in d0
				23246	bne.b ap_p_fx # if M16 is non-zero, go fix exp
				23247	addq.l &1,%d1 # inc zero count
				23248	mov.l &1,%d5 # init lword counter
				23249	mov.l (%a0,%d5.L*4),%d4 # get lword 2 to d4
				23250	bne.b ap_p_cl # if lw 2 is zero, skip it
				23251	addq.l &8,%d1 # and inc count by 8
				23252	addq.l &1,%d5 # inc lword counter
				23253	mov.l (%a0,%d5.L*4),%d4 # get lword 3 to d4
				23254	ap_p_cl:
				23255	clr.l %d3 # init offset reg
				23256	mov.l &7,%d2 # init digit counter
				23257	ap_p_gd:
				23258	bfextu %d4{%d3:&4},%d0 # get digit
				23259	bne.b ap_p_fx # if non-zero, go to fix exp
				23260	addq.l &4,%d3 # point to next digit
				23261	addq.l &1,%d1 # inc digit counter
				23262	dbf.w %d2,ap_p_gd # get next digit
				23263	ap_p_fx:
				23264	mov.l %d1,%d0 # copy counter to d2
				23265	mov.l (%sp),%d1 # get adjusted exp from memory
				23266	sub.l %d0,%d1 # subtract count from exp
				23267	bge.b ap_p_fm # if still pos, go to pwrten
				23268	neg.l %d1 # now its neg; get abs
				23269	mov.l (%a0),%d4 # load lword 1 to d4
				23270	or.l &0x40000000,%d4 # and set SE in d4
				23271	or.l &0x40000000,(%a0) # and in memory
				23272	#
				23273	# Calculate the mantissa multiplier to compensate for the stripping of
				23274	# zeros from the mantissa.
				23275	#
				23276	ap_p_fm:
				23277	lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
				23278	clr.l %d3 # init table index
				23279	fmov.s &0x3f800000,%fp1 # init fp1 to 1
				23280	mov.l &3,%d2 # init d2 to count bits in counter
				23281	ap_p_el:
				23282	asr.l &1,%d0 # shift lsb into carry
				23283	bcc.b ap_p_en # if 1, mul fp1 by pwrten factor
				23284	fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
				23285	ap_p_en:
				23286	add.l &12,%d3 # inc d3 to next rtable entry
				23287	tst.l %d0 # check if d0 is zero
				23288	bne.b ap_p_el # if not, get next bit
				23289	fmul.x %fp1,%fp0 # mul mantissa by 10**(no_bits_shifted)
				23290	bra.b pwrten # go calc pwrten
				23291	#
				23292	# This section handles a negative adjusted exponent.
				23293	#
				23294	ap_st_n:
				23295	clr.l %d1 # clr counter
				23296	mov.l &2,%d5 # set up d5 to point to lword 3
				23297	mov.l (%a0,%d5.L*4),%d4 # get lword 3
				23298	bne.b ap_n_cl # if not zero, check digits
				23299	sub.l &1,%d5 # dec d5 to point to lword 2
				23300	addq.l &8,%d1 # inc counter by 8
				23301	mov.l (%a0,%d5.L*4),%d4 # get lword 2
				23302	ap_n_cl:
				23303	mov.l &28,%d3 # point to last digit
				23304	mov.l &7,%d2 # init digit counter
				23305	ap_n_gd:
				23306	bfextu %d4{%d3:&4},%d0 # get digit
				23307	bne.b ap_n_fx # if non-zero, go to exp fix
				23308	subq.l &4,%d3 # point to previous digit
				23309	addq.l &1,%d1 # inc digit counter
				23310	dbf.w %d2,ap_n_gd # get next digit
				23311	ap_n_fx:
				23312	mov.l %d1,%d0 # copy counter to d0
				23313	mov.l (%sp),%d1 # get adjusted exp from memory
				23314	sub.l %d0,%d1 # subtract count from exp
				23315	bgt.b ap_n_fm # if still pos, go fix mantissa
				23316	neg.l %d1 # take abs of exp and clr SE
				23317	mov.l (%a0),%d4 # load lword 1 to d4
				23318	and.l &0xbfffffff,%d4 # and clr SE in d4
				23319	and.l &0xbfffffff,(%a0) # and in memory
				23320	#
				23321	# Calculate the mantissa multiplier to compensate for the appending of
				23322	# zeros to the mantissa.
				23323	#
				23324	ap_n_fm:
				23325	lea.l PTENRN(%pc),%a1 # get address of power-of-ten table
				23326	clr.l %d3 # init table index
				23327	fmov.s &0x3f800000,%fp1 # init fp1 to 1
				23328	mov.l &3,%d2 # init d2 to count bits in counter
				23329	ap_n_el:
				23330	asr.l &1,%d0 # shift lsb into carry
				23331	bcc.b ap_n_en # if 1, mul fp1 by pwrten factor
				23332	fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
				23333	ap_n_en:
				23334	add.l &12,%d3 # inc d3 to next rtable entry
				23335	tst.l %d0 # check if d0 is zero
				23336	bne.b ap_n_el # if not, get next bit
				23337	fdiv.x %fp1,%fp0 # div mantissa by 10**(no_bits_shifted)
				23338	#
				23339	#
				23340	# Calculate power-of-ten factor from adjusted and shifted exponent.
				23341	#
				23342	# Register usage:
				23343	#
				23344	# pwrten:
				23345	# (*) d0: temp
				23346	# ( ) d1: exponent
				23347	# (*) d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
				23348	# (*) d3: FPCR work copy
				23349	# ( ) d4: first word of bcd
				23350	# (*) a1: RTABLE pointer
				23351	# calc_p:
				23352	# (*) d0: temp
				23353	# ( ) d1: exponent
				23354	# (*) d3: PWRTxx table index
				23355	# ( ) a0: pointer to working copy of bcd
				23356	# (*) a1: PWRTxx pointer
				23357	# (*) fp1: power-of-ten accumulator
				23358	#
				23359	# Pwrten calculates the exponent factor in the selected rounding mode
				23360	# according to the following table:
				23361	#
				23362	# Sign of Mant Sign of Exp Rounding Mode PWRTEN Rounding Mode
				23363	#
				23364	# ANY ANY RN RN
				23365	#
				23366	# + + RP RP
				23367	# - + RP RM
				23368	# + - RP RM
				23369	# - - RP RP
				23370	#
				23371	# + + RM RM
				23372	# - + RM RP
				23373	# + - RM RP
				23374	# - - RM RM
				23375	#
				23376	# + + RZ RM
				23377	# - + RZ RM
				23378	# + - RZ RP
				23379	# - - RZ RP
				23380	#
				23381	#
				23382	pwrten:
				23383	mov.l USER_FPCR(%a6),%d3 # get user's FPCR
				23384	bfextu %d3{&26:&2},%d2 # isolate rounding mode bits
				23385	mov.l (%a0),%d4 # reload 1st bcd word to d4
				23386	asl.l &2,%d2 # format d2 to be
				23387	bfextu %d4{&0:&2},%d0 # {FPCR[6],FPCR[5],SM,SE}
				23388	add.l %d0,%d2 # in d2 as index into RTABLE
				23389	lea.l RTABLE(%pc),%a1 # load rtable base
				23390	mov.b (%a1,%d2),%d0 # load new rounding bits from table
				23391	clr.l %d3 # clear d3 to force no exc and extended
				23392	bfins %d0,%d3{&26:&2} # stuff new rounding bits in FPCR
				23393	fmov.l %d3,%fpcr # write new FPCR
				23394	asr.l &1,%d0 # write correct PTENxx table
				23395	bcc.b not_rp # to a1
				23396	lea.l PTENRP(%pc),%a1 # it is RP
				23397	bra.b calc_p # go to init section
				23398	not_rp:
				23399	asr.l &1,%d0 # keep checking
				23400	bcc.b not_rm
				23401	lea.l PTENRM(%pc),%a1 # it is RM
				23402	bra.b calc_p # go to init section
				23403	not_rm:
				23404	lea.l PTENRN(%pc),%a1 # it is RN
				23405	calc_p:
				23406	mov.l %d1,%d0 # copy exp to d0; work on the copy
				23407	bpl.b no_neg # if exp is negative,
				23408	neg.l %d0 # take its absolute value
				23409	or.l &0x40000000,(%a0) # and set SE bit (bit 30, tested at pnorm)
				23410	no_neg:
				23411	clr.l %d3 # table index (byte offset into the table at a1)
				23412	fmov.s &0x3f800000,%fp1 # init fp1 to 1
				23413	e_loop:
				23414	asr.l &1,%d0 # shift next bit of abs(exp) into carry
				23415	bcc.b e_next # if zero, skip the mul
				23416	fmul.x (%a1,%d3),%fp1 # mul by 10**(2**bit_no), the entry at offset d3
				23417	e_next:
				23418	add.l &12,%d3 # step d3 to the next 12-byte PTENxx entry
				23419	tst.l %d0 # check if any exponent bits remain
				23420	bne.b e_loop # not zero, continue shifting
				23421	#
				23422	#
				23423	# Check the sign of the adjusted exp and make the value in fp0 the
				23424	# same sign. If the exp was pos then multiply fp1*fp0;
				23425	# else divide fp0/fp1.
				23426	#
				23427	# Register Usage:
				23428	# norm:
				23429	# ( ) a0: pointer to working bcd value
				23430	# (*) fp0: mantissa accumulator
				23431	# ( ) fp1: scaling factor - 10**(abs(exp))
				23432	#
				23433	pnorm:
				23434	btst &30,(%a0) # test the sign of the exponent
				23435	beq.b mul # if clear, go to multiply
				23436	div:
				23437	fdiv.x %fp1,%fp0 # exp is negative, so divide mant by exp
				23438	bra.b end_dec
				23439	mul:
				23440	fmul.x %fp1,%fp0 # exp is positive, so multiply by exp
				23441	#
				23442	#
				23443	# Clean up and return with result in fp0.
				23444	#
				23445	# If the final mul/div in decbin incurred an inex exception,
				23446	# it will be inex2, but will be reported as inex1 by get_op.
				23447	#
				23448	end_dec:
				23449	fmov.l %fpsr,%d0 # get status register
				23450	bclr &inex2_bit+8,%d0 # test for inex2 and clear it
				23451	beq.b no_exc # skip this if no exc
				23452	ori.w &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
				23453	no_exc:
				23454	add.l &0x4,%sp # clear 1 lw param
				23455	fmovm.x (%sp)+,&0x40 # restore fp1
				23456	movm.l (%sp)+,&0x3c # restore d2-d5
				23457	fmov.l &0x0,%fpcr
				23458	fmov.l &0x0,%fpsr
				23459	rts
				23460
				23461	#########################################################################
				23462	# bindec(): Converts an input in extended precision format to bcd format#
				23463	# #
				23464	# INPUT *************************************************************** #
				23465	# a0 = pointer to the input extended precision value in memory. #
				23466	# the input may be either normalized, unnormalized, or #
				23467	# denormalized. #
				23468	# d0 = contains the k-factor sign-extended to 32-bits. #
				23469	# #
				23470	# OUTPUT ************************************************************** #
				23471	# FP_SCR0(a6) = bcd format result on the stack. #
				23472	# #
				23473	# ALGORITHM *********************************************************** #
				23474	# #
				23475	# A1. Set RM and size ext; Set SIGMA = sign of input. #
				23476	# The k-factor is saved for use in d7. Clear the #
				23477	# BINDEC_FLG for separating normalized/denormalized #
				23478	# input. If input is unnormalized or denormalized, #
				23479	# normalize it. #
				23480	# #
				23481	# A2. Set X = abs(input). #
				23482	# #
				23483	# A3. Compute ILOG. #
				23484	# ILOG is the log base 10 of the input value. It is #
				23485	# approximated by (e + 0.f) * log10(2) when the original #
				23486	# value is viewed as 2^^e * 1.f in extended precision. #
				23487	# This value is stored in d6. #
				23488	# #
				23489	# A4. Clr INEX bit. #
				23490	# The operation in A3 above may have set INEX2. #
				23491	# #
				23492	# A5. Set ICTR = 0; #
				23493	# ICTR is a flag used in A13. It must be set before the #
				23494	# loop entry A6. #
				23495	# #
				23496	# A6. Calculate LEN. #
				23497	# LEN is the number of digits to be displayed. The #
				23498	# k-factor can dictate either the total number of digits, #
				23499	# if it is a positive number, or the number of digits #
				23500	# after the decimal point which are to be included as #
				23501	# significant. See the 68882 manual for examples. #
				23502	# If LEN is computed to be greater than 17, set OPERR in #
				23503	# USER_FPSR. LEN is stored in d4. #
				23504	# #
				23505	# A7. Calculate SCALE. #
				23506	# SCALE is equal to 10^ISCALE, where ISCALE is the number #
				23507	# of decimal places needed to insure LEN integer digits #
				23508	# in the output before conversion to bcd. LAMBDA is the #
				23509	# sign of ISCALE, used in A9. Fp1 contains #
				23510	# 10^^(abs(ISCALE)) using a rounding mode which is a #
				23511	# function of the original rounding mode and the signs #
				23512	# of ISCALE and X. A table is given in the code. #
				23513	# #
				23514	# A8. Clr INEX; Force RZ. #
				23515	# The operations in A7 above may have set INEX2. #
				23516	# RZ mode is forced for the scaling operation to insure #
				23517	# only one rounding error. The grs bits are collected in #
				23518	# the INEX flag for use in A10. #
				23519	# #
				23520	# A9. Scale X -> Y. #
				23521	# The mantissa is scaled to the desired number of #
				23522	# significant digits. The excess digits are collected #
				23523	# in INEX2. #
				23524	# #
				23525	# A10. Or in INEX. #
				23526	# If INEX is set, round error occurred. This is #
				23527	# compensated for by 'or-ing' in the INEX2 flag to #
				23528	# the lsb of Y. #
				23529	# #
				23530	# A11. Restore original FPCR; set size ext. #
				23531	# Perform FINT operation in the user's rounding mode. #
				23532	# Keep the size to extended. #
				23533	# #
				23534	# A12. Calculate YINT = FINT(Y) according to user's rounding #
				23535	# mode. The fint instruction is issued directly. The output #
				23536	# is in fp0. #
				23537	# #
				23538	# A13. Check for LEN digits. #
				23539	# If the int operation results in more than LEN digits, #
				23540	# or less than LEN -1 digits, adjust ILOG and repeat from #
				23541	# A6. This test occurs only on the first pass. If the #
				23542	# result is exactly 10^LEN, increment ILOG and divide #
				23543	# the mantissa by 10. #
				23544	# #
				23545	# A14. Convert the mantissa to bcd. #
				23546	# The binstr routine is used to convert the LEN digit #
				23547	# mantissa to bcd in memory. The input to binstr is #
				23548	# to be a fraction; i.e. (mantissa)/10^LEN and adjusted #
				23549	# such that the decimal point is to the left of bit 63. #
				23550	# The bcd digits are stored in the correct position in #
				23551	# the final string area in memory. #
				23552	# #
				23553	# A15. Convert the exponent to bcd. #
				23554	# As in A14 above, the exp is converted to bcd and the #
				23555	# digits are stored in the final string. #
				23556	# Test the length of the final exponent string. If the #
				23557	# length is 4, set operr. #
				23558	# #
				23559	# A16. Write sign bits to final string. #
				23560	# #
				23561	#########################################################################
				23562
				23563	set BINDEC_FLG, EXC_TEMP # DENORM flag
				23564
				23565	# Constants in extended precision: log10(2) ~= 0.30103, the A3 multiplier that turns e + 0.f into ILOG. Each is followed by a fourth, zero longword.
				23566	PLOG2: # log10(2); used in A3 when e + 0.f is non-negative
				23567	long 0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000
				23568	PLOG2UP1: # PLOG2 with the mantissa lsb one higher; used in A3 when e + 0.f is negative
				23569	long 0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000
				23570
				23571	# Constants in single precision: the first longword is the value, the remaining three longwords are zero
				23572	FONE: # 1.0
				23573	long 0x3F800000,0x00000000,0x00000000,0x00000000
				23574	FTWO: # 2.0
				23575	long 0x40000000,0x00000000,0x00000000,0x00000000
				23576	FTEN: # 10.0
				23577	long 0x41200000,0x00000000,0x00000000,0x00000000
				23578	F4933: # 4933.0
				23579	long 0x459A2800,0x00000000,0x00000000,0x00000000
				23580
				23581	RBDTBL: # rounding mode for the 10^ISCALE calc in A7; byte index = {FPCR[6:5],LAMBDA,SIGN(X)}; 0 = RN, 2 = RM, 3 = RP
				23582	byte 0,0,0,0 # user mode RN (00): always RN
				23583	byte 3,3,2,2 # user mode RZ (01)
				23584	byte 3,2,2,3 # user mode RM (10)
				23585	byte 2,3,3,2 # user mode RP (11)
				23586
				23587	# Implementation Notes:
				23588	#
				23589	# The registers are used as follows:
				23590	#
				23591	# d0: scratch; LEN input to binstr
				23592	# d1: scratch
				23593	# d2: upper 32-bits of mantissa for binstr
				23594	# d3: scratch;lower 32-bits of mantissa for binstr
				23595	# d4: LEN
				23596	# d5: LAMBDA/ICTR
				23597	# d6: ILOG
				23598	# d7: k-factor
				23599	# a0: ptr for original operand/final result
				23600	# a1: scratch pointer
				23601	# a2: pointer to FP_X; abs(original value) in ext
				23602	# fp0: scratch
				23603	# fp1: scratch
				23604	# fp2: scratch
				23605	# F_SCR1:
				23606	# F_SCR2:
				23607	# L_SCR1:
				23608	# L_SCR2:
				23609
				23610	global bindec
				23611	bindec:
				23612	movm.l &0x3f20,-(%sp) # {%d2-%d7/%a2}
				23613	fmovm.x &0x7,-(%sp) # {%fp0-%fp2}
				23614
				23615	# A1. Set RM and size ext. Set SIGMA = sign input;
				23616	# The k-factor is saved for use in d7. Clear BINDEC_FLG for
				23617	# separating normalized/denormalized input. If the input
				23618	# is a denormalized number, set the BINDEC_FLG memory word
				23619	# to signal denorm. If the input is unnormalized, normalize
				23620	# the input and test for denormalized result.
				23621	#
				23622	fmov.l &rm_mode*0x10,%fpcr # set RM and ext
				23623	mov.l (%a0),L_SCR2(%a6) # save exponent for sign check
				23624	mov.l %d0,%d7 # move k-factor to d7
				23625
				23626	clr.b BINDEC_FLG(%a6) # clr norm/denorm flag
				23627	cmpi.b STAG(%a6),&DENORM # is input a DENORM?
				23628	bne.w A2_str # no; input is a NORM
				23629
				23630	#
				23631	# Normalize the denorm
				23632	#
				23633	un_de_norm:
				23634	mov.w (%a0),%d0
				23635	and.w &0x7fff,%d0 # strip sign of normalized exp
				23636	mov.l 4(%a0),%d1
				23637	mov.l 8(%a0),%d2
				23638	norm_loop:
				23639	sub.w &1,%d0
				23640	lsl.l &1,%d2
				23641	roxl.l &1,%d1
				23642	tst.l %d1
				23643	bge.b norm_loop
				23644	#
				23645	# Test if the normalized input is denormalized
				23646	#
				23647	tst.w %d0
				23648	bgt.b pos_exp # if greater than zero, it is a norm
				23649	st BINDEC_FLG(%a6) # set flag for denorm
				23650	pos_exp:
				23651	and.w &0x7fff,%d0 # strip sign of normalized exp
				23652	mov.w %d0,(%a0)
				23653	mov.l %d1,4(%a0)
				23654	mov.l %d2,8(%a0)
				23655
				23656	# A2. Set X = abs(input).
				23657	#
				23658	A2_str:
				23659	mov.l (%a0),FP_SCR1(%a6) # move input to work space
				23660	mov.l 4(%a0),FP_SCR1+4(%a6) # move input to work space
				23661	mov.l 8(%a0),FP_SCR1+8(%a6) # move input to work space
				23662	and.l &0x7fffffff,FP_SCR1(%a6) # create abs(X)
				23663
				23664	# A3. Compute ILOG.
				23665	# ILOG is the log base 10 of the input value. It is approx-
				23666	# imated by (e + 0.f) * log10(2) when the original value is viewed
				23667	# as 2^^e * 1.f in extended precision. This value is stored
				23668	# in d6.
				23669	#
				23670	# Register usage:
				23671	# Input/Output
				23672	# d0: k-factor/exponent
				23673	# d2: x/x
				23674	# d3: x/x
				23675	# d4: x/x
				23676	# d5: x/x
				23677	# d6: x/ILOG
				23678	# d7: k-factor/Unchanged
				23679	# a0: ptr for original operand/final result
				23680	# a1: x/x
				23681	# a2: x/x
				23682	# fp0: x/float(ILOG)
				23683	# fp1: x/x
				23684	# fp2: x/x
				23685	# F_SCR1:x/x
				23686	# F_SCR2:Abs(X)/Abs(X) with $3fff exponent
				23687	# L_SCR1:x/x
				23688	# L_SCR2:first word of X packed/Unchanged
				23689
				23690	tst.b BINDEC_FLG(%a6) # check for denorm
				23691	beq.b A3_cont # if clr, continue with norm
				23692	mov.l &-4933,%d6 # force ILOG = -4933
				23693	bra.b A4_str
				23694	A3_cont:
				23695	mov.w FP_SCR1(%a6),%d0 # move exp to d0
				23696	mov.w &0x3fff,FP_SCR1(%a6) # replace exponent with 0x3fff
				23697	fmov.x FP_SCR1(%a6),%fp0 # now fp0 has 1.f
				23698	sub.w &0x3fff,%d0 # strip off bias
				23699	fadd.w %d0,%fp0 # add in exp
				23700	fsub.s FONE(%pc),%fp0 # subtract off 1.0
				23701	fbge.w pos_res # if pos, branch
				23702	fmul.x PLOG2UP1(%pc),%fp0 # if neg, mul by LOG2UP1
				23703	fmov.l %fp0,%d6 # put ILOG in d6 as a lword
				23704	bra.b A4_str # go move out ILOG
				23705	pos_res:
				23706	fmul.x PLOG2(%pc),%fp0 # if pos, mul by LOG2
				23707	fmov.l %fp0,%d6 # put ILOG in d6 as a lword
				23708
				23709
				23710	# A4. Clr INEX bit.
				23711	# The operation in A3 above may have set INEX2.
				23712
				23713	A4_str:
				23714	fmov.l &0,%fpsr # zero all of fpsr - nothing needed
				23715
				23716
				23717	# A5. Set ICTR = 0;
				23718	# ICTR is a flag used in A13. It must be set before the
				23719	# loop entry A6. The lower word of d5 is used for ICTR.
				23720
				23721	clr.w %d5 # clear ICTR
				23722
				23723	# A6. Calculate LEN.
				23724	# LEN is the number of digits to be displayed. The k-factor
				23725	# can dictate either the total number of digits, if it is
				23726	# a positive number, or the number of digits after the
				23727	# original decimal point which are to be included as
				23728	# significant. See the 68882 manual for examples.
				23729	# If LEN is computed to be greater than 17, set OPERR in
				23730	# USER_FPSR. LEN is stored in d4.
				23731	#
				23732	# Register usage:
				23733	# Input/Output
				23734	# d0: exponent/Unchanged
				23735	# d2: x/x/scratch
				23736	# d3: x/x
				23737	# d4: exc picture/LEN
				23738	# d5: ICTR/Unchanged
				23739	# d6: ILOG/Unchanged
				23740	# d7: k-factor/Unchanged
				23741	# a0: ptr for original operand/final result
				23742	# a1: x/x
				23743	# a2: x/x
				23744	# fp0: float(ILOG)/Unchanged
				23745	# fp1: x/x
				23746	# fp2: x/x
				23747	# F_SCR1:x/x
				23748	# F_SCR2:Abs(X) with $3fff exponent/Unchanged
				23749	# L_SCR1:x/x
				23750	# L_SCR2:first word of X packed/Unchanged
				23751
				23752	A6_str:
				23753	tst.l %d7 # branch on sign of k
				23754	ble.b k_neg # if k <= 0, LEN = ILOG + 1 - k
				23755	mov.l %d7,%d4 # if k > 0, LEN = k
				23756	bra.b len_ck # skip to LEN check
				23757	k_neg:
				23758	mov.l %d6,%d4 # first load ILOG to d4
				23759	sub.l %d7,%d4 # subtract off k
				23760	addq.l &1,%d4 # add in the 1
				23761	len_ck:
				23762	tst.l %d4 # LEN check: branch on sign of LEN
				23763	ble.b LEN_ng # if neg, set LEN = 1
				23764	cmp.l %d4,&17 # test if LEN > 17
				23765	ble.b A7_str # if not, forget it
				23766	mov.l &17,%d4 # set max LEN = 17
				23767	tst.l %d7 # if negative, never set OPERR
				23768	ble.b A7_str # if positive, continue
				23769	or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
				23770	bra.b A7_str # finished here
				23771	LEN_ng:
				23772	mov.l &1,%d4 # min LEN is 1
				23773
				23774
				23775	# A7. Calculate SCALE.
				23776	# SCALE is equal to 10^ISCALE, where ISCALE is the number
				23777	# of decimal places needed to insure LEN integer digits
				23778	# in the output before conversion to bcd. LAMBDA is the sign
				23779	# of ISCALE, used in A9. Fp1 contains 10^^(abs(ISCALE)) using
				23780	# the rounding mode as given in the following table (see
				23781	# Coonen, p. 7.23 as ref.; however, the SCALE variable is
				23782	# of opposite sign in bindec.sa from Coonen).
				23783	#
				23784	# Initial USE
				23785	# FPCR[6:5] LAMBDA SIGN(X) FPCR[6:5]
				23786	# ----------------------------------------------
				23787	# RN 00 0 0 00/0 RN
				23788	# RN 00 0 1 00/0 RN
				23789	# RN 00 1 0 00/0 RN
				23790	# RN 00 1 1 00/0 RN
				23791	# RZ 01 0 0 11/3 RP
				23792	# RZ 01 0 1 11/3 RP
				23793	# RZ 01 1 0 10/2 RM
				23794	# RZ 01 1 1 10/2 RM
				23795	# RM 10 0 0 11/3 RP
				23796	# RM 10 0 1 10/2 RM
				23797	# RM 10 1 0 10/2 RM
				23798	# RM 10 1 1 11/3 RP
				23799	# RP 11 0 0 10/2 RM
				23800	# RP 11 0 1 11/3 RP
				23801	# RP 11 1 0 11/3 RP
				23802	# RP 11 1 1 10/2 RM
				23803	#
				23804	# Register usage:
				23805	# Input/Output
				23806	# d0: exponent/scratch - final is 0
				23807	# d2: x/0 or 24 for A9
				23808	# d3: x/scratch - offset ptr into PTENRM array
				23809	# d4: LEN/Unchanged
				23810	# d5: 0/ICTR:LAMBDA
				23811	# d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
				23812	# d7: k-factor/Unchanged
				23813	# a0: ptr for original operand/final result
				23814	# a1: x/ptr to PTENRM array
				23815	# a2: x/x
				23816	# fp0: float(ILOG)/Unchanged
				23817	# fp1: x/10^ISCALE
				23818	# fp2: x/x
				23819	# F_SCR1:x/x
				23820	# F_SCR2:Abs(X) with $3fff exponent/Unchanged
				23821	# L_SCR1:x/x
				23822	# L_SCR2:first word of X packed/Unchanged
				23823
				23824	A7_str:
				23825	tst.l %d7 # test sign of k
				23826	bgt.b k_pos # if pos and > 0, skip this
				23827	cmp.l %d7,%d6 # test k - ILOG
				23828	blt.b k_pos # if ILOG >= k, skip this
				23829	mov.l %d7,%d6 # if ((k<0) & (ILOG < k)) ILOG = k
				23830	k_pos:
				23831	mov.l %d6,%d0 # calc ILOG + 1 - LEN in d0
				23832	addq.l &1,%d0 # add the 1
				23833	sub.l %d4,%d0 # sub off LEN
				23834	swap %d5 # use upper word of d5 for LAMBDA
				23835	clr.w %d5 # set it zero initially
				23836	clr.w %d2 # set up d2 for very small case
				23837	tst.l %d0 # test sign of ISCALE
				23838	bge.b iscale # if pos, skip next inst
				23839	addq.w &1,%d5 # if neg, set LAMBDA true
				23840	cmp.l %d0,&0xffffecd4 # test iscale <= -4908
				23841	bgt.b no_inf # if false, skip rest
				23842	add.l &24,%d0 # add in 24 to iscale
				23843	mov.l &24,%d2 # put 24 in d2 for A9
				23844	no_inf:
				23845	neg.l %d0 # and take abs of ISCALE
				23846	iscale:
				23847	fmov.s FONE(%pc),%fp1 # init fp1 to 1
				23848	bfextu USER_FPCR(%a6){&26:&2},%d1 # get initial rmode bits
				23849	lsl.w &1,%d1 # put them in bits 2:1
				23850	add.w %d5,%d1 # add in LAMBDA
				23851	lsl.w &1,%d1 # put them in bits 3:1
				23852	tst.l L_SCR2(%a6) # test sign of original x
				23853	bge.b x_pos # if pos, don't set bit 0
				23854	addq.l &1,%d1 # if neg, set bit 0
				23855	x_pos:
				23856	lea.l RBDTBL(%pc),%a2 # load rbdtbl base
				23857	mov.b (%a2,%d1),%d3 # load d3 with new rmode
				23858	lsl.l &4,%d3 # put bits in proper position
				23859	fmov.l %d3,%fpcr # load bits into fpu
				23860	lsr.l &4,%d3 # put bits in proper position
				23861	tst.b %d3 # decode new rmode for pten table
				23862	bne.b not_rn # if zero, it is RN
				23863	lea.l PTENRN(%pc),%a1 # load a1 with RN table base
				23864	bra.b rmode # exit decode
				23865	not_rn:
				23866	lsr.b &1,%d3 # get lsb in carry
				23867	bcc.b not_rp2 # if carry clear, it is RM
				23868	lea.l PTENRP(%pc),%a1 # load a1 with RP table base
				23869	bra.b rmode # exit decode
				23870	not_rp2:
				23871	lea.l PTENRM(%pc),%a1 # load a1 with RM table base
				23872	rmode:
				23873	clr.l %d3 # clr table index
				23874	e_loop2:
				23875	lsr.l &1,%d0 # shift next bit into carry
				23876	bcc.b e_next2 # if zero, skip the mul
				23877	fmul.x (%a1,%d3),%fp1 # mul by 10**(d3_bit_no)
				23878	e_next2:
				23879	add.l &12,%d3 # inc d3 to next pwrten table entry
				23880	tst.l %d0 # test if ISCALE is zero
				23881	bne.b e_loop2 # if not, loop
				23882
				23883	# A8. Clr INEX; Force RZ.
				23884	# The operations in A7 above may have set INEX2.
				23885	# RZ mode is forced for the scaling operation to insure
				23886	# only one rounding error. The grs bits are collected in
				23887	# the INEX flag for use in A10.
				23888	#
				23889	# Register usage:
				23890	# Input/Output
				23891
				23892	fmov.l &0,%fpsr # clr INEX
				23893	fmov.l &rz_mode*0x10,%fpcr # set RZ rounding mode
				23894
				23895	# A9. Scale X -> Y.
				23896	# The mantissa is scaled to the desired number of significant
				23897	# digits. The excess digits are collected in INEX2. If mul,
				23898	# Check d2 for excess 10 exponential value. If not zero,
				23899	# the iscale value would have caused the pwrten calculation
				23900	# to overflow. Only a negative iscale can cause this, so
				23901	# multiply by 10^(d2), which is now only allowed to be 24,
				23902	# with a multiply by 10^8 and 10^16, which is exact since
				23903	# 10^24 is exact. If the input was denormalized, we must
				23904	# create a busy stack frame with the mul command and the
				23905	# two operands, and allow the fpu to complete the multiply.
				23906	#
				23907	# Register usage:
				23908	# Input/Output
				23909	# d0: FPCR with RZ mode/Unchanged
				23910	# d2: 0 or 24/unchanged
				23911	# d3: x/x
				23912	# d4: LEN/Unchanged
				23913	# d5: ICTR:LAMBDA
				23914	# d6: ILOG/Unchanged
				23915	# d7: k-factor/Unchanged
				23916	# a0: ptr for original operand/final result
				23917	# a1: ptr to PTENRM array/Unchanged
				23918	# a2: x/x
				23919	# fp0: float(ILOG)/X adjusted for SCALE (Y)
				23920	# fp1: 10^ISCALE/Unchanged
				23921	# fp2: x/x
				23922	# F_SCR1:x/x
				23923	# F_SCR2:Abs(X) with $3fff exponent/Unchanged
				23924	# L_SCR1:x/x
				23925	# L_SCR2:first word of X packed/Unchanged
				23926
				23927	A9_str:
				23928	fmov.x (%a0),%fp0 # load X from memory
				23929	fabs.x %fp0 # use abs(X)
				23930	tst.w %d5 # LAMBDA is in lower word of d5
				23931	bne.b sc_mul # if neg (LAMBDA = 1), scale by mul
				23932	fdiv.x %fp1,%fp0 # calculate X / SCALE -> Y to fp0
				23933	bra.w A10_st # branch to A10
				23934
				23935	sc_mul:
				23936	tst.b BINDEC_FLG(%a6) # check for denorm
				23937	beq.w A9_norm # if norm, continue with mul
				23938
				23939	# for DENORM, we must calculate:
				23940	# fp0 = input_op * 10^ISCALE * 10^24
				23941	# since the input operand is a DENORM, we can't multiply it directly.
				23942	# so, we do the multiplication of the exponents and mantissas separately.
				23943	# in this way, we avoid underflow on intermediate stages of the
				23944	# multiplication and guarantee a result without exception.
				23945	fmovm.x &0x2,-(%sp) # save 10^ISCALE to stack
				23946
				23947	mov.w (%sp),%d3 # grab exponent
				23948	andi.w &0x7fff,%d3 # clear sign
				23949	ori.w &0x8000,(%a0) # make DENORM exp negative
				23950	add.w (%a0),%d3 # add DENORM exp to 10^ISCALE exp
				23951	subi.w &0x3fff,%d3 # subtract BIAS
				23952	add.w 36(%a1),%d3
				23953	subi.w &0x3fff,%d3 # subtract BIAS
				23954	add.w 48(%a1),%d3
				23955	subi.w &0x3fff,%d3 # subtract BIAS
				23956
				23957	bmi.w sc_mul_err # if result is DENORM (summed exponent negative), punt!!!
				23958
				23959	andi.w &0x8000,(%sp) # keep sign
				23960	or.w %d3,(%sp) # insert new exponent
				23961	andi.w &0x7fff,(%a0) # clear sign bit on DENORM again
				23962	mov.l 0x8(%a0),-(%sp) # put input op mantissa on stk
				23963	mov.l 0x4(%a0),-(%sp)
				23964	mov.l &0x3fff0000,-(%sp) # force exp to zero
				23965	fmovm.x (%sp)+,&0x80 # load normalized DENORM into fp0
				23966	fmul.x (%sp)+,%fp0
				23967
				23968	# fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
				23969	# fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
				23970	mov.l 36+8(%a1),-(%sp) # push 10^8 mantissa, low lword first
				23971	mov.l 36+4(%a1),-(%sp)
				23972	mov.l &0x3fff0000,-(%sp) # biased exp 0x3fff (2^0); the table exp was already added into d3 above
				23973	mov.l 48+8(%a1),-(%sp) # push 10^16 mantissa, low lword first
				23974	mov.l 48+4(%a1),-(%sp)
				23975	mov.l &0x3fff0000,-(%sp)# biased exp 0x3fff (2^0); the table exp was already added into d3 above
				23976	fmul.x (%sp)+,%fp0 # pop and multiply by the 10^16 mantissa (pushed last)
				23977	fmul.x (%sp)+,%fp0 # pop and multiply by the 10^8 mantissa
				23978	bra.b A10_st # DENORM scaling done; go or in INEX
				23979
				23980	sc_mul_err: # reached from sc_mul when the summed exponent in d3 is negative
				23981	bra.b sc_mul_err # branch to self: spins here forever, no recovery path is coded
				23982
				23983	A9_norm:
				23984	tst.w %d2 # test for small exp case
				23985	beq.b A9_con # if zero, continue as normal
				23986	fmul.x 36(%a1),%fp0 # multiply fp0 by 10^8
				23987	fmul.x 48(%a1),%fp0 # multiply fp0 by 10^16
				23988	A9_con:
				23989	fmul.x %fp1,%fp0 # calculate X * SCALE -> Y to fp0
				23990
				23991	# A10. Or in INEX.
				23992	# If INEX is set, round error occurred. This is compensated
				23993	# for by 'or-ing' in the INEX2 flag to the lsb of Y.
				23994	#
				23995	# Register usage:
				23996	# Input/Output
				23997	# d0: FPCR with RZ mode/FPSR with INEX2 isolated
				23998	# d2: x/x
				23999	# d3: x/x
				24000	# d4: LEN/Unchanged
				24001	# d5: ICTR:LAMBDA
				24002	# d6: ILOG/Unchanged
				24003	# d7: k-factor/Unchanged
				24004	# a0: ptr for original operand/final result
				24005	# a1: ptr to PTENxx array/Unchanged
				24006	# a2: x/ptr to FP_SCR1(a6)
				24007	# fp0: Y/Y with lsb adjusted
				24008	# fp1: 10^ISCALE/Unchanged
				24009	# fp2: x/x
				24010
				24011	A10_st:
				24012	fmov.l %fpsr,%d0 # get FPSR
				24013	fmov.x %fp0,FP_SCR1(%a6) # move Y to memory
				24014	lea.l FP_SCR1(%a6),%a2 # load a2 with ptr to FP_SCR1
				24015	btst &9,%d0 # check if INEX2 set
				24016	beq.b A11_st # if clear, skip rest
				24017	or.l &1,8(%a2) # or in 1 to lsb of mantissa
				24018	fmov.x FP_SCR1(%a6),%fp0 # write adjusted Y back to fpu
				24019
				24020
				24021	# A11. Restore original FPCR; set size ext.
				24022	# Perform FINT operation in the user's rounding mode. Keep
				24023	# the size to extended. The sintdo entry point in the sint
				24024	# routine expects the FPCR value to be in USER_FPCR for
				24025	# mode and precision. The original FPCR is saved in L_SCR1.
				24026
				24027	A11_st:
				24028	mov.l USER_FPCR(%a6),L_SCR1(%a6) # save it for later
				24029	and.l &0x00000030,USER_FPCR(%a6) # set size to ext,
				24030	# ;block exceptions
				24031
				24032
				24033	# A12. Calculate YINT = FINT(Y) according to user's rounding mode.
				24034	# fint.x is issued inline (the sintdo call is disabled). The output is in fp0.
				24035	#
				24036	# Register usage:
				24037	# Input/Output
				24038	# d0: FPSR with AINEX cleared/FPCR with size set to ext
				24039	# d2: x/x/scratch
				24040	# d3: x/x
				24041	# d4: LEN/Unchanged
				24042	# d5: ICTR:LAMBDA/Unchanged
				24043	# d6: ILOG/Unchanged
				24044	# d7: k-factor/Unchanged
				24045	# a0: ptr for original operand/src ptr for sintdo
				24046	# a1: ptr to PTENxx array/Unchanged
				24047	# a2: ptr to FP_SCR1(a6)/Unchanged
				24048	# a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
				24049	# fp0: Y/YINT
				24050	# fp1: 10^ISCALE/Unchanged
				24051	# fp2: x/x
				24052	# F_SCR1:x/x
				24053	# F_SCR2:Y adjusted for inex/Y with original exponent
				24054	# L_SCR1:x/original USER_FPCR
				24055	# L_SCR2:first word of X packed/Unchanged
				24056
				24057	A12_st:
				24058	movm.l &0xc0c0,-(%sp) # save regs used by sintd0 {%d0-%d1/%a0-%a1}
				24059	mov.l L_SCR1(%a6),-(%sp)
				24060	mov.l L_SCR2(%a6),-(%sp)
				24061
				24062	lea.l FP_SCR1(%a6),%a0 # a0 is ptr to FP_SCR1(a6)
				24063	fmov.x %fp0,(%a0) # move Y to memory at FP_SCR1(a6)
				24064	tst.l L_SCR2(%a6) # test sign of original operand
				24065	bge.b do_fint12 # if pos, use Y
				24066	or.l &0x80000000,(%a0) # if neg, use -Y
				24067	do_fint12:
				24068	mov.l USER_FPSR(%a6),-(%sp)
				24069	# bsr sintdo # sint routine returns int in fp0
				24070
				24071	fmov.l USER_FPCR(%a6),%fpcr
				24072	fmov.l &0x0,%fpsr # clear the AEXC bits!!!
				24073	## mov.l USER_FPCR(%a6),%d0 # ext prec/keep rnd mode
				24074	## andi.l &0x00000030,%d0
				24075	## fmov.l %d0,%fpcr
				24076	fint.x FP_SCR1(%a6),%fp0 # do fint()
				24077	fmov.l %fpsr,%d0
				24078	or.w %d0,FPSR_EXCEPT(%a6)
				24079	## fmov.l &0x0,%fpcr
				24080	## fmov.l %fpsr,%d0 # don't keep ccodes
				24081	## or.w %d0,FPSR_EXCEPT(%a6)
				24082
				24083	mov.b (%sp),USER_FPSR(%a6)
				24084	add.l &4,%sp
				24085
				24086	mov.l (%sp)+,L_SCR2(%a6)
				24087	mov.l (%sp)+,L_SCR1(%a6)
				24088	movm.l (%sp)+,&0x303 # restore regs used by sint {%d0-%d1/%a0-%a1}
				24089
				24090	mov.l L_SCR2(%a6),FP_SCR1(%a6) # restore original exponent
				24091	mov.l L_SCR1(%a6),USER_FPCR(%a6) # restore user's FPCR
				24092
				24093	# A13. Check for LEN digits.
				24094	# If the int operation results in more than LEN digits,
				24095	# or less than LEN -1 digits, adjust ILOG and repeat from
				24096	# A6. This test occurs only on the first pass. If the
				24097	# result is exactly 10^LEN, increment ILOG and divide
				24098	# the mantissa by 10. The calculation of 10^LEN cannot
Lucas De Marchi	25985ed	2011-03-30 22:57:33 -0300	[diff] [blame]	24099	# be inexact, since all powers of ten up to 10^27 are exact
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24100	# in extended precision, so the use of a previous power-of-ten
				24101	# table will introduce no error.
				24102	#
				24103	#
				24104	# Register usage:
				24105	# Input/Output
				24106	# d0: FPCR with size set to ext/scratch final = 0
				24107	# d2: x/x
				24108	# d3: x/scratch final = x
				24109	# d4: LEN/LEN adjusted
				24110	# d5: ICTR:LAMBDA/LAMBDA:ICTR
				24111	# d6: ILOG/ILOG adjusted
				24112	# d7: k-factor/Unchanged
				24113	# a0: pointer into memory for packed bcd string formation
				24114	# a1: ptr to PTENxx array/Unchanged
				24115	# a2: ptr to FP_SCR1(a6)/Unchanged
				24116	# fp0: int portion of Y/abs(YINT) adjusted
				24117	# fp1: 10^ISCALE/Unchanged
				24118	# fp2: x/10^LEN
				24119	# F_SCR1:x/x
				24120	# F_SCR2:Y with original exponent/Unchanged
				24121	# L_SCR1:original USER_FPCR/Unchanged
				24122	# L_SCR2:first word of X packed/Unchanged
				24123
				24124	A13_st:
				24125	swap %d5 # put ICTR in lower word of d5
				24126	tst.w %d5 # check if ICTR = 0
				24127	bne not_zr # if non-zero, go to second test
				24128	#
				24129	# Compute 10^(LEN-1)
				24130	#
				24131	fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
				24132	mov.l %d4,%d0 # put LEN in d0
				24133	subq.l &1,%d0 # d0 = LEN -1
				24134	clr.l %d3 # clr table index
				24135	l_loop:
				24136	lsr.l &1,%d0 # shift next bit into carry
				24137	bcc.b l_next # if zero, skip the mul
				24138	fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
				24139	l_next:
				24140	add.l &12,%d3 # inc d3 to next pwrten table entry
				24141	tst.l %d0 # test if LEN is zero
				24142	bne.b l_loop # if not, loop
				24143	#
				24144	# 10^(LEN-1) is computed for this test and A14. If the input was
				24145	# denormalized, check only the case in which YINT > 10^LEN.
				24146	#
				24147	tst.b BINDEC_FLG(%a6) # check if input was norm
				24148	beq.b A13_con # if norm, continue with checking
				24149	fabs.x %fp0 # take abs of YINT
				24150	bra test_2
				24151	#
				24152	# Compare abs(YINT) to 10^(LEN-1) and 10^LEN
				24153	#
				24154	A13_con:
				24155	fabs.x %fp0 # take abs of YINT
				24156	fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^(LEN-1)
				24157	fbge.w test_2 # if greater, do next test
				24158	subq.l &1,%d6 # subtract 1 from ILOG
				24159	mov.w &1,%d5 # set ICTR
				24160	fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
				24161	fmul.s FTEN(%pc),%fp2 # compute 10^LEN
				24162	bra.w A6_str # return to A6 and recompute YINT
				24163	test_2:
				24164	fmul.s FTEN(%pc),%fp2 # compute 10^LEN
				24165	fcmp.x %fp0,%fp2 # compare abs(YINT) with 10^LEN
				24166	fblt.w A14_st # if less, all is ok, go to A14
				24167	fbgt.w fix_ex # if greater, fix and redo
				24168	fdiv.s FTEN(%pc),%fp0 # if equal, divide by 10
				24169	addq.l &1,%d6 # and inc ILOG
				24170	bra.b A14_st # and continue elsewhere
				24171	fix_ex:
				24172	addq.l &1,%d6 # increment ILOG by 1
				24173	mov.w &1,%d5 # set ICTR
				24174	fmov.l &rm_mode*0x10,%fpcr # set rmode to RM
				24175	bra.w A6_str # return to A6 and recompute YINT
				24176	#
				24177	# Since ICTR <> 0, we have already been through one adjustment,
				24178	# and shouldn't have another; this is to check if abs(YINT) = 10^LEN
				24179	# 10^LEN is again computed using whatever table is in a1 since the
				24180	# value calculated cannot be inexact.
				24181	#
				24182	not_zr:
				24183	fmov.s FONE(%pc),%fp2 # init fp2 to 1.0
				24184	mov.l %d4,%d0 # put LEN in d0
				24185	clr.l %d3 # clr table index
				24186	z_loop:
				24187	lsr.l &1,%d0 # shift next bit into carry
				24188	bcc.b z_next # if zero, skip the mul
				24189	fmul.x (%a1,%d3),%fp2 # mul by 10**(d3_bit_no)
				24190	z_next:
				24191	add.l &12,%d3 # inc d3 to next pwrten table entry
				24192	tst.l %d0 # test if LEN is zero
				24193	bne.b z_loop # if not, loop
				24194	fabs.x %fp0 # get abs(YINT)
				24195	fcmp.x %fp0,%fp2 # check if abs(YINT) = 10^LEN
				24196	fbneq.w A14_st # if not, skip this
				24197	fdiv.s FTEN(%pc),%fp0 # divide abs(YINT) by 10
				24198	addq.l &1,%d6 # and inc ILOG by 1
				24199	addq.l &1,%d4 # and inc LEN
				24200	fmul.s FTEN(%pc),%fp2 # since LEN was incremented, get the new 10^LEN
				24201
				24202	# A14. Convert the mantissa to bcd.
				24203	# The binstr routine is used to convert the LEN digit
				24204	# mantissa to bcd in memory. The input to binstr is
				24205	# to be a fraction; i.e. (mantissa)/10^LEN and adjusted
				24206	# such that the decimal point is to the left of bit 63.
				24207	# The bcd digits are stored in the correct position in
				24208	# the final string area in memory.
				24209	#
				24210	#
				24211	# Register usage:
				24212	# Input/Output
				24213	# d0: x/LEN call to binstr - final is 0
				24214	# d1: x/0
				24215	# d2: x/ms 32-bits of mant of abs(YINT)
				24216	# d3: x/ls 32-bits of mant of abs(YINT)
				24217	# d4: LEN/Unchanged
				24218	# d5: ICTR:LAMBDA/LAMBDA:ICTR
				24219	# d6: ILOG
				24220	# d7: k-factor/Unchanged
				24221	# a0: pointer into memory for packed bcd string formation
				24222	# /ptr to first mantissa byte in result string
				24223	# a1: ptr to PTENxx array/Unchanged
				24224	# a2: ptr to FP_SCR1(a6)/Unchanged
				24225	# fp0: int portion of Y/abs(YINT) adjusted
				24226	# fp1: 10^ISCALE/Unchanged
				24227	# fp2: 10^LEN/Unchanged
				24228	# F_SCR1:x/Work area for final result
				24229	# F_SCR2:Y with original exponent/Unchanged
				24230	# L_SCR1:original USER_FPCR/Unchanged
				24231	# L_SCR2:first word of X packed/Unchanged
				24232
				24233	A14_st:
				24234	fmov.l &rz_mode*0x10,%fpcr # force rz for conversion
				24235	fdiv.x %fp2,%fp0 # divide abs(YINT) by 10^LEN
				24236	lea.l FP_SCR0(%a6),%a0
				24237	fmov.x %fp0,(%a0) # move abs(YINT)/10^LEN to memory
				24238	mov.l 4(%a0),%d2 # move 2nd word of FP_RES to d2
				24239	mov.l 8(%a0),%d3 # move 3rd word of FP_RES to d3
				24240	clr.l 4(%a0) # zero word 2 of FP_RES
				24241	clr.l 8(%a0) # zero word 3 of FP_RES
				24242	mov.l (%a0),%d0 # move exponent to d0
				24243	swap %d0 # put exponent in lower word
				24244	beq.b no_sft # if zero, don't shift
				24245	sub.l &0x3ffd,%d0 # sub bias less 2 to make fract
				24246	tst.l %d0 # check if > 1
				24247	bgt.b no_sft # if so, don't shift
				24248	neg.l %d0 # make exp positive
				24249	m_loop:
				24250	lsr.l &1,%d2 # shift d2:d3 right, add 0s
				24251	roxr.l &1,%d3 # the number of places
				24252	dbf.w %d0,m_loop # given in d0
				24253	no_sft:
				24254	tst.l %d2 # check for mantissa of zero
				24255	bne.b no_zr # if not, go on
				24256	tst.l %d3 # continue zero check
				24257	beq.b zer_m # if zero, go directly to binstr
				24258	no_zr:
				24259	clr.l %d1 # put zero in d1 for addx
				24260	add.l &0x00000080,%d3 # inc at bit 7
				24261	addx.l %d1,%d2 # continue inc
				24262	and.l &0xffffff80,%d3 # strip off lsb not used by 882
				24263	zer_m:
				24264	mov.l %d4,%d0 # put LEN in d0 for binstr call
				24265	addq.l &3,%a0 # a0 points to M16 byte in result
				24266	bsr binstr # call binstr to convert mant
				24267
				24268
				24269	# A15. Convert the exponent to bcd.
				24270	# As in A14 above, the exp is converted to bcd and the
				24271	# digits are stored in the final string.
				24272	#
				24273	# Digits are stored in L_SCR1(a6) on return from BINDEC as:
				24274	#
				24275	# 32 16 15 0
				24276	# -----------------------------------------
				24277	# | 0 | e3 | e2 | e1 | e4 | X | X | X |
				24278	# -----------------------------------------
				24279	#
				24280	# And are moved into their proper places in FP_SCR0. If digit e4
				24281	# is non-zero, OPERR is signaled. In all cases, all 4 digits are
				24282	# written as specified in the 881/882 manual for packed decimal.
				24283	#
				24284	# Register usage:
				24285	# Input/Output
				24286	# d0: x/LEN call to binstr - final is 0
				24287	# d1: x/scratch (0);shift count for final exponent packing
				24288	# d2: x/ms 32-bits of exp fraction/scratch
				24289	# d3: x/ls 32-bits of exp fraction
				24290	# d4: LEN/Unchanged
				24291	# d5: ICTR:LAMBDA/LAMBDA:ICTR
				24292	# d6: ILOG
				24293	# d7: k-factor/Unchanged
				24294	# a0: ptr to result string/ptr to L_SCR1(a6)
				24295	# a1: ptr to PTENxx array/Unchanged
				24296	# a2: ptr to FP_SCR1(a6)/Unchanged
				24297	# fp0: abs(YINT) adjusted/float(ILOG)
				24298	# fp1: 10^ISCALE/Unchanged
				24299	# fp2: 10^LEN/Unchanged
				24300	# F_SCR1:Work area for final result/BCD result
				24301	# F_SCR2:Y with original exponent/ILOG/10^4
				24302	# L_SCR1:original USER_FPCR/Exponent digits on return from binstr
				24303	# L_SCR2:first word of X packed/Unchanged
				24304
				24305	A15_st:
				24306	tst.b BINDEC_FLG(%a6) # check for denorm
				24307	beq.b not_denorm
				24308	ftest.x %fp0 # test for zero
				24309	fbeq.w den_zero # if zero, use k-factor or 4933
				24310	fmov.l %d6,%fp0 # float ILOG
				24311	fabs.x %fp0 # get abs of ILOG
				24312	bra.b convrt
				24313	den_zero:
				24314	tst.l %d7 # check sign of the k-factor
				24315	blt.b use_ilog # if negative, use ILOG
				24316	fmov.s F4933(%pc),%fp0 # force exponent to 4933
				24317	bra.b convrt # do it
				24318	use_ilog:
				24319	fmov.l %d6,%fp0 # float ILOG
				24320	fabs.x %fp0 # get abs of ILOG
				24321	bra.b convrt
				24322	not_denorm:
				24323	ftest.x %fp0 # test for zero
				24324	fbneq.w not_zero # if zero, force exponent
				24325	fmov.s FONE(%pc),%fp0 # force exponent to 1
				24326	bra.b convrt # do it
				24327	not_zero:
				24328	fmov.l %d6,%fp0 # float ILOG
				24329	fabs.x %fp0 # get abs of ILOG
				24330	convrt:
				24331	fdiv.x 24(%a1),%fp0 # compute ILOG/10^4
				24332	fmov.x %fp0,FP_SCR1(%a6) # store fp0 in memory
				24333	mov.l 4(%a2),%d2 # move word 2 to d2
				24334	mov.l 8(%a2),%d3 # move word 3 to d3
				24335	mov.w (%a2),%d0 # move exp to d0
				24336	beq.b x_loop_fin # if zero, skip the shift
				24337	sub.w &0x3ffd,%d0 # subtract off bias
				24338	neg.w %d0 # make exp positive
				24339	x_loop:
				24340	lsr.l &1,%d2 # shift d2:d3 right
				24341	roxr.l &1,%d3 # the number of places
				24342	dbf.w %d0,x_loop # given in d0
				24343	x_loop_fin:
				24344	clr.l %d1 # put zero in d1 for addx
				24345	add.l &0x00000080,%d3 # inc at bit 7
				24346	addx.l %d1,%d2 # continue inc
				24347	and.l &0xffffff80,%d3 # strip off lsb not used by 882
				24348	mov.l &4,%d0 # put 4 in d0 for binstr call
				24349	lea.l L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
				24350	bsr binstr # call binstr to convert exp
				24351	mov.l L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
				24352	mov.l &12,%d1 # use d1 for shift count
				24353	lsr.l %d1,%d0 # shift d0 right by 12
				24354	bfins %d0,FP_SCR0(%a6){&4:&12} # put e3:e2:e1 in FP_SCR0
				24355	lsr.l %d1,%d0 # shift d0 right by 12
				24356	bfins %d0,FP_SCR0(%a6){&16:&4} # put e4 in FP_SCR0
				24357	tst.b %d0 # check if e4 is zero
				24358	beq.b A16_st # if zero, skip rest
				24359	or.l &opaop_mask,USER_FPSR(%a6) # set OPERR & AIOP in USER_FPSR
				24360
				24361
				24362	# A16. Write sign bits to final string.
				24363	# Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
				24364	#
				24365	# Register usage:
				24366	# Input/Output
				24367	# d0: x/scratch - final is x
				24368	# d2: x/x
				24369	# d3: x/x
				24370	# d4: LEN/Unchanged
				24371	# d5: ICTR:LAMBDA/LAMBDA:ICTR
				24372	# d6: ILOG/ILOG adjusted
				24373	# d7: k-factor/Unchanged
				24374	# a0: ptr to L_SCR1(a6)/Unchanged
				24375	# a1: ptr to PTENxx array/Unchanged
				24376	# a2: ptr to FP_SCR1(a6)/Unchanged
				24377	# fp0: float(ILOG)/Unchanged
				24378	# fp1: 10^ISCALE/Unchanged
				24379	# fp2: 10^LEN/Unchanged
				24380	# F_SCR1:BCD result with correct signs
				24381	# F_SCR2:ILOG/10^4
				24382	# L_SCR1:Exponent digits on return from binstr
				24383	# L_SCR2:first word of X packed/Unchanged
				24384
				24385	A16_st:
				24386	clr.l %d0 # clr d0 for collection of signs
				24387	and.b &0x0f,FP_SCR0(%a6) # clear first nibble of FP_SCR0
				24388	tst.l L_SCR2(%a6) # check sign of original mantissa
				24389	bge.b mant_p # if pos, don't set SM
				24390	mov.l &2,%d0 # move 2 in to d0 for SM
				24391	mant_p:
				24392	tst.l %d6 # check sign of ILOG
				24393	bge.b wr_sgn # if pos, don't set SE
				24394	addq.l &1,%d0 # set bit 0 in d0 for SE
				24395	wr_sgn:
				24396	bfins %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
				24397
				24398	# Clean up and restore all registers used.
				24399
				24400	fmov.l &0,%fpsr # clear possible inex2/ainex bits
				24401	fmovm.x (%sp)+,&0xe0 # {%fp0-%fp2}
				24402	movm.l (%sp)+,&0x4fc # {%d2-%d7/%a2}
				24403	rts
				24404	# PTENRN: powers of ten 10^(2^n), n = 0..12, one 12-byte (three-long) entry each, indexed by byte offset 12*n; matches PTENRP/PTENRM except for the last bit of the third long in some entries from 10^32 up (name suggests the round-to-nearest variant -- TODO confirm).
				24405	global PTENRN
				24406	PTENRN:
				24407	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				24408	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				24409	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				24410	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				24411	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				24412	long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
				24413	long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
				24414	long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
				24415	long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
				24416	long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
				24417	long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
				24418	long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
				24419	long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
				24420	# PTENRP: same 13-entry layout as PTENRN (12 bytes per entry, 10^(2^n)); relative to PTENRN only the 10^64 and 10^1024 entries differ, each one higher in the last bit of the third long (name suggests the round-to-plus-infinity variant -- TODO confirm).
				24421	global PTENRP
				24422	PTENRP:
				24423	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				24424	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				24425	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				24426	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				24427	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				24428	long 0x40690000,0x9DC5ADA8,0x2B70B59E # 10 ^ 32
				24429	long 0x40D30000,0xC2781F49,0xFFCFA6D6 # 10 ^ 64
				24430	long 0x41A80000,0x93BA47C9,0x80E98CE0 # 10 ^ 128
				24431	long 0x43510000,0xAA7EEBFB,0x9DF9DE8E # 10 ^ 256
				24432	long 0x46A30000,0xE319A0AE,0xA60E91C7 # 10 ^ 512
				24433	long 0x4D480000,0xC9767586,0x81750C18 # 10 ^ 1024
				24434	long 0x5A920000,0x9E8B3B5D,0xC53D5DE5 # 10 ^ 2048
				24435	long 0x75250000,0xC4605202,0x8A20979B # 10 ^ 4096
				24436	# PTENRM: same 13-entry layout as PTENRN (12 bytes per entry, 10^(2^n)); relative to PTENRN the 10^32, 10^128, 10^256, 10^512, 10^2048 and 10^4096 entries are one lower in the last bit of the third long (name suggests the round-to-minus-infinity variant -- TODO confirm).
				24437	global PTENRM
				24438	PTENRM:
				24439	long 0x40020000,0xA0000000,0x00000000 # 10 ^ 1
				24440	long 0x40050000,0xC8000000,0x00000000 # 10 ^ 2
				24441	long 0x400C0000,0x9C400000,0x00000000 # 10 ^ 4
				24442	long 0x40190000,0xBEBC2000,0x00000000 # 10 ^ 8
				24443	long 0x40340000,0x8E1BC9BF,0x04000000 # 10 ^ 16
				24444	long 0x40690000,0x9DC5ADA8,0x2B70B59D # 10 ^ 32
				24445	long 0x40D30000,0xC2781F49,0xFFCFA6D5 # 10 ^ 64
				24446	long 0x41A80000,0x93BA47C9,0x80E98CDF # 10 ^ 128
				24447	long 0x43510000,0xAA7EEBFB,0x9DF9DE8D # 10 ^ 256
				24448	long 0x46A30000,0xE319A0AE,0xA60E91C6 # 10 ^ 512
				24449	long 0x4D480000,0xC9767586,0x81750C17 # 10 ^ 1024
				24450	long 0x5A920000,0x9E8B3B5D,0xC53D5DE4 # 10 ^ 2048
				24451	long 0x75250000,0xC4605202,0x8A20979A # 10 ^ 4096
				24452
				24453	#########################################################################
				24454	# binstr(): Converts a 64-bit binary fraction to bcd digits. #
				24455	# #
				24456	# INPUT *************************************************************** #
				24457	# d2:d3 = 64-bit binary fraction (decimal point before bit 63) #
				24458	# d0 = desired length (LEN) #
				24459	# a0 = pointer to start in memory for bcd characters #
				24460	# (This pointer must point to byte 4 of the first #
				24461	# lword of the packed decimal memory string.) #
				24462	# #
				24463	# OUTPUT ************************************************************** #
				24464	# a0 = advanced past the last bcd byte stored; the LEN digits start at the incoming a0, two per byte. d0-d7 are preserved. #
				24465	# #
				24466	# ALGORITHM *********************************************************** #
				24467	# The 64-bit binary is assumed to have a decimal point before #
				24468	# bit 63. The fraction is multiplied by 10 using a mul by 2 #
				24469	# shift and a mul by 8 shift. The bits shifted out of the #
				24470	# msb form a decimal digit. This process is iterated until #
				24471	# LEN digits are formed. #
				24472	# #
				24473	# A1. Init d7 to 1. D7 is the byte digit counter, and if 1, the #
				24474	# digit formed will be assumed the least significant. This is #
				24475	# to force the first byte formed to have a 0 in the upper 4 bits. #
				24476	# #
				24477	# A2. Beginning of the loop: #
				24478	# Copy the fraction in d2:d3 to d4:d5. #
				24479	# #
				24480	# A3. Multiply the fraction in d2:d3 by 8 using bit-field #
				24481	# extracts and shifts. The three msbs from d2 will go into d1. #
				24482	# #
				24483	# A4. Multiply the fraction in d4:d5 by 2 using shifts. The msb #
				24484	# will be collected by the carry. #
				24485	# #
				24486	# A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5 #
				24487	# into d2:d3. D1 will contain the bcd digit formed. #
				24488	# #
				24489	# A6. Test d7. If zero, the digit formed is the ms digit of a byte: #
				24490	# park it in the upper word of d7. If non-zero, it is the ls #
				24491	# digit: merge it with the parked ms digit and write the byte #
				24492	# from d7 to memory. #
				24493	# #
				24494	# A7. Decrement d0 (LEN counter) and repeat the loop until LEN digits are done. #
				24495	# #
				24496	#########################################################################
				24497
				24498	# Implementation Notes:
				24499	#
				24500	# The registers are used as follows:
				24501	#
				24502	# d0: LEN counter
				24503	# d1: temp used to form the digit
				24504	# d2: upper 32-bits of fraction for mul by 8
				24505	# d3: lower 32-bits of fraction for mul by 8
				24506	# d4: upper 32-bits of fraction for mul by 2
				24507	# d5: lower 32-bits of fraction for mul by 2
				24508	# d6: temp for bit-field extracts
				24509	# d7: upper word = parked ms digit; lower word = digit count {0,1}
				24510	# a0: pointer into memory for packed bcd string formation
				24511	#
				24512
				24513	global binstr
				24514	binstr:
				24515	movm.l &0xff00,-(%sp) # {%d0-%d7}
				24516
				24517	#
				24518	# A1: Init d7
				24519	#
				24520	mov.l &1,%d7 # count word = 1: treat the first digit as an ls digit (upper nibble 0)
				24521	subq.l &1,%d0 # dbf runs count+1 passes, so loop on LEN-1
				24522	#
				24523	# A2. Copy d2:d3 to d4:d5. Start loop.
				24524	#
				24525	loop:
				24526	mov.l %d2,%d4 # copy the fraction before muls
				24527	mov.l %d3,%d5 # to d4:d5
				24528	#
				24529	# A3. Multiply d2:d3 by 8; extract msbs into d1.
				24530	#
				24531	bfextu %d2{&0:&3},%d1 # d1 = 3 msbs of d2 (the bits the mul by 8 shifts out)
				24532	asl.l &3,%d2 # shift d2 left by 3 places
				24533	bfextu %d3{&0:&3},%d6 # d6 = 3 msbs of d3 (they carry into d2)
				24534	asl.l &3,%d3 # shift d3 left by 3 places
				24535	or.l %d6,%d2 # or in msbs from d3 into d2
				24536	#
				24537	# A4. Multiply d4:d5 by 2; add carry out to d1.
				24538	#
				24539	asl.l &1,%d5 # mul d5 by 2; bit shifted out goes to X
				24540	roxl.l &1,%d4 # mul d4 by 2 with X rotated in; msb out goes to X
				24541	swap %d6 # lower word of d6 = 0 (bfextu left its upper word zero)
				24542	addx.w %d6,%d1 # d1 += 0 + X: add in extend from mul by 2
				24543	#
				24544	# A5. Add mul by 8 to mul by 2. D1 contains the digit formed.
				24545	#
				24546	add.l %d5,%d3 # add lower 32 bits
				24547	nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
				24548	addx.l %d4,%d2 # add with extend upper 32 bits
				24549	nop # ERRATA FIX #13 (Rev. 1.2 6/6/90)
				24550	addx.w %d6,%d1 # d1 += 0 + X: add in extend from the 64-bit add
				24551	swap %d6 # swap d6 back so its upper word is 0 again
				24552	#
				24553	# A6. Test d7 and branch.
				24554	#
				24555	tst.w %d7 # count word zero: this digit starts a new byte
				24556	beq.b first_d # non-zero: digit completes a byte, fall into sec_d
				24557	sec_d:
				24558	swap %d7 # bring the parked first digit into the low word
				24559	asl.w &4,%d7 # move first digit to the upper 4 bits
				24560	add.w %d1,%d7 # add in ls digit
				24561	mov.b %d7,(%a0)+ # store the finished byte and advance a0
				24562	swap %d7 # bring the digit-count word back to the low word
				24563	clr.w %d7 # count = 0: no digit pending
				24564	dbf.w %d0,loop # do loop some more!
				24565	bra.b end_bstr # finished, so exit
				24566	first_d:
				24567	swap %d7 # put digit word in the low word
				24568	mov.w %d1,%d7 # park new digit there
				24569	swap %d7 # digit now in upper word; count word back in low word
				24570	addq.w &1,%d7 # count = 1: first digit of the byte is done
				24571	dbf.w %d0,loop # do loop some more!
				24572	swap %d7 # LEN exhausted with a digit pending: bring it down
				24573	lsl.w &4,%d7 # move it to upper 4 bits (lower nibble becomes 0)
				24574	mov.b %d7,(%a0)+ # store it in memory string
				24575	#
				24576	# Clean up and return; the digits are in memory and d0-d7 are restored.
				24577	#
				24578	end_bstr:
				24579	movm.l (%sp)+,&0xff # {%d0-%d7}
				24580	rts
				24581
				24582	#########################################################################
				24583	# XDEF **************************************************************** #
				24584	# facc_in_b(): dmem_read_byte failed #
				24585	# facc_in_w(): dmem_read_word failed #
				24586	# facc_in_l(): dmem_read_long failed #
				24587	# facc_in_d(): dmem_read of dbl prec failed #
				24588	# facc_in_x(): dmem_read of ext prec failed #
				24589	# #
				24590	# facc_out_b(): dmem_write_byte failed #
				24591	# facc_out_w(): dmem_write_word failed #
				24592	# facc_out_l(): dmem_write_long failed #
				24593	# facc_out_d(): dmem_write of dbl prec failed #
				24594	# facc_out_x(): dmem_write of ext prec failed #
				24595	# #
				24596	# XREF **************************************************************** #
				24597	# _real_access() - exit through access error handler #
				24598	# #
				24599	# INPUT *************************************************************** #
				24600	# None #
				24601	# #
				24602	# OUTPUT ************************************************************** #
				24603	# None #
				24604	# #
				24605	# ALGORITHM *********************************************************** #
				24606	# Flow jumps here when an FP data fetch or store call gets an error #
				24607	# result. This means the operating system wants an access error frame #
				24608	# made out of the current exception stack frame. #
				24609	# So, each entry first calls restore() with d0 = access size, which #
				24610	# returns any updated (An)+ or -(An) register to its pre-exception value, #
				24611	# parks its FSLW word in EXC_VOFF, and joins facc_finish to build the frame. #
				24612	# #
				24613	#########################################################################
				24614
				24615	facc_in_b:
				24616	movq.l &0x1,%d0 # d0 = access size for restore(): one byte
				24617	bsr.w restore # undo any (An)+ / -(An) update of An
				24618
				24619	mov.w &0x0121,EXC_VOFF(%a6) # FSLW upper word for a byte read; facc_finish moves it into the frame
				24620	bra.w facc_finish
				24621
				24622	facc_in_w:
				24623	movq.l &0x2,%d0 # d0 = access size for restore(): two bytes
				24624	bsr.w restore # undo any (An)+ / -(An) update of An
				24625
				24626	mov.w &0x0141,EXC_VOFF(%a6) # FSLW upper word for a word read
				24627	bra.b facc_finish
				24628
				24629	facc_in_l:
				24630	movq.l &0x4,%d0 # d0 = access size for restore(): four bytes
				24631	bsr.w restore # undo any (An)+ / -(An) update of An
				24632
				24633	mov.w &0x0101,EXC_VOFF(%a6) # FSLW upper word for a long read
				24634	bra.b facc_finish
				24635
				24636	facc_in_d:
				24637	movq.l &0x8,%d0 # d0 = access size for restore(): eight bytes
				24638	bsr.w restore # undo any (An)+ / -(An) update of An
				24639
				24640	mov.w &0x0161,EXC_VOFF(%a6) # FSLW upper word for a dbl prec read
				24641	bra.b facc_finish
				24642
				24643	facc_in_x:
				24644	movq.l &0xc,%d0 # d0 = access size for restore(): twelve bytes
				24645	bsr.w restore # undo any (An)+ / -(An) update of An
				24646
				24647	mov.w &0x0161,EXC_VOFF(%a6) # FSLW upper word for an ext prec read (same value as facc_in_d)
				24648	bra.b facc_finish
				24649
				24650	################################################################
				24651	# write-side entries: d0 = access size for restore(), then the FSLW upper word for a failed write is parked in EXC_VOFF for facc_finish.
				24652	facc_out_b:
				24653	movq.l &0x1,%d0 # d0 = access size for restore(): one byte
				24654	bsr.w restore # undo any (An)+ / -(An) update of An
				24655
				24656	mov.w &0x00a1,EXC_VOFF(%a6) # FSLW upper word for a byte write
				24657	bra.b facc_finish
				24658
				24659	facc_out_w:
				24660	movq.l &0x2,%d0 # d0 = access size for restore(): two bytes
				24661	bsr.w restore # undo any (An)+ / -(An) update of An
				24662
				24663	mov.w &0x00c1,EXC_VOFF(%a6) # FSLW upper word for a word write
				24664	bra.b facc_finish
				24665
				24666	facc_out_l:
				24667	movq.l &0x4,%d0 # d0 = access size for restore(): four bytes
				24668	bsr.w restore # undo any (An)+ / -(An) update of An
				24669
				24670	mov.w &0x0081,EXC_VOFF(%a6) # FSLW upper word for a long write
				24671	bra.b facc_finish
				24672
				24673	facc_out_d:
				24674	movq.l &0x8,%d0 # d0 = access size for restore(): eight bytes
				24675	bsr.w restore # undo any (An)+ / -(An) update of An
				24676
				24677	mov.w &0x00e1,EXC_VOFF(%a6) # FSLW upper word for a dbl prec write
				24678	bra.b facc_finish
				24679
				24680	facc_out_x:
				24681	mov.l &0xc,%d0 # d0 = access size for restore(): twelve bytes
				24682	bsr.w restore # undo any (An)+ / -(An) update of An
				24683
				24684	mov.w &0x00e1,EXC_VOFF(%a6) # FSLW upper word for an ext prec write (same value as facc_out_d); falls through to facc_finish
				24685
				24686	# here's where we actually create the access error frame from the
				24687	# current exception stack frame. on entry EXC_VOFF(%a6) holds the FSLW upper word left by the facc_* entry; exits to _real_access with the frame grown by four bytes.
				24688	facc_finish:
				24689	mov.l USER_FPIAR(%a6),EXC_PC(%a6) # stacked PC = saved FPIAR value
				24690
				24691	fmovm.x EXC_FPREGS(%a6),&0xc0 # restore fp0-fp1
				24692	fmovm.l USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
				24693	movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 (incl. any a0/a1 fixed up by restore)
				24694
				24695	unlk %a6
				24696
				24697	mov.l (%sp),-(%sp) # grow frame by 4 bytes: copy SR, hi(PC) down
				24698	mov.l 0x8(%sp),0x4(%sp) # move lo(PC) and the parked FSLW word down
				24699	mov.l 0xc(%sp),0x8(%sp) # move EA down
				24700	mov.l &0x00000001,0xc(%sp) # FSLW longword = 0x00000001 for now
				24701	mov.w 0x6(%sp),0xc(%sp) # FSLW upper word = value parked in the voff slot (size/direction)
				24702	mov.w &0x4008,0x6(%sp) # voff = 0x4008 (frame format 4, vector offset 0x008)
				24703
				24704	btst &0x5,(%sp) # bit 5 of the SR high byte: supervisor or user mode?
				24705	beq.b facc_out2 # user
				24706	bset &0x2,0xd(%sp) # set supervisor TM bit in the FSLW
				24707
				24708	facc_out2:
				24709	bra.l _real_access
				24710
				24711	##################################################################
				24712
				24713	# restore(): called by every facc_* entry with d0 = size in bytes of the
				24714	# failed access. if the effective addressing mode was postincrement or
				24715	# predecrement, the emulation has already changed An to its post-
				24716	# instruction value; since we're exiting to the access error handler,
				24717	# An must be returned to its pre-instruction value. uses d1 as scratch.
				24718	restore:
				24719	mov.b EXC_OPWORD+0x1(%a6),%d1 # second byte of the saved opword
				24720	andi.b &0x38,%d1 # isolate bits 5-3 (the <ea> mode field)
				24721	cmpi.b %d1,&0x18 # postinc?
				24722	beq.w rest_inc # yes: subtract d0 from An; its rts returns to our caller
				24723	cmpi.b %d1,&0x20 # predec?
				24724	beq.w rest_dec # yes: add d0 back to An
				24725	rts # any other mode: nothing to undo
				24726	# rest_inc: subtract d0 from the address register named in bits 2-0 of the opword, dispatching through tbl_rest_inc; each ri_* stub returns to restore()'s caller.
				24727	rest_inc:
				24728	mov.b EXC_OPWORD+0x1(%a6),%d1 # second byte of the saved opword
				24729	andi.w &0x0007,%d1 # fetch An
				24730
				24731	mov.w (tbl_rest_inc.b,%pc,%d1.w*2),%d1 # d1 = table entry (stub offset from tbl_rest_inc)
				24732	jmp (tbl_rest_inc.b,%pc,%d1.w*1) # jump to the ri_aN stub
				24733
				24734	tbl_rest_inc:
				24735	short ri_a0 - tbl_rest_inc
				24736	short ri_a1 - tbl_rest_inc
				24737	short ri_a2 - tbl_rest_inc
				24738	short ri_a3 - tbl_rest_inc
				24739	short ri_a4 - tbl_rest_inc
				24740	short ri_a5 - tbl_rest_inc
				24741	short ri_a6 - tbl_rest_inc
				24742	short ri_a7 - tbl_rest_inc
				24743
				24744	ri_a0:
				24745	sub.l %d0,EXC_DREGS+0x8(%a6) # fix stacked a0
				24746	rts
				24747	ri_a1:
				24748	sub.l %d0,EXC_DREGS+0xc(%a6) # fix stacked a1
				24749	rts
				24750	ri_a2:
				24751	sub.l %d0,%a2 # fix a2 (live register)
				24752	rts
				24753	ri_a3:
				24754	sub.l %d0,%a3 # fix a3 (live register)
				24755	rts
				24756	ri_a4:
				24757	sub.l %d0,%a4 # fix a4 (live register)
				24758	rts
				24759	ri_a5:
				24760	sub.l %d0,%a5 # fix a5 (live register)
				24761	rts
				24762	ri_a6:
				24763	sub.l %d0,(%a6) # fix stacked a6
				24764	rts
				24765	# if it's a fmove out instruction, we don't have to fix a7
				24766	# because we hadn't changed it yet. if it's an opclass two
				24767	# instruction (data moved in) and the exception was in supervisor
				24768	# mode, then a7 also wasn't updated. if it was user mode, then
				24769	# restore the correct a7 which is in the USP currently.
				24770	ri_a7:
				24771	cmpi.b EXC_VOFF(%a6),&0x30 # move in or out? (facc_* call restore before overwriting EXC_VOFF)
				24772	bne.b ri_a7_done # out
				24773
				24774	btst &0x5,EXC_SR(%a6) # user or supervisor?
				24775	bne.b ri_a7_done # supervisor
				24776	movc %usp,%a0 # fetch USP (a0 is scratch; facc_finish reloads it from EXC_DREGS)
				24777	sub.l %d0,%a0 # undo the adjustment
				24778	movc %a0,%usp # write the corrected USP back
				24779	ri_a7_done:
				24780	rts
				24781
				24782	# the <ea> was predec, so the size must be added back to An: negate d0 and reuse rest_inc's subtract
				24783	rest_dec:
				24784	neg.l %d0 # d0 = -size, so the sub.l in the ri_* stubs adds size
				24785	bra.b rest_inc