Blame - arch/m68k/math-emu/fp_util.S - kernel/msm

blob: a9f7f01290672feded67ad7b4bd6f662668355c8 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* fp_util.S
				3	*
				4	* Copyright Roman Zippel, 1997. All rights reserved.
				5	*
				6	* Redistribution and use in source and binary forms, with or without
				7	* modification, are permitted provided that the following conditions
				8	* are met:
				9	* 1. Redistributions of source code must retain the above copyright
				10	* notice, and the entire permission notice in its entirety,
				11	* including the disclaimer of warranties.
				12	* 2. Redistributions in binary form must reproduce the above copyright
				13	* notice, this list of conditions and the following disclaimer in the
				14	* documentation and/or other materials provided with the distribution.
				15	* 3. The name of the author may not be used to endorse or promote
				16	* products derived from this software without specific prior
				17	* written permission.
				18	*
				19	* ALTERNATIVELY, this product may be distributed under the terms of
				20	* the GNU General Public License, in which case the provisions of the GPL are
				21	* required INSTEAD OF the above restrictions. (This clause is
				22	* necessary due to a potential bad interaction between the GPL and
				23	* the restrictions contained in a BSD-style copyright.)
				24	*
				25	* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
				26	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
				27	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				28	* DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
				29	* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
				30	* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
				31	* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
				32	* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
				33	* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
				34	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
				35	* OF THE POSSIBILITY OF SUCH DAMAGE.
				36	*/
				37
				38	#include <linux/config.h>
				39	#include "fp_emu.h"
				40
				41	/*
				42	* Here are lots of conversion and normalization functions mainly
				43	* used by fp_scan.S
				44	* Note that these functions are optimized for "normal" numbers,
				45	* these are handled first and exit as fast as possible, this is
				46	* especially important for fp_normalize_ext/fp_conv_ext2ext, as
				47	* it's called very often.
				48	* The register usage is optimized for fp_scan.S and which register
				49	* is currently at that time unused, be careful if you want change
				50	* something here. %d0 and %d1 is always usable, sometimes %d2 (or
				51	* only the lower half) most function have to return the %a0
				52	* unmodified, so that the caller can immediately reuse it.
				53	*/
				54
				55	.globl fp_ill, fp_end
				56
				57	\| exits from fp_scan:
				58	\| illegal instruction
				59	fp_ill:
				60	printf ,"fp_illegal\n"
				61	rts
				62	\| completed instruction
				63	fp_end:
				64	tst.l (TASK_MM-8,%a2)
				65	jmi 1f
				66	tst.l (TASK_MM-4,%a2)
				67	jmi 1f
				68	tst.l (TASK_MM,%a2)
				69	jpl 2f
				70	1: printf ,"oops:%p,%p,%p\n",3,%a2@(TASK_MM-8),%a2@(TASK_MM-4),%a2@(TASK_MM)
				71	2: clr.l %d0
				72	rts
				73
				74	.globl fp_conv_long2ext, fp_conv_single2ext
				75	.globl fp_conv_double2ext, fp_conv_ext2ext
				76	.globl fp_normalize_ext, fp_normalize_double
				77	.globl fp_normalize_single, fp_normalize_single_fast
				78	.globl fp_conv_ext2double, fp_conv_ext2single
				79	.globl fp_conv_ext2long, fp_conv_ext2short
				80	.globl fp_conv_ext2byte
				81	.globl fp_finalrounding_single, fp_finalrounding_single_fast
				82	.globl fp_finalrounding_double
				83	.globl fp_finalrounding, fp_finaltest, fp_final
				84
				85	/*
				86	* First several conversion functions from a source operand
				87	* into the extended format. Note, that only fp_conv_ext2ext
				88	* normalizes the number and is always called after the other
				89	* conversion functions, which only move the information into
				90	* fp_ext structure.
				91	*/
				92
				93	\| fp_conv_long2ext:
				94	\|
				95	\| args: %d0 = source (32-bit long)
				96	\| %a0 = destination (ptr to struct fp_ext)
				97
				98	fp_conv_long2ext:
				99	printf PCONV,"l2e: %p -> %p(",2,%d0,%a0
				100	clr.l %d1 \| sign defaults to zero
				101	tst.l %d0
				102	jeq fp_l2e_zero \| is source zero?
				103	jpl 1f \| positive?
				104	moveq #1,%d1
				105	neg.l %d0
				106	1: swap %d1
				107	move.w #0x3fff+31,%d1
				108	move.l %d1,(%a0)+ \| set sign / exp
				109	move.l %d0,(%a0)+ \| set mantissa
				110	clr.l (%a0)
				111	subq.l #8,%a0 \| restore %a0
				112	printx PCONV,%a0@
				113	printf PCONV,")\n"
				114	rts
				115	\| source is zero
				116	fp_l2e_zero:
				117	clr.l (%a0)+
				118	clr.l (%a0)+
				119	clr.l (%a0)
				120	subq.l #8,%a0
				121	printx PCONV,%a0@
				122	printf PCONV,")\n"
				123	rts
				124
				125	\| fp_conv_single2ext
				126	\| args: %d0 = source (single-precision fp value)
				127	\| %a0 = dest (struct fp_ext *)
				128
				129	fp_conv_single2ext:
				130	printf PCONV,"s2e: %p -> %p(",2,%d0,%a0
				131	move.l %d0,%d1
				132	lsl.l #8,%d0 \| shift mantissa
				133	lsr.l #8,%d1 \| exponent / sign
				134	lsr.l #7,%d1
				135	lsr.w #8,%d1
				136	jeq fp_s2e_small \| zero / denormal?
				137	cmp.w #0xff,%d1 \| NaN / Inf?
				138	jeq fp_s2e_large
				139	bset #31,%d0 \| set explizit bit
				140	add.w #0x3fff-0x7f,%d1 \| re-bias the exponent.
				141	9: move.l %d1,(%a0)+ \| fp_ext.sign, fp_ext.exp
				142	move.l %d0,(%a0)+ \| high lword of fp_ext.mant
				143	clr.l (%a0) \| low lword = 0
				144	subq.l #8,%a0
				145	printx PCONV,%a0@
				146	printf PCONV,")\n"
				147	rts
				148	\| zeros and denormalized
				149	fp_s2e_small:
				150	\| exponent is zero, so explizit bit is already zero too
				151	tst.l %d0
				152	jeq 9b
				153	move.w #0x4000-0x7f,%d1
				154	jra 9b
				155	\| infinities and NAN
				156	fp_s2e_large:
				157	bclr #31,%d0 \| clear explizit bit
				158	move.w #0x7fff,%d1
				159	jra 9b
				160
				161	fp_conv_double2ext:
				162	#ifdef FPU_EMU_DEBUG
				163	getuser.l %a1@(0),%d0,fp_err_ua2,%a1
				164	getuser.l %a1@(4),%d1,fp_err_ua2,%a1
				165	printf PCONV,"d2e: %p%p -> %p(",3,%d0,%d1,%a0
				166	#endif
				167	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
				168	move.l %d0,%d1
				169	lsl.l #8,%d0 \| shift high mantissa
				170	lsl.l #3,%d0
				171	lsr.l #8,%d1 \| exponent / sign
				172	lsr.l #7,%d1
				173	lsr.w #5,%d1
				174	jeq fp_d2e_small \| zero / denormal?
				175	cmp.w #0x7ff,%d1 \| NaN / Inf?
				176	jeq fp_d2e_large
				177	bset #31,%d0 \| set explizit bit
				178	add.w #0x3fff-0x3ff,%d1 \| re-bias the exponent.
				179	9: move.l %d1,(%a0)+ \| fp_ext.sign, fp_ext.exp
				180	move.l %d0,(%a0)+
				181	getuser.l (%a1)+,%d0,fp_err_ua2,%a1
				182	move.l %d0,%d1
				183	lsl.l #8,%d0
				184	lsl.l #3,%d0
				185	move.l %d0,(%a0)
				186	moveq #21,%d0
				187	lsr.l %d0,%d1
				188	or.l %d1,-(%a0)
				189	subq.l #4,%a0
				190	printx PCONV,%a0@
				191	printf PCONV,")\n"
				192	rts
				193	\| zeros and denormalized
				194	fp_d2e_small:
				195	\| exponent is zero, so explizit bit is already zero too
				196	tst.l %d0
				197	jeq 9b
				198	move.w #0x4000-0x3ff,%d1
				199	jra 9b
				200	\| infinities and NAN
				201	fp_d2e_large:
				202	bclr #31,%d0 \| clear explizit bit
				203	move.w #0x7fff,%d1
				204	jra 9b
				205
				206	\| fp_conv_ext2ext:
				207	\| originally used to get longdouble from userspace, now it's
				208	\| called before arithmetic operations to make sure the number
				209	\| is normalized [maybe rename it?].
				210	\| args: %a0 = dest (struct fp_ext *)
				211	\| returns 0 in %d0 for a NaN, otherwise 1
				212
				213	fp_conv_ext2ext:
				214	printf PCONV,"e2e: %p(",1,%a0
				215	printx PCONV,%a0@
				216	printf PCONV,"), "
				217	move.l (%a0)+,%d0
				218	cmp.w #0x7fff,%d0 \| Inf / NaN?
				219	jeq fp_e2e_large
				220	move.l (%a0),%d0
				221	jpl fp_e2e_small \| zero / denorm?
				222	\| The high bit is set, so normalization is irrelevant.
				223	fp_e2e_checkround:
				224	subq.l #4,%a0
				225	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				226	move.b (%a0),%d0
				227	jne fp_e2e_round
				228	#endif
				229	printf PCONV,"%p(",1,%a0
				230	printx PCONV,%a0@
				231	printf PCONV,")\n"
				232	moveq #1,%d0
				233	rts
				234	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				235	fp_e2e_round:
				236	fp_set_sr FPSR_EXC_INEX2
				237	clr.b (%a0)
				238	move.w (FPD_RND,FPDATA),%d2
				239	jne fp_e2e_roundother \| %d2 == 0, round to nearest
				240	tst.b %d0 \| test guard bit
				241	jpl 9f \| zero is closer
				242	btst #0,(11,%a0) \| test lsb bit
				243	jne fp_e2e_doroundup \| round to infinity
				244	lsl.b #1,%d0 \| check low bits
				245	jeq 9f \| round to zero
				246	fp_e2e_doroundup:
				247	addq.l #1,(8,%a0)
				248	jcc 9f
				249	addq.l #1,(4,%a0)
				250	jcc 9f
				251	move.w #0x8000,(4,%a0)
				252	addq.w #1,(2,%a0)
				253	9: printf PNORM,"%p(",1,%a0
				254	printx PNORM,%a0@
				255	printf PNORM,")\n"
				256	rts
				257	fp_e2e_roundother:
				258	subq.w #2,%d2
				259	jcs 9b \| %d2 < 2, round to zero
				260	jhi 1f \| %d2 > 2, round to +infinity
				261	tst.b (1,%a0) \| to -inf
				262	jne fp_e2e_doroundup \| negative, round to infinity
				263	jra 9b \| positive, round to zero
				264	1: tst.b (1,%a0) \| to +inf
				265	jeq fp_e2e_doroundup \| positive, round to infinity
				266	jra 9b \| negative, round to zero
				267	#endif
				268	\| zeros and subnormals:
				269	\| try to normalize these anyway.
				270	fp_e2e_small:
				271	jne fp_e2e_small1 \| high lword zero?
				272	move.l (4,%a0),%d0
				273	jne fp_e2e_small2
				274	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				275	clr.l %d0
				276	move.b (-4,%a0),%d0
				277	jne fp_e2e_small3
				278	#endif
				279	\| Genuine zero.
				280	clr.w -(%a0)
				281	subq.l #2,%a0
				282	printf PNORM,"%p(",1,%a0
				283	printx PNORM,%a0@
				284	printf PNORM,")\n"
				285	moveq #1,%d0
				286	rts
				287	\| definitely subnormal, need to shift all 64 bits
				288	fp_e2e_small1:
				289	bfffo %d0{#0,#32},%d1
				290	move.w -(%a0),%d2
				291	sub.w %d1,%d2
				292	jcc 1f
				293	\| Pathologically small, denormalize.
				294	add.w %d2,%d1
				295	clr.w %d2
				296	1: move.w %d2,(%a0)+
				297	move.w %d1,%d2
				298	jeq fp_e2e_checkround
				299	\| fancy 64-bit double-shift begins here
				300	lsl.l %d2,%d0
				301	move.l %d0,(%a0)+
				302	move.l (%a0),%d0
				303	move.l %d0,%d1
				304	lsl.l %d2,%d0
				305	move.l %d0,(%a0)
				306	neg.w %d2
				307	and.w #0x1f,%d2
				308	lsr.l %d2,%d1
				309	or.l %d1,-(%a0)
				310	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				311	fp_e2e_extra1:
				312	clr.l %d0
				313	move.b (-4,%a0),%d0
				314	neg.w %d2
				315	add.w #24,%d2
				316	jcc 1f
				317	clr.b (-4,%a0)
				318	lsl.l %d2,%d0
				319	or.l %d0,(4,%a0)
				320	jra fp_e2e_checkround
				321	1: addq.w #8,%d2
				322	lsl.l %d2,%d0
				323	move.b %d0,(-4,%a0)
				324	lsr.l #8,%d0
				325	or.l %d0,(4,%a0)
				326	#endif
				327	jra fp_e2e_checkround
				328	\| pathologically small subnormal
				329	fp_e2e_small2:
				330	bfffo %d0{#0,#32},%d1
				331	add.w #32,%d1
				332	move.w -(%a0),%d2
				333	sub.w %d1,%d2
				334	jcc 1f
				335	\| Beyond pathologically small, denormalize.
				336	add.w %d2,%d1
				337	clr.w %d2
				338	1: move.w %d2,(%a0)+
				339	ext.l %d1
				340	jeq fp_e2e_checkround
				341	clr.l (4,%a0)
				342	sub.w #32,%d2
				343	jcs 1f
				344	lsl.l %d1,%d0 \| lower lword needs only to be shifted
				345	move.l %d0,(%a0) \| into the higher lword
				346	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				347	clr.l %d0
				348	move.b (-4,%a0),%d0
				349	clr.b (-4,%a0)
				350	neg.w %d1
				351	add.w #32,%d1
				352	bfins %d0,(%a0){%d1,#8}
				353	#endif
				354	jra fp_e2e_checkround
				355	1: neg.w %d1 \| lower lword is splitted between
				356	bfins %d0,(%a0){%d1,#32} \| higher and lower lword
				357	#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
				358	jra fp_e2e_checkround
				359	#else
				360	move.w %d1,%d2
				361	jra fp_e2e_extra1
				362	\| These are extremely small numbers, that will mostly end up as zero
				363	\| anyway, so this is only important for correct rounding.
				364	fp_e2e_small3:
				365	bfffo %d0{#24,#8},%d1
				366	add.w #40,%d1
				367	move.w -(%a0),%d2
				368	sub.w %d1,%d2
				369	jcc 1f
				370	\| Pathologically small, denormalize.
				371	add.w %d2,%d1
				372	clr.w %d2
				373	1: move.w %d2,(%a0)+
				374	ext.l %d1
				375	jeq fp_e2e_checkround
				376	cmp.w #8,%d1
				377	jcs 2f
				378	1: clr.b (-4,%a0)
				379	sub.w #64,%d1
				380	jcs 1f
				381	add.w #24,%d1
				382	lsl.l %d1,%d0
				383	move.l %d0,(%a0)
				384	jra fp_e2e_checkround
				385	1: neg.w %d1
				386	bfins %d0,(%a0){%d1,#8}
				387	jra fp_e2e_checkround
				388	2: lsl.l %d1,%d0
				389	move.b %d0,(-4,%a0)
				390	lsr.l #8,%d0
				391	move.b %d0,(7,%a0)
				392	jra fp_e2e_checkround
				393	#endif
				394	1: move.l %d0,%d1 \| lower lword is splitted between
				395	lsl.l %d2,%d0 \| higher and lower lword
				396	move.l %d0,(%a0)
				397	move.l %d1,%d0
				398	neg.w %d2
				399	add.w #32,%d2
				400	lsr.l %d2,%d0
				401	move.l %d0,-(%a0)
				402	jra fp_e2e_checkround
				403	\| Infinities and NaNs
				404	fp_e2e_large:
				405	move.l (%a0)+,%d0
				406	jne 3f
				407	1: tst.l (%a0)
				408	jne 4f
				409	moveq #1,%d0
				410	2: subq.l #8,%a0
				411	printf PCONV,"%p(",1,%a0
				412	printx PCONV,%a0@
				413	printf PCONV,")\n"
				414	rts
				415	\| we have maybe a NaN, shift off the highest bit
				416	3: lsl.l #1,%d0
				417	jeq 1b
				418	\| we have a NaN, clear the return value
				419	4: clrl %d0
				420	jra 2b
				421
				422
				423	/*
				424	* Normalization functions. Call these on the output of general
				425	* FP operators, and before any conversion into the destination
				426	* formats. fp_normalize_ext has always to be called first, the
				427	* following conversion functions expect an already normalized
				428	* number.
				429	*/
				430
				431	\| fp_normalize_ext:
				432	\| normalize an extended in extended (unpacked) format, basically
				433	\| it does the same as fp_conv_ext2ext, additionally it also does
				434	\| the necessary postprocessing checks.
				435	\| args: %a0 (struct fp_ext *)
				436	\| NOTE: it does _not_ modify %a0/%a1 and the upper word of %d2
				437
				438	fp_normalize_ext:
				439	printf PNORM,"ne: %p(",1,%a0
				440	printx PNORM,%a0@
				441	printf PNORM,"), "
				442	move.l (%a0)+,%d0
				443	cmp.w #0x7fff,%d0 \| Inf / NaN?
				444	jeq fp_ne_large
				445	move.l (%a0),%d0
				446	jpl fp_ne_small \| zero / denorm?
				447	\| The high bit is set, so normalization is irrelevant.
				448	fp_ne_checkround:
				449	subq.l #4,%a0
				450	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				451	move.b (%a0),%d0
				452	jne fp_ne_round
				453	#endif
				454	printf PNORM,"%p(",1,%a0
				455	printx PNORM,%a0@
				456	printf PNORM,")\n"
				457	rts
				458	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				459	fp_ne_round:
				460	fp_set_sr FPSR_EXC_INEX2
				461	clr.b (%a0)
				462	move.w (FPD_RND,FPDATA),%d2
				463	jne fp_ne_roundother \| %d2 == 0, round to nearest
				464	tst.b %d0 \| test guard bit
				465	jpl 9f \| zero is closer
				466	btst #0,(11,%a0) \| test lsb bit
				467	jne fp_ne_doroundup \| round to infinity
				468	lsl.b #1,%d0 \| check low bits
				469	jeq 9f \| round to zero
				470	fp_ne_doroundup:
				471	addq.l #1,(8,%a0)
				472	jcc 9f
				473	addq.l #1,(4,%a0)
				474	jcc 9f
				475	addq.w #1,(2,%a0)
				476	move.w #0x8000,(4,%a0)
				477	9: printf PNORM,"%p(",1,%a0
				478	printx PNORM,%a0@
				479	printf PNORM,")\n"
				480	rts
				481	fp_ne_roundother:
				482	subq.w #2,%d2
				483	jcs 9b \| %d2 < 2, round to zero
				484	jhi 1f \| %d2 > 2, round to +infinity
				485	tst.b (1,%a0) \| to -inf
				486	jne fp_ne_doroundup \| negative, round to infinity
				487	jra 9b \| positive, round to zero
				488	1: tst.b (1,%a0) \| to +inf
				489	jeq fp_ne_doroundup \| positive, round to infinity
				490	jra 9b \| negative, round to zero
				491	#endif
				492	\| Zeros and subnormal numbers
				493	\| These are probably merely subnormal, rather than "denormalized"
				494	\| numbers, so we will try to make them normal again.
				495	fp_ne_small:
				496	jne fp_ne_small1 \| high lword zero?
				497	move.l (4,%a0),%d0
				498	jne fp_ne_small2
				499	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				500	clr.l %d0
				501	move.b (-4,%a0),%d0
				502	jne fp_ne_small3
				503	#endif
				504	\| Genuine zero.
				505	clr.w -(%a0)
				506	subq.l #2,%a0
				507	printf PNORM,"%p(",1,%a0
				508	printx PNORM,%a0@
				509	printf PNORM,")\n"
				510	rts
				511	\| Subnormal.
				512	fp_ne_small1:
				513	bfffo %d0{#0,#32},%d1
				514	move.w -(%a0),%d2
				515	sub.w %d1,%d2
				516	jcc 1f
				517	\| Pathologically small, denormalize.
				518	add.w %d2,%d1
				519	clr.w %d2
				520	fp_set_sr FPSR_EXC_UNFL
				521	1: move.w %d2,(%a0)+
				522	move.w %d1,%d2
				523	jeq fp_ne_checkround
				524	\| This is exactly the same 64-bit double shift as seen above.
				525	lsl.l %d2,%d0
				526	move.l %d0,(%a0)+
				527	move.l (%a0),%d0
				528	move.l %d0,%d1
				529	lsl.l %d2,%d0
				530	move.l %d0,(%a0)
				531	neg.w %d2
				532	and.w #0x1f,%d2
				533	lsr.l %d2,%d1
				534	or.l %d1,-(%a0)
				535	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				536	fp_ne_extra1:
				537	clr.l %d0
				538	move.b (-4,%a0),%d0
				539	neg.w %d2
				540	add.w #24,%d2
				541	jcc 1f
				542	clr.b (-4,%a0)
				543	lsl.l %d2,%d0
				544	or.l %d0,(4,%a0)
				545	jra fp_ne_checkround
				546	1: addq.w #8,%d2
				547	lsl.l %d2,%d0
				548	move.b %d0,(-4,%a0)
				549	lsr.l #8,%d0
				550	or.l %d0,(4,%a0)
				551	#endif
				552	jra fp_ne_checkround
				553	\| May or may not be subnormal, if so, only 32 bits to shift.
				554	fp_ne_small2:
				555	bfffo %d0{#0,#32},%d1
				556	add.w #32,%d1
				557	move.w -(%a0),%d2
				558	sub.w %d1,%d2
				559	jcc 1f
				560	\| Beyond pathologically small, denormalize.
				561	add.w %d2,%d1
				562	clr.w %d2
				563	fp_set_sr FPSR_EXC_UNFL
				564	1: move.w %d2,(%a0)+
				565	ext.l %d1
				566	jeq fp_ne_checkround
				567	clr.l (4,%a0)
				568	sub.w #32,%d1
				569	jcs 1f
				570	lsl.l %d1,%d0 \| lower lword needs only to be shifted
				571	move.l %d0,(%a0) \| into the higher lword
				572	#ifdef CONFIG_M68KFPU_EMU_EXTRAPREC
				573	clr.l %d0
				574	move.b (-4,%a0),%d0
				575	clr.b (-4,%a0)
				576	neg.w %d1
				577	add.w #32,%d1
				578	bfins %d0,(%a0){%d1,#8}
				579	#endif
				580	jra fp_ne_checkround
				581	1: neg.w %d1 \| lower lword is splitted between
				582	bfins %d0,(%a0){%d1,#32} \| higher and lower lword
				583	#ifndef CONFIG_M68KFPU_EMU_EXTRAPREC
				584	jra fp_ne_checkround
				585	#else
				586	move.w %d1,%d2
				587	jra fp_ne_extra1
				588	\| These are extremely small numbers, that will mostly end up as zero
				589	\| anyway, so this is only important for correct rounding.
				590	fp_ne_small3:
				591	bfffo %d0{#24,#8},%d1
				592	add.w #40,%d1
				593	move.w -(%a0),%d2
				594	sub.w %d1,%d2
				595	jcc 1f
				596	\| Pathologically small, denormalize.
				597	add.w %d2,%d1
				598	clr.w %d2
				599	1: move.w %d2,(%a0)+
				600	ext.l %d1
				601	jeq fp_ne_checkround
				602	cmp.w #8,%d1
				603	jcs 2f
				604	1: clr.b (-4,%a0)
				605	sub.w #64,%d1
				606	jcs 1f
				607	add.w #24,%d1
				608	lsl.l %d1,%d0
				609	move.l %d0,(%a0)
				610	jra fp_ne_checkround
				611	1: neg.w %d1
				612	bfins %d0,(%a0){%d1,#8}
				613	jra fp_ne_checkround
				614	2: lsl.l %d1,%d0
				615	move.b %d0,(-4,%a0)
				616	lsr.l #8,%d0
				617	move.b %d0,(7,%a0)
				618	jra fp_ne_checkround
				619	#endif
				620	\| Infinities and NaNs, again, same as above.
				621	fp_ne_large:
				622	move.l (%a0)+,%d0
				623	jne 3f
				624	1: tst.l (%a0)
				625	jne 4f
				626	2: subq.l #8,%a0
				627	printf PNORM,"%p(",1,%a0
				628	printx PNORM,%a0@
				629	printf PNORM,")\n"
				630	rts
				631	\| we have maybe a NaN, shift off the highest bit
				632	3: move.l %d0,%d1
				633	lsl.l #1,%d1
				634	jne 4f
				635	clr.l (-4,%a0)
				636	jra 1b
				637	\| we have a NaN, test if it is signaling
				638	4: bset #30,%d0
				639	jne 2b
				640	fp_set_sr FPSR_EXC_SNAN
				641	move.l %d0,(-4,%a0)
				642	jra 2b
				643
				644	\| these next two do rounding as per the IEEE standard.
				645	\| values for the rounding modes appear to be:
				646	\| 0: Round to nearest
				647	\| 1: Round to zero
				648	\| 2: Round to -Infinity
				649	\| 3: Round to +Infinity
				650	\| both functions expect that fp_normalize was already
				651	\| called (and extended argument is already normalized
				652	\| as far as possible), these are used if there is different
				653	\| rounding precision is selected and before converting
				654	\| into single/double
				655
				656	\| fp_normalize_double:
				657	\| normalize an extended with double (52-bit) precision
				658	\| args: %a0 (struct fp_ext *)
				659
				660	fp_normalize_double:
				661	printf PNORM,"nd: %p(",1,%a0
				662	printx PNORM,%a0@
				663	printf PNORM,"), "
				664	move.l (%a0)+,%d2
				665	tst.w %d2
				666	jeq fp_nd_zero \| zero / denormalized
				667	cmp.w #0x7fff,%d2
				668	jeq fp_nd_huge \| NaN / infinitive.
				669	sub.w #0x4000-0x3ff,%d2 \| will the exponent fit?
				670	jcs fp_nd_small \| too small.
				671	cmp.w #0x7fe,%d2
				672	jcc fp_nd_large \| too big.
				673	addq.l #4,%a0
				674	move.l (%a0),%d0 \| low lword of mantissa
				675	\| now, round off the low 11 bits.
				676	fp_nd_round:
				677	moveq #21,%d1
				678	lsl.l %d1,%d0 \| keep 11 low bits.
				679	jne fp_nd_checkround \| Are they non-zero?
				680	\| nothing to do here
				681	9: subq.l #8,%a0
				682	printf PNORM,"%p(",1,%a0
				683	printx PNORM,%a0@
				684	printf PNORM,")\n"
				685	rts
				686	\| Be careful with the X bit! It contains the lsb
				687	\| from the shift above, it is needed for round to nearest.
				688	fp_nd_checkround:
				689	fp_set_sr FPSR_EXC_INEX2 \| INEX2 bit
				690	and.w #0xf800,(2,%a0) \| clear bits 0-10
				691	move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				692	jne 2f \| %d2 == 0, round to nearest
				693	tst.l %d0 \| test guard bit
				694	jpl 9b \| zero is closer
				695	\| here we test the X bit by adding it to %d2
				696	clr.w %d2 \| first set z bit, addx only clears it
				697	addx.w %d2,%d2 \| test lsb bit
				698	\| IEEE754-specified "round to even" behaviour. If the guard
				699	\| bit is set, then the number is odd, so rounding works like
				700	\| in grade-school arithmetic (i.e. 1.5 rounds to 2.0)
				701	\| Otherwise, an equal distance rounds towards zero, so as not
				702	\| to produce an odd number. This is strange, but it is what
				703	\| the standard says.
				704	jne fp_nd_doroundup \| round to infinity
				705	lsl.l #1,%d0 \| check low bits
				706	jeq 9b \| round to zero
				707	fp_nd_doroundup:
				708	\| round (the mantissa, that is) towards infinity
				709	add.l #0x800,(%a0)
				710	jcc 9b \| no overflow, good.
				711	addq.l #1,-(%a0) \| extend to high lword
				712	jcc 1f \| no overflow, good.
				713	\| Yow! we have managed to overflow the mantissa. Since this
				714	\| only happens when %d1 was 0xfffff800, it is now zero, so
				715	\| reset the high bit, and increment the exponent.
				716	move.w #0x8000,(%a0)
				717	addq.w #1,-(%a0)
				718	cmp.w #0x43ff,(%a0)+ \| exponent now overflown?
				719	jeq fp_nd_large \| yes, so make it infinity.
				720	1: subq.l #4,%a0
				721	printf PNORM,"%p(",1,%a0
				722	printx PNORM,%a0@
				723	printf PNORM,")\n"
				724	rts
				725	2: subq.w #2,%d2
				726	jcs 9b \| %d2 < 2, round to zero
				727	jhi 3f \| %d2 > 2, round to +infinity
				728	\| Round to +Inf or -Inf. High word of %d2 contains the
				729	\| sign of the number, by the way.
				730	swap %d2 \| to -inf
				731	tst.b %d2
				732	jne fp_nd_doroundup \| negative, round to infinity
				733	jra 9b \| positive, round to zero
				734	3: swap %d2 \| to +inf
				735	tst.b %d2
				736	jeq fp_nd_doroundup \| positive, round to infinity
				737	jra 9b \| negative, round to zero
				738	\| Exponent underflow. Try to make a denormal, and set it to
				739	\| the smallest possible fraction if this fails.
				740	fp_nd_small:
				741	fp_set_sr FPSR_EXC_UNFL \| set UNFL bit
				742	move.w #0x3c01,(-2,%a0) \| 2**-1022
				743	neg.w %d2 \| degree of underflow
				744	cmp.w #32,%d2 \| single or double shift?
				745	jcc 1f
				746	\| Again, another 64-bit double shift.
				747	move.l (%a0),%d0
				748	move.l %d0,%d1
				749	lsr.l %d2,%d0
				750	move.l %d0,(%a0)+
				751	move.l (%a0),%d0
				752	lsr.l %d2,%d0
				753	neg.w %d2
				754	add.w #32,%d2
				755	lsl.l %d2,%d1
				756	or.l %d1,%d0
				757	move.l (%a0),%d1
				758	move.l %d0,(%a0)
				759	\| Check to see if we shifted off any significant bits
				760	lsl.l %d2,%d1
				761	jeq fp_nd_round \| Nope, round.
				762	bset #0,%d0 \| Yes, so set the "sticky bit".
				763	jra fp_nd_round \| Now, round.
				764	\| Another 64-bit single shift and store
				765	1: sub.w #32,%d2
				766	cmp.w #32,%d2 \| Do we really need to shift?
				767	jcc 2f \| No, the number is too small.
				768	move.l (%a0),%d0
				769	clr.l (%a0)+
				770	move.l %d0,%d1
				771	lsr.l %d2,%d0
				772	neg.w %d2
				773	add.w #32,%d2
				774	\| Again, check to see if we shifted off any significant bits.
				775	tst.l (%a0)
				776	jeq 1f
				777	bset #0,%d0 \| Sticky bit.
				778	1: move.l %d0,(%a0)
				779	lsl.l %d2,%d1
				780	jeq fp_nd_round
				781	bset #0,%d0
				782	jra fp_nd_round
				783	\| Sorry, the number is just too small.
				784	2: clr.l (%a0)+
				785	clr.l (%a0)
				786	moveq #1,%d0 \| Smallest possible fraction,
				787	jra fp_nd_round \| round as desired.
				788	\| zero and denormalized
				789	fp_nd_zero:
				790	tst.l (%a0)+
				791	jne 1f
				792	tst.l (%a0)
				793	jne 1f
				794	subq.l #8,%a0
				795	printf PNORM,"%p(",1,%a0
				796	printx PNORM,%a0@
				797	printf PNORM,")\n"
				798	rts \| zero. nothing to do.
				799	\| These are not merely subnormal numbers, but true denormals,
				800	\| i.e. pathologically small (exponent is 2**-16383) numbers.
				801	\| It is clearly impossible for even a normal extended number
				802	\| with that exponent to fit into double precision, so just
				803	\| write these ones off as "too darn small".
				804	1: fp_set_sr FPSR_EXC_UNFL \| Set UNFL bit
				805	clr.l (%a0)
				806	clr.l -(%a0)
				807	move.w #0x3c01,-(%a0) \| i.e. 2**-1022
				808	addq.l #6,%a0
				809	moveq #1,%d0
				810	jra fp_nd_round \| round.
				811	\| Exponent overflow. Just call it infinity.
				812	fp_nd_large:
				813	move.w #0x7ff,%d0
				814	and.w (6,%a0),%d0
				815	jeq 1f
				816	fp_set_sr FPSR_EXC_INEX2
				817	1: fp_set_sr FPSR_EXC_OVFL
				818	move.w (FPD_RND,FPDATA),%d2
				819	jne 3f \| %d2 = 0 round to nearest
				820	1: move.w #0x7fff,(-2,%a0)
				821	clr.l (%a0)+
				822	clr.l (%a0)
				823	2: subq.l #8,%a0
				824	printf PNORM,"%p(",1,%a0
				825	printx PNORM,%a0@
				826	printf PNORM,")\n"
				827	rts
				828	3: subq.w #2,%d2
				829	jcs 5f \| %d2 < 2, round to zero
				830	jhi 4f \| %d2 > 2, round to +infinity
				831	tst.b (-3,%a0) \| to -inf
				832	jne 1b
				833	jra 5f
				834	4: tst.b (-3,%a0) \| to +inf
				835	jeq 1b
				836	5: move.w #0x43fe,(-2,%a0)
				837	moveq #-1,%d0
				838	move.l %d0,(%a0)+
				839	move.w #0xf800,%d0
				840	move.l %d0,(%a0)
				841	jra 2b
				842	\| Infinities or NaNs
				843	fp_nd_huge:
				844	subq.l #4,%a0
				845	printf PNORM,"%p(",1,%a0
				846	printx PNORM,%a0@
				847	printf PNORM,")\n"
				848	rts
				849
				850	\| fp_normalize_single:
				851	\| normalize an extended with single (23-bit) precision
				852	\| args: %a0 (struct fp_ext *)
				853
				854	fp_normalize_single:
				855	printf PNORM,"ns: %p(",1,%a0
				856	printx PNORM,%a0@
				857	printf PNORM,") "
				858	addq.l #2,%a0
				859	move.w (%a0)+,%d2
				860	jeq fp_ns_zero \| zero / denormalized
				861	cmp.w #0x7fff,%d2
				862	jeq fp_ns_huge \| NaN / infinitive.
				863	sub.w #0x4000-0x7f,%d2 \| will the exponent fit?
				864	jcs fp_ns_small \| too small.
				865	cmp.w #0xfe,%d2
				866	jcc fp_ns_large \| too big.
				867	move.l (%a0)+,%d0 \| get high lword of mantissa
				868	fp_ns_round:
				869	tst.l (%a0) \| check the low lword
				870	jeq 1f
				871	\| Set a sticky bit if it is non-zero. This should only
				872	\| affect the rounding in what would otherwise be equal-
				873	\| distance situations, which is what we want it to do.
				874	bset #0,%d0
				875	1: clr.l (%a0) \| zap it from memory.
				876	\| now, round off the low 8 bits of the hi lword.
				877	tst.b %d0 \| 8 low bits.
				878	jne fp_ns_checkround \| Are they non-zero?
				879	\| nothing to do here
				880	subq.l #8,%a0
				881	printf PNORM,"%p(",1,%a0
				882	printx PNORM,%a0@
				883	printf PNORM,")\n"
				884	rts
				885	fp_ns_checkround:
				886	fp_set_sr FPSR_EXC_INEX2 \| INEX2 bit
				887	clr.b -(%a0) \| clear low byte of high lword
				888	subq.l #3,%a0
				889	move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				890	jne 2f \| %d2 == 0, round to nearest
				891	tst.b %d0 \| test guard bit
				892	jpl 9f \| zero is closer
				893	btst #8,%d0 \| test lsb bit
				894	\| round to even behaviour, see above.
				895	jne fp_ns_doroundup \| round to infinity
				896	lsl.b #1,%d0 \| check low bits
				897	jeq 9f \| round to zero
				898	fp_ns_doroundup:
				899	\| round (the mantissa, that is) towards infinity
				900	add.l #0x100,(%a0)
				901	jcc 9f \| no overflow, good.
				902	\| Overflow. This means that the %d1 was 0xffffff00, so it
				903	\| is now zero. We will set the mantissa to reflect this, and
				904	\| increment the exponent (checking for overflow there too)
				905	move.w #0x8000,(%a0)
				906	addq.w #1,-(%a0)
				907	cmp.w #0x407f,(%a0)+ \| exponent now overflown?
				908	jeq fp_ns_large \| yes, so make it infinity.
				909	9: subq.l #4,%a0
				910	printf PNORM,"%p(",1,%a0
				911	printx PNORM,%a0@
				912	printf PNORM,")\n"
				913	rts
				914	\| check nondefault rounding modes
				915	2: subq.w #2,%d2
				916	jcs 9b \| %d2 < 2, round to zero
				917	jhi 3f \| %d2 > 2, round to +infinity
				918	tst.b (-3,%a0) \| to -inf
				919	jne fp_ns_doroundup \| negative, round to infinity
				920	jra 9b \| positive, round to zero
				921	3: tst.b (-3,%a0) \| to +inf
				922	jeq fp_ns_doroundup \| positive, round to infinity
				923	jra 9b \| negative, round to zero
				924	\| Exponent underflow. Try to make a denormal, and set it to
				925	\| the smallest possible fraction if this fails.
				926	fp_ns_small:
				927	fp_set_sr FPSR_EXC_UNFL \| set UNFL bit
				928	move.w #0x3f81,(-2,%a0) \| 2**-126
				929	neg.w %d2 \| degree of underflow
				930	cmp.w #32,%d2 \| single or double shift?
				931	jcc 2f
				932	\| a 32-bit shift.
				933	move.l (%a0),%d0
				934	move.l %d0,%d1
				935	lsr.l %d2,%d0
				936	move.l %d0,(%a0)+
				937	\| Check to see if we shifted off any significant bits.
				938	neg.w %d2
				939	add.w #32,%d2
				940	lsl.l %d2,%d1
				941	jeq 1f
				942	bset #0,%d0 \| Sticky bit.
				943	\| Check the lower lword
				944	1: tst.l (%a0)
				945	jeq fp_ns_round
				946	clr (%a0)
				947	bset #0,%d0 \| Sticky bit.
				948	jra fp_ns_round
				949	\| Sorry, the number is just too small.
				950	2: clr.l (%a0)+
				951	clr.l (%a0)
				952	moveq #1,%d0 \| Smallest possible fraction,
				953	jra fp_ns_round \| round as desired.
				954	\| Exponent overflow. Just call it infinity.
				955	fp_ns_large:
				956	tst.b (3,%a0)
				957	jeq 1f
				958	fp_set_sr FPSR_EXC_INEX2
				959	1: fp_set_sr FPSR_EXC_OVFL
				960	move.w (FPD_RND,FPDATA),%d2
				961	jne 3f \| %d2 = 0 round to nearest
				962	1: move.w #0x7fff,(-2,%a0)
				963	clr.l (%a0)+
				964	clr.l (%a0)
				965	2: subq.l #8,%a0
				966	printf PNORM,"%p(",1,%a0
				967	printx PNORM,%a0@
				968	printf PNORM,")\n"
				969	rts
				970	3: subq.w #2,%d2
				971	jcs 5f \| %d2 < 2, round to zero
				972	jhi 4f \| %d2 > 2, round to +infinity
				973	tst.b (-3,%a0) \| to -inf
				974	jne 1b
				975	jra 5f
				976	4: tst.b (-3,%a0) \| to +inf
				977	jeq 1b
				978	5: move.w #0x407e,(-2,%a0)
				979	move.l #0xffffff00,(%a0)+
				980	clr.l (%a0)
				981	jra 2b
				982	\| zero and denormalized
				983	fp_ns_zero:
				984	tst.l (%a0)+
				985	jne 1f
				986	tst.l (%a0)
				987	jne 1f
				988	subq.l #8,%a0
				989	printf PNORM,"%p(",1,%a0
				990	printx PNORM,%a0@
				991	printf PNORM,")\n"
				992	rts \| zero. nothing to do.
				993	\| These are not merely subnormal numbers, but true denormals,
				994	\| i.e. pathologically small (exponent is 2**-16383) numbers.
				995	\| It is clearly impossible for even a normal extended number
				996	\| with that exponent to fit into single precision, so just
				997	\| write these ones off as "too darn small".
				998	1: fp_set_sr FPSR_EXC_UNFL \| Set UNFL bit
				999	clr.l (%a0)
				1000	clr.l -(%a0)
				1001	move.w #0x3f81,-(%a0) \| i.e. 2**-126
				1002	addq.l #6,%a0
				1003	moveq #1,%d0
				1004	jra fp_ns_round \| round.
				1005	\| Infinities or NaNs
				1006	fp_ns_huge:
				1007	subq.l #4,%a0
				1008	printf PNORM,"%p(",1,%a0
				1009	printx PNORM,%a0@
				1010	printf PNORM,")\n"
				1011	rts
				1012
				1013	\| fp_normalize_single_fast:
				1014	\| normalize an extended with single (23-bit) precision
				1015	\| this is only used by fsgldiv/fsgdlmul, where the
				1016	\| operand is not completly normalized.
				1017	\| args: %a0 (struct fp_ext *)
				1018
				1019	fp_normalize_single_fast:
				1020	printf PNORM,"nsf: %p(",1,%a0
				1021	printx PNORM,%a0@
				1022	printf PNORM,") "
				1023	addq.l #2,%a0
				1024	move.w (%a0)+,%d2
				1025	cmp.w #0x7fff,%d2
				1026	jeq fp_nsf_huge \| NaN / infinitive.
				1027	move.l (%a0)+,%d0 \| get high lword of mantissa
				1028	fp_nsf_round:
				1029	tst.l (%a0) \| check the low lword
				1030	jeq 1f
				1031	\| Set a sticky bit if it is non-zero. This should only
				1032	\| affect the rounding in what would otherwise be equal-
				1033	\| distance situations, which is what we want it to do.
				1034	bset #0,%d0
				1035	1: clr.l (%a0) \| zap it from memory.
				1036	\| now, round off the low 8 bits of the hi lword.
				1037	tst.b %d0 \| 8 low bits.
				1038	jne fp_nsf_checkround \| Are they non-zero?
				1039	\| nothing to do here
				1040	subq.l #8,%a0
				1041	printf PNORM,"%p(",1,%a0
				1042	printx PNORM,%a0@
				1043	printf PNORM,")\n"
				1044	rts
				1045	fp_nsf_checkround:
				1046	fp_set_sr FPSR_EXC_INEX2 \| INEX2 bit
				1047	clr.b -(%a0) \| clear low byte of high lword
				1048	subq.l #3,%a0
				1049	move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				1050	jne 2f \| %d2 == 0, round to nearest
				1051	tst.b %d0 \| test guard bit
				1052	jpl 9f \| zero is closer
				1053	btst #8,%d0 \| test lsb bit
				1054	\| round to even behaviour, see above.
				1055	jne fp_nsf_doroundup \| round to infinity
				1056	lsl.b #1,%d0 \| check low bits
				1057	jeq 9f \| round to zero
				1058	fp_nsf_doroundup:
				1059	\| round (the mantissa, that is) towards infinity
				1060	add.l #0x100,(%a0)
				1061	jcc 9f \| no overflow, good.
				1062	\| Overflow. This means that the %d1 was 0xffffff00, so it
				1063	\| is now zero. We will set the mantissa to reflect this, and
				1064	\| increment the exponent (checking for overflow there too)
				1065	move.w #0x8000,(%a0)
				1066	addq.w #1,-(%a0)
				1067	cmp.w #0x407f,(%a0)+ \| exponent now overflown?
				1068	jeq fp_nsf_large \| yes, so make it infinity.
				1069	9: subq.l #4,%a0
				1070	printf PNORM,"%p(",1,%a0
				1071	printx PNORM,%a0@
				1072	printf PNORM,")\n"
				1073	rts
				1074	\| check nondefault rounding modes
				1075	2: subq.w #2,%d2
				1076	jcs 9b \| %d2 < 2, round to zero
				1077	jhi 3f \| %d2 > 2, round to +infinity
				1078	tst.b (-3,%a0) \| to -inf
				1079	jne fp_nsf_doroundup \| negative, round to infinity
				1080	jra 9b \| positive, round to zero
				1081	3: tst.b (-3,%a0) \| to +inf
				1082	jeq fp_nsf_doroundup \| positive, round to infinity
				1083	jra 9b \| negative, round to zero
				1084	\| Exponent overflow. Just call it infinity.
				1085	fp_nsf_large:
				1086	tst.b (3,%a0)
				1087	jeq 1f
				1088	fp_set_sr FPSR_EXC_INEX2
				1089	1: fp_set_sr FPSR_EXC_OVFL
				1090	move.w (FPD_RND,FPDATA),%d2
				1091	jne 3f \| %d2 = 0 round to nearest
				1092	1: move.w #0x7fff,(-2,%a0)
				1093	clr.l (%a0)+
				1094	clr.l (%a0)
				1095	2: subq.l #8,%a0
				1096	printf PNORM,"%p(",1,%a0
				1097	printx PNORM,%a0@
				1098	printf PNORM,")\n"
				1099	rts
				1100	3: subq.w #2,%d2
				1101	jcs 5f \| %d2 < 2, round to zero
				1102	jhi 4f \| %d2 > 2, round to +infinity
				1103	tst.b (-3,%a0) \| to -inf
				1104	jne 1b
				1105	jra 5f
				1106	4: tst.b (-3,%a0) \| to +inf
				1107	jeq 1b
				1108	5: move.w #0x407e,(-2,%a0)
				1109	move.l #0xffffff00,(%a0)+
				1110	clr.l (%a0)
				1111	jra 2b
				1112	\| Infinities or NaNs
				1113	fp_nsf_huge:
				1114	subq.l #4,%a0
				1115	printf PNORM,"%p(",1,%a0
				1116	printx PNORM,%a0@
				1117	printf PNORM,")\n"
				1118	rts
				1119
				1120	\| conv_ext2int (macro):
				1121	\| Generates a subroutine that converts an extended value to an
				1122	\| integer of a given size, again, with the appropriate type of
				1123	\| rounding.
				1124
				1125	\| Macro arguments:
				1126	\| s: size, as given in an assembly instruction.
				1127	\| b: number of bits in that size.
				1128
				1129	\| Subroutine arguments:
				1130	\| %a0: source (struct fp_ext *)
				1131
				1132	\| Returns the integer in %d0 (like it should)
				1133
				1134	.macro conv_ext2int s,b
				1135	.set inf,(1<<(\b-1))-1 \| i.e. MAXINT
				1136	printf PCONV,"e2i%d: %p(",2,#\b,%a0
				1137	printx PCONV,%a0@
				1138	printf PCONV,") "
				1139	addq.l #2,%a0
				1140	move.w (%a0)+,%d2 \| exponent
				1141	jeq fp_e2i_zero\b \| zero / denorm (== 0, here)
				1142	cmp.w #0x7fff,%d2
				1143	jeq fp_e2i_huge\b \| Inf / NaN
				1144	sub.w #0x3ffe,%d2
				1145	jcs fp_e2i_small\b
				1146	cmp.w #\b,%d2
				1147	jhi fp_e2i_large\b
				1148	move.l (%a0),%d0
				1149	move.l %d0,%d1
				1150	lsl.l %d2,%d1
				1151	jne fp_e2i_round\b
				1152	tst.l (4,%a0)
				1153	jne fp_e2i_round\b
				1154	neg.w %d2
				1155	add.w #32,%d2
				1156	lsr.l %d2,%d0
				1157	9: tst.w (-4,%a0)
				1158	jne 1f
				1159	tst.\s %d0
				1160	jmi fp_e2i_large\b
				1161	printf PCONV,"-> %p\n",1,%d0
				1162	rts
				1163	1: neg.\s %d0
				1164	jeq 1f
				1165	jpl fp_e2i_large\b
				1166	1: printf PCONV,"-> %p\n",1,%d0
				1167	rts
				1168	fp_e2i_round\b:
				1169	fp_set_sr FPSR_EXC_INEX2 \| INEX2 bit
				1170	neg.w %d2
				1171	add.w #32,%d2
				1172	.if \b>16
				1173	jeq 5f
				1174	.endif
				1175	lsr.l %d2,%d0
				1176	move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				1177	jne 2f \| %d2 == 0, round to nearest
				1178	tst.l %d1 \| test guard bit
				1179	jpl 9b \| zero is closer
				1180	btst %d2,%d0 \| test lsb bit (%d2 still 0)
				1181	jne fp_e2i_doroundup\b
				1182	lsl.l #1,%d1 \| check low bits
				1183	jne fp_e2i_doroundup\b
				1184	tst.l (4,%a0)
				1185	jeq 9b
				1186	fp_e2i_doroundup\b:
				1187	addq.l #1,%d0
				1188	jra 9b
				1189	\| check nondefault rounding modes
				1190	2: subq.w #2,%d2
				1191	jcs 9b \| %d2 < 2, round to zero
				1192	jhi 3f \| %d2 > 2, round to +infinity
				1193	tst.w (-4,%a0) \| to -inf
				1194	jne fp_e2i_doroundup\b \| negative, round to infinity
				1195	jra 9b \| positive, round to zero
				1196	3: tst.w (-4,%a0) \| to +inf
				1197	jeq fp_e2i_doroundup\b \| positive, round to infinity
				1198	jra 9b \| negative, round to zero
				1199	\| we are only want -2**127 get correctly rounded here,
				1200	\| since the guard bit is in the lower lword.
				1201	\| everything else ends up anyway as overflow.
				1202	.if \b>16
				1203	5: move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				1204	jne 2b \| %d2 == 0, round to nearest
				1205	move.l (4,%a0),%d1 \| test guard bit
				1206	jpl 9b \| zero is closer
				1207	lsl.l #1,%d1 \| check low bits
				1208	jne fp_e2i_doroundup\b
				1209	jra 9b
				1210	.endif
				1211	fp_e2i_zero\b:
				1212	clr.l %d0
				1213	tst.l (%a0)+
				1214	jne 1f
				1215	tst.l (%a0)
				1216	jeq 3f
				1217	1: subq.l #4,%a0
				1218	fp_clr_sr FPSR_EXC_UNFL \| fp_normalize_ext has set this bit
				1219	fp_e2i_small\b:
				1220	fp_set_sr FPSR_EXC_INEX2
				1221	clr.l %d0
				1222	move.w (FPD_RND,FPDATA),%d2 \| rounding mode
				1223	subq.w #2,%d2
				1224	jcs 3f \| %d2 < 2, round to nearest/zero
				1225	jhi 2f \| %d2 > 2, round to +infinity
				1226	tst.w (-4,%a0) \| to -inf
				1227	jeq 3f
				1228	subq.\s #1,%d0
				1229	jra 3f
				1230	2: tst.w (-4,%a0) \| to +inf
				1231	jne 3f
				1232	addq.\s #1,%d0
				1233	3: printf PCONV,"-> %p\n",1,%d0
				1234	rts
				1235	fp_e2i_large\b:
				1236	fp_set_sr FPSR_EXC_OPERR
				1237	move.\s #inf,%d0
				1238	tst.w (-4,%a0)
				1239	jeq 1f
				1240	addq.\s #1,%d0
				1241	1: printf PCONV,"-> %p\n",1,%d0
				1242	rts
				1243	fp_e2i_huge\b:
				1244	move.\s (%a0),%d0
				1245	tst.l (%a0)
				1246	jne 1f
				1247	tst.l (%a0)
				1248	jeq fp_e2i_large\b
				1249	\| fp_normalize_ext has set this bit already
				1250	\| and made the number nonsignaling
				1251	1: fp_tst_sr FPSR_EXC_SNAN
				1252	jne 1f
				1253	fp_set_sr FPSR_EXC_OPERR
				1254	1: printf PCONV,"-> %p\n",1,%d0
				1255	rts
				1256	.endm
				1257
				1258	fp_conv_ext2long:
				1259	conv_ext2int l,32
				1260
				1261	fp_conv_ext2short:
				1262	conv_ext2int w,16
				1263
				1264	fp_conv_ext2byte:
				1265	conv_ext2int b,8
				1266
				1267	fp_conv_ext2double:
				1268	jsr fp_normalize_double
				1269	printf PCONV,"e2d: %p(",1,%a0
				1270	printx PCONV,%a0@
				1271	printf PCONV,"), "
				1272	move.l (%a0)+,%d2
				1273	cmp.w #0x7fff,%d2
				1274	jne 1f
				1275	move.w #0x7ff,%d2
				1276	move.l (%a0)+,%d0
				1277	jra 2f
				1278	1: sub.w #0x3fff-0x3ff,%d2
				1279	move.l (%a0)+,%d0
				1280	jmi 2f
				1281	clr.w %d2
				1282	2: lsl.w #5,%d2
				1283	lsl.l #7,%d2
				1284	lsl.l #8,%d2
				1285	move.l %d0,%d1
				1286	lsl.l #1,%d0
				1287	lsr.l #4,%d0
				1288	lsr.l #8,%d0
				1289	or.l %d2,%d0
				1290	putuser.l %d0,(%a1)+,fp_err_ua2,%a1
				1291	moveq #21,%d0
				1292	lsl.l %d0,%d1
				1293	move.l (%a0),%d0
				1294	lsr.l #4,%d0
				1295	lsr.l #7,%d0
				1296	or.l %d1,%d0
				1297	putuser.l %d0,(%a1),fp_err_ua2,%a1
				1298	#ifdef FPU_EMU_DEBUG
				1299	getuser.l %a1@(-4),%d0,fp_err_ua2,%a1
				1300	getuser.l %a1@(0),%d1,fp_err_ua2,%a1
				1301	printf PCONV,"%p(%08x%08x)\n",3,%a1,%d0,%d1
				1302	#endif
				1303	rts
				1304
				1305	fp_conv_ext2single:
				1306	jsr fp_normalize_single
				1307	printf PCONV,"e2s: %p(",1,%a0
				1308	printx PCONV,%a0@
				1309	printf PCONV,"), "
				1310	move.l (%a0)+,%d1
				1311	cmp.w #0x7fff,%d1
				1312	jne 1f
				1313	move.w #0xff,%d1
				1314	move.l (%a0)+,%d0
				1315	jra 2f
				1316	1: sub.w #0x3fff-0x7f,%d1
				1317	move.l (%a0)+,%d0
				1318	jmi 2f
				1319	clr.w %d1
				1320	2: lsl.w #8,%d1
				1321	lsl.l #7,%d1
				1322	lsl.l #8,%d1
				1323	bclr #31,%d0
				1324	lsr.l #8,%d0
				1325	or.l %d1,%d0
				1326	printf PCONV,"%08x\n",1,%d0
				1327	rts
				1328
				1329	\| special return addresses for instr that
				1330	\| encode the rounding precision in the opcode
				1331	\| (e.g. fsmove,fdmove)
				1332
				1333	fp_finalrounding_single:
				1334	addq.l #8,%sp
				1335	jsr fp_normalize_ext
				1336	jsr fp_normalize_single
				1337	jra fp_finaltest
				1338
				1339	fp_finalrounding_single_fast:
				1340	addq.l #8,%sp
				1341	jsr fp_normalize_ext
				1342	jsr fp_normalize_single_fast
				1343	jra fp_finaltest
				1344
				1345	fp_finalrounding_double:
				1346	addq.l #8,%sp
				1347	jsr fp_normalize_ext
				1348	jsr fp_normalize_double
				1349	jra fp_finaltest
				1350
				1351	\| fp_finaltest:
				1352	\| set the emulated status register based on the outcome of an
				1353	\| emulated instruction.
				1354
				1355	fp_finalrounding:
				1356	addq.l #8,%sp
				1357	\| printf ,"f: %p\n",1,%a0
				1358	jsr fp_normalize_ext
				1359	move.w (FPD_PREC,FPDATA),%d0
				1360	subq.w #1,%d0
				1361	jcs fp_finaltest
				1362	jne 1f
				1363	jsr fp_normalize_single
				1364	jra 2f
				1365	1: jsr fp_normalize_double
				1366	2:\| printf ,"f: %p\n",1,%a0
				1367	fp_finaltest:
				1368	\| First, we do some of the obvious tests for the exception
				1369	\| status byte and condition code bytes of fp_sr here, so that
				1370	\| they do not have to be handled individually by every
				1371	\| emulated instruction.
				1372	clr.l %d0
				1373	addq.l #1,%a0
				1374	tst.b (%a0)+ \| sign
				1375	jeq 1f
				1376	bset #FPSR_CC_NEG-24,%d0 \| N bit
				1377	1: cmp.w #0x7fff,(%a0)+ \| exponent
				1378	jeq 2f
				1379	\| test for zero
				1380	moveq #FPSR_CC_Z-24,%d1
				1381	tst.l (%a0)+
				1382	jne 9f
				1383	tst.l (%a0)
				1384	jne 9f
				1385	jra 8f
				1386	\| infinitiv and NAN
				1387	2: moveq #FPSR_CC_NAN-24,%d1
				1388	move.l (%a0)+,%d2
				1389	lsl.l #1,%d2 \| ignore high bit
				1390	jne 8f
				1391	tst.l (%a0)
				1392	jne 8f
				1393	moveq #FPSR_CC_INF-24,%d1
				1394	8: bset %d1,%d0
				1395	9: move.b %d0,(FPD_FPSR+0,FPDATA) \| set condition test result
				1396	\| move instructions enter here
				1397	\| Here, we test things in the exception status byte, and set
				1398	\| other things in the accrued exception byte accordingly.
				1399	\| Emulated instructions can set various things in the former,
				1400	\| as defined in fp_emu.h.
				1401	fp_final:
				1402	move.l (FPD_FPSR,FPDATA),%d0
				1403	#if 0
				1404	btst #FPSR_EXC_SNAN,%d0 \| EXC_SNAN
				1405	jne 1f
				1406	btst #FPSR_EXC_OPERR,%d0 \| EXC_OPERR
				1407	jeq 2f
				1408	1: bset #FPSR_AEXC_IOP,%d0 \| set IOP bit
				1409	2: btst #FPSR_EXC_OVFL,%d0 \| EXC_OVFL
				1410	jeq 1f
				1411	bset #FPSR_AEXC_OVFL,%d0 \| set OVFL bit
				1412	1: btst #FPSR_EXC_UNFL,%d0 \| EXC_UNFL
				1413	jeq 1f
				1414	btst #FPSR_EXC_INEX2,%d0 \| EXC_INEX2
				1415	jeq 1f
				1416	bset #FPSR_AEXC_UNFL,%d0 \| set UNFL bit
				1417	1: btst #FPSR_EXC_DZ,%d0 \| EXC_INEX1
				1418	jeq 1f
				1419	bset #FPSR_AEXC_DZ,%d0 \| set DZ bit
				1420	1: btst #FPSR_EXC_OVFL,%d0 \| EXC_OVFL
				1421	jne 1f
				1422	btst #FPSR_EXC_INEX2,%d0 \| EXC_INEX2
				1423	jne 1f
				1424	btst #FPSR_EXC_INEX1,%d0 \| EXC_INEX1
				1425	jeq 2f
				1426	1: bset #FPSR_AEXC_INEX,%d0 \| set INEX bit
				1427	2: move.l %d0,(FPD_FPSR,FPDATA)
				1428	#else
				1429	\| same as above, greatly optimized, but untested (yet)
				1430	move.l %d0,%d2
				1431	lsr.l #5,%d0
				1432	move.l %d0,%d1
				1433	lsr.l #4,%d1
				1434	or.l %d0,%d1
				1435	and.b #0x08,%d1
				1436	move.l %d2,%d0
				1437	lsr.l #6,%d0
				1438	or.l %d1,%d0
				1439	move.l %d2,%d1
				1440	lsr.l #4,%d1
				1441	or.b #0xdf,%d1
				1442	and.b %d1,%d0
				1443	move.l %d2,%d1
				1444	lsr.l #7,%d1
				1445	and.b #0x80,%d1
				1446	or.b %d1,%d0
				1447	and.b #0xf8,%d0
				1448	or.b %d0,%d2
				1449	move.l %d2,(FPD_FPSR,FPDATA)
				1450	#endif
				1451	move.b (FPD_FPSR+2,FPDATA),%d0
				1452	and.b (FPD_FPCR+2,FPDATA),%d0
				1453	jeq 1f
				1454	printf ,"send signal!!!\n"
				1455	1: jra fp_end