|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
|M68000 Hi-Performance Microprocessor Division
|M68060 Software Package
|Production Release P1.00 -- October 10, 1994
|
|M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
|
|THE SOFTWARE is provided on an "AS IS" basis and without warranty.
|To the maximum extent permitted by applicable law,
|MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
|INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
|and any warranty against infringement with regard to the SOFTWARE
|(INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
|
|To the maximum extent permitted by applicable law,
|IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
|(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
|BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
|ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
|Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
|
|You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
|so long as this entire notice is retained without alteration in any modified and/or
|redistributed versions, and that such modified versions are clearly identified as such.
|No licenses are granted by implication, estoppel or otherwise under any patents
|or trademarks of Motorola, Inc.
|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# litop.s:
#	This file is appended to the top of the 060FPLSP package
# and contains the entry points into the package. The user, in
# effect, branches to one of the branch table entries located here.
#

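# Note (added, not part of the original Motorola sources): each table
# entry below is eight bytes long (a six-byte bra.l plus a two-byte pad
# word), so callers reach the routines at fixed offsets from the start
# of the table: +0x00 idivs64, +0x08 idivu64, +0x10 imuls64,
# +0x18 imulu64, and +0x20 through +0x48 for the six cmp2 entries.
#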
	bra.l	_060LSP__idivs64_
	short	0x0000
	bra.l	_060LSP__idivu64_
	short	0x0000

	bra.l	_060LSP__imuls64_
	short	0x0000
	bra.l	_060LSP__imulu64_
	short	0x0000

	bra.l	_060LSP__cmp2_Ab_
	short	0x0000
	bra.l	_060LSP__cmp2_Aw_
	short	0x0000
	bra.l	_060LSP__cmp2_Al_
	short	0x0000
	bra.l	_060LSP__cmp2_Db_
	short	0x0000
	bra.l	_060LSP__cmp2_Dw_
	short	0x0000
	bra.l	_060LSP__cmp2_Dl_
	short	0x0000

# leave room for future possible additions.
	align	0x200

#########################################################################
# XDEF **************************************************************** #
# _060LSP__idivu64_(): Emulate 64-bit unsigned div instruction. #
# _060LSP__idivs64_(): Emulate 64-bit signed div instruction. #
# #
# This is the library version which is accessed as a subroutine #
# and therefore does not work exactly like the 680X0 div{s,u}.l #
# 64-bit divide instruction. #
# #
# XREF **************************************************************** #
# None. #
# #
# INPUT *************************************************************** #
# 0x4(sp) = divisor #
# 0x8(sp) = hi(dividend) #
# 0xc(sp) = lo(dividend) #
# 0x10(sp) = pointer to location to place quotient/remainder #
# #
# OUTPUT ************************************************************** #
# 0x10(sp) = points to location of remainder/quotient. #
# remainder is in first longword, quotient is in 2nd. #
# #
# ALGORITHM *********************************************************** #
# If the operands are signed, make them unsigned and save the #
# sign info for later. Separate out special cases like divide-by-zero #
# or 32-bit divides if possible. Else, use a special math algorithm #
# to calculate the result. #
# Restore sign info if signed instruction. Set the condition #
# codes before performing the final "rts". If the divisor was equal to #
# zero, then perform a divide-by-zero using a 16-bit implemented #
# divide instruction. This way, the operating system can record that #
# the event occurred even though it may not point to the correct place. #
# #
#########################################################################
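
# Note (added, not part of the original Motorola sources): a rough C
# model of the unsigned entry point, following the INPUT/OUTPUT layout
# above; the function and parameter names are illustrative only, and
# the divide-by-zero and overflow paths (exception / 'V' bit) are left
# out:
#
#	#include <stdint.h>
#
#	void lsp_idivu64(uint32_t divisor, uint32_t hi, uint32_t lo,
#			 uint32_t result[2])
#	{
#		uint64_t dividend = ((uint64_t)hi << 32) | lo;
#
#		result[0] = (uint32_t)(dividend % divisor);	/* remainder first */
#		result[1] = (uint32_t)(dividend / divisor);	/* quotient second */
#	}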

	set	POSNEG,		-1
	set	NDIVISOR,	-2
	set	NDIVIDEND,	-3
	set	DDSECOND,	-4
	set	DDNORMAL,	-8
	set	DDQUOTIENT,	-12
	set	DIV64_CC,	-16

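# Note (added for reference): the condition-code masks used throughout
# this file follow the standard CCR bit layout: X = 0x10, N = 0x08,
# Z = 0x04, V = 0x02, C = 0x01.
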
##########
# divs.l #
##########
	global		_060LSP__idivs64_
_060LSP__idivs64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	st		POSNEG(%a6)		# signed operation
	bra.b		ldiv64_cont

##########
# divu.l #
##########
	global		_060LSP__idivu64_
_060LSP__idivu64_:
# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-16
	movm.l		&0x3f00,-(%sp)		# save d2-d7
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,DIV64_CC(%a6)
	sf		POSNEG(%a6)		# unsigned operation

ldiv64_cont:
	mov.l		0x8(%a6),%d7		# fetch divisor

	beq.w		ldiv64eq0		# divisor is = 0!!!

	mov.l		0xc(%a6),%d5		# get dividend hi
	mov.l		0x10(%a6),%d6		# get dividend lo

# separate signed and unsigned divide
	tst.b		POSNEG(%a6)		# signed or unsigned?
	beq.b		ldspecialcases		# use positive divide

# save the sign of the divisor
# make divisor unsigned if it's negative
	tst.l		%d7			# chk sign of divisor
	slt		NDIVISOR(%a6)		# save sign of divisor
	bpl.b		ldsgndividend
	neg.l		%d7			# complement negative divisor

# save the sign of the dividend
# make dividend unsigned if it's negative
ldsgndividend:
	tst.l		%d5			# chk sign of hi(dividend)
	slt		NDIVIDEND(%a6)		# save sign of dividend
	bpl.b		ldspecialcases

	mov.w		&0x0,%cc		# clear 'X' cc bit
	negx.l		%d6			# complement signed dividend
	negx.l		%d5
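# (Added note: the mov.w/negx.l/negx.l sequence above forms the 64-bit
# two's complement of %d5:%d6 -- clear 'X', negate the low longword,
# then negate-with-extend the high longword so the borrow propagates.)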

# extract some special cases:
#	- is (dividend == 0) ?
#	- is (hi(dividend) == 0 && (divisor <= lo(dividend))) ? (32-bit div)
ldspecialcases:
	tst.l		%d5			# is (hi(dividend) == 0)
	bne.b		ldnormaldivide		# no, so try it the long way

	tst.l		%d6			# is (lo(dividend) == 0), too
	beq.w		lddone			# yes, so (dividend == 0)

	cmp.l		%d7,%d6			# is (divisor <= lo(dividend))
	bls.b		ld32bitdivide		# yes, so use 32 bit divide

	exg		%d5,%d6			# q = 0, r = dividend
	bra.w		ldivfinish		# can't divide, we're done.

ld32bitdivide:
	tdivu.l		%d7,%d5:%d6		# it's only a 32/32 bit div!

	bra.b		ldivfinish

ldnormaldivide:
# last special case:
#	- is hi(dividend) >= divisor ? if yes, then overflow
	cmp.l		%d7,%d5
	bls.b		lddovf			# answer won't fit in 32 bits

# perform the divide algorithm:
	bsr.l		ldclassical		# do int divide

# separate into signed and unsigned finishes.
ldivfinish:
	tst.b		POSNEG(%a6)		# do divs, divu separately
	beq.b		lddone			# divu has no processing!!!

# it was a divs.l, so ccode setting is a little more complicated...
	tst.b		NDIVIDEND(%a6)		# remainder has same sign
	beq.b		ldcc			# as dividend.
	neg.l		%d5			# sgn(rem) = sgn(dividend)
ldcc:
	mov.b		NDIVISOR(%a6),%d0
	eor.b		%d0,NDIVIDEND(%a6)	# chk if quotient is negative
	beq.b		ldqpos			# branch to quot positive

# 0x80000000 (-2^31) is the most negative number representable in 32
# bits; negating 0x80000000 yields 0x80000000 again.
	cmpi.l		%d6,&0x80000000		# will (-quot) fit in 32 bits?
	bhi.b		lddovf

	neg.l		%d6			# make (-quot) 2's comp

	bra.b		lddone

ldqpos:
	btst		&0x1f,%d6		# will (+quot) fit in 32 bits?
	bne.b		lddovf

lddone:
# if the register numbers are the same, only the quotient gets saved.
# so, if we always save the quotient second, we save ourselves a cmp&beq
	andi.w		&0x10,DIV64_CC(%a6)
	mov.w		DIV64_CC(%a6),%cc
	tst.l		%d6			# may set 'N' ccode bit

# here, the result is in %d5 (remainder) and %d6 (quotient). save the
# values at the location pointed to by 0x14(%a6).
# use movm here to not disturb the condition codes.
ldexit:
	movm.l		&0x0060,([0x14,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# the result should be the unchanged dividend
lddovf:
	mov.l		0xc(%a6),%d5		# get dividend hi
	mov.l		0x10(%a6),%d6		# get dividend lo

	andi.w		&0x1c,DIV64_CC(%a6)
	ori.w		&0x02,DIV64_CC(%a6)	# set 'V' ccode bit
	mov.w		DIV64_CC(%a6),%cc

	bra.b		ldexit

ldiv64eq0:
	mov.l		0xc(%a6),([0x14,%a6])
	mov.l		0x10(%a6),([0x14,%a6],0x4)

	mov.w		DIV64_CC(%a6),%cc

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x00fc		# restore d2-d7
	unlk		%a6
# EPILOGUE END ##########################################################

	divu.w		&0x0,%d0		# force a divbyzero exception
	rts

###########################################################################
#########################################################################
# This routine uses the 'classical' Algorithm D from Donald Knuth's #
# Art of Computer Programming, vol II, Seminumerical Algorithms. #
# For this implementation b=2**16, and the target is U1U2U3U4/V1V2, #
# where U,V are words of the quadword dividend and longword divisor, #
# and U1, V1 are the most significant words. #
# #
# The most sig. longword of the 64 bit dividend must be in %d5, least #
# in %d6. The divisor must be in %d7, and both operands must already #
# be unsigned (the caller strips the signs and restores them later). #
# The quotient is returned in %d6, remainder in %d5, unless the #
# v (overflow) bit is set in the saved %ccr. If overflow, the dividend #
# is unchanged. #
#########################################################################
ldclassical:
# if the divisor msw is 0, use a simpler algorithm than the full-blown
# one at lddknuth:

	cmpi.l		%d7,&0xffff
	bhi.b		lddknuth		# go use D. Knuth algorithm

# Since the divisor is only a word (and larger than the mslw of the dividend),
# a simpler algorithm may be used:
# In the general case, four quotient words would be created by
# dividing the divisor word into each dividend word. In this case,
# the first two quotient words must be zero, or overflow would occur.
# Since we already checked this case above, we can treat the most significant
# longword of the dividend as (0) remainder (see Knuth) and merely complete
# the last two divisions to get a quotient longword and word remainder:

	clr.l		%d1
	swap		%d5			# same as r*b if previous step rqd
	swap		%d6			# get u3 to lsw position
	mov.w		%d6,%d5			# rb + u3

	divu.w		%d7,%d5

	mov.w		%d5,%d1			# first quotient word
	swap		%d6			# get u4
	mov.w		%d6,%d5			# rb + u4

	divu.w		%d7,%d5

	swap		%d1
	mov.w		%d5,%d1			# 2nd quotient 'digit'
	clr.w		%d5
	swap		%d5			# now remainder
	mov.l		%d1,%d6			# and quotient

	rts
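
# Note (added, not part of the original Motorola sources): a rough C
# sketch of the fast path above; hi/lo/div are illustrative names for
# the values in %d5/%d6/%d7.  Because hi < div <= 0xffff, each partial
# dividend fits a 32/16 divu.w and the quotient fits in 32 bits:
#
#	uint32_t u2 = hi & 0xffff;			/* hi < 0x10000 here   */
#	uint32_t t1 = (u2 << 16) | (lo >> 16);		/* r*b + u3            */
#	uint32_t q1 = t1 / div, r1 = t1 % div;		/* 1st quotient word   */
#	uint32_t t2 = (r1 << 16) | (lo & 0xffff);	/* r*b + u4            */
#	uint32_t q2 = t2 / div, rem = t2 % div;		/* 2nd word, remainder */
#	uint32_t quot = (q1 << 16) | q2;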

lddknuth:
# In this algorithm, the divisor is treated as a 2 digit (word) number
# which is divided into a 3 digit (word) dividend to get one quotient
# digit (word). After subtraction, the dividend is shifted and the
# process repeated. Before beginning, the divisor and dividend are
# 'normalized' so that the process of estimating the quotient digit
# will yield verifiably correct results.

	clr.l		DDNORMAL(%a6)		# count of shifts for normalization
	clr.b		DDSECOND(%a6)		# clear flag for quotient digits
	clr.l		%d1			# %d1 will hold trial quotient
lddnchk:
	btst		&31,%d7			# must we normalize? first word of
	bne.b		lddnormalized		# divisor (V1) must be >= 65536/2
	addq.l		&0x1,DDNORMAL(%a6)	# count normalization shifts
	lsl.l		&0x1,%d7		# shift the divisor
	lsl.l		&0x1,%d6		# shift u4,u3 with overflow to u2
	roxl.l		&0x1,%d5		# shift u1,u2
	bra.w		lddnchk
lddnormalized:

# Now calculate an estimate of the quotient words (msw first, then lsw).
# The comments use subscripts for the first quotient digit determination.
	mov.l		%d7,%d3			# divisor
	mov.l		%d5,%d2			# dividend mslw
	swap		%d2
	swap		%d3
	cmp.w		%d2,%d3			# V1 = U1 ?
	bne.b		lddqcalc1
	mov.w		&0xffff,%d1		# use max trial quotient word
	bra.b		lddadj0
lddqcalc1:
	mov.l		%d5,%d1

	divu.w		%d3,%d1			# use quotient of mslw/msw

	andi.l		&0x0000ffff,%d1		# zero any remainder
lddadj0:

# now test the trial quotient and adjust. This step plus the
# normalization assures (according to Knuth) that the trial
# quotient will be at worst 1 too large.
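# (Added note: in Knuth's notation the estimate and correction below
# compute
#	qhat = min((U1*b + U2) / V1, b - 1),	b = 2^16
# and decrement qhat while
#	V2*qhat > ((U1*b + U2) - V1*qhat)*b + U3,
# which together with the normalization V1 >= b/2 leaves qhat at most
# one too large before the full multiply/subtract check.)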
	mov.l		%d6,-(%sp)
	clr.w		%d6			# word u3 left
	swap		%d6			# in lsw position
lddadj1:	mov.l	%d7,%d3
	mov.l		%d1,%d2
	mulu.w		%d7,%d2			# V2q
	swap		%d3
	mulu.w		%d1,%d3			# V1q
	mov.l		%d5,%d4			# U1U2
	sub.l		%d3,%d4			# U1U2 - V1q

	swap		%d4

	mov.w		%d4,%d0
	mov.w		%d6,%d4			# insert lower word (U3)

	tst.w		%d0			# is upper word set?
	bne.w		lddadjd1

#	add.l		%d6,%d4			# (U1U2 - V1q) + U3

	cmp.l		%d2,%d4
	bls.b		lddadjd1		# is V2q > (U1U2-V1q) + U3 ?
	subq.l		&0x1,%d1		# yes, decrement and recheck
	bra.b		lddadj1
lddadjd1:
# now test the word by multiplying it by the divisor (V1V2) and comparing
# the 3 digit (word) result with the current dividend words
	mov.l		%d5,-(%sp)		# save %d5 (%d6 already saved)
	mov.l		%d1,%d6
	swap		%d6			# shift answer to ms 3 words
	mov.l		%d7,%d5
	bsr.l		ldmm2
	mov.l		%d5,%d2			# now %d2,%d3 are trial*divisor
	mov.l		%d6,%d3
	mov.l		(%sp)+,%d5		# restore dividend
	mov.l		(%sp)+,%d6
	sub.l		%d3,%d6
	subx.l		%d2,%d5			# subtract double precision
	bcc		ldd2nd			# no carry, do next quotient digit
	subq.l		&0x1,%d1		# q is one too large
# need to add back divisor longword to current ms 3 digits of dividend
# - according to Knuth, this is done only 2 out of 65536 times for random
# divisor, dividend selection.
	clr.l		%d2
	mov.l		%d7,%d3
	swap		%d3
	clr.w		%d3			# %d3 now ls word of divisor
	add.l		%d3,%d6			# aligned with 3rd word of dividend
	addx.l		%d2,%d5
	mov.l		%d7,%d3
	clr.w		%d3			# %d3 now ms word of divisor
	swap		%d3			# aligned with 2nd word of dividend
	add.l		%d3,%d5
ldd2nd:
	tst.b		DDSECOND(%a6)		# both q words done?
	bne.b		lddremain
# first quotient digit now correct. store digit and shift the
# (subtracted) dividend
	mov.w		%d1,DDQUOTIENT(%a6)
	clr.l		%d1
	swap		%d5
	swap		%d6
	mov.w		%d6,%d5
	clr.w		%d6
	st		DDSECOND(%a6)		# second digit
	bra.w		lddnormalized
lddremain:
# add 2nd word to quotient, get the remainder.
	mov.w		%d1,DDQUOTIENT+2(%a6)
# shift down one word/digit to renormalize remainder.
	mov.w		%d5,%d6
	swap		%d6
	swap		%d5
	mov.l		DDNORMAL(%a6),%d7	# get norm shift count
	beq.b		lddrn
	subq.l		&0x1,%d7		# set for loop count
lddnlp:
	lsr.l		&0x1,%d5		# shift into %d6
	roxr.l		&0x1,%d6
	dbf		%d7,lddnlp
lddrn:
	mov.l		%d6,%d5			# remainder
	mov.l		DDQUOTIENT(%a6),%d6	# quotient

	rts
ldmm2:
# factors for the 32X32->64 multiplication are in %d5 and %d6.
# returns 64 bit result in %d5 (hi) %d6 (lo).
# destroys %d2,%d3,%d4.

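# (Added note: this evaluates
#	(a_hi*2^16 + a_lo)*(b_hi*2^16 + b_lo)
#	   = a_hi*b_hi*2^32 + (a_hi*b_lo + a_lo*b_hi)*2^16 + a_lo*b_lo
# with four mulu.w products, folding the carries of the two middle
# terms into the upper longword with addx.)
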
# multiply hi,lo words of each factor to get 4 intermediate products
	mov.l		%d6,%d2
	mov.l		%d6,%d3
	mov.l		%d5,%d4
	swap		%d3
	swap		%d4
	mulu.w		%d5,%d6			# %d6 <- lsw*lsw
	mulu.w		%d3,%d5			# %d5 <- msw-dest*lsw-source
	mulu.w		%d4,%d2			# %d2 <- msw-source*lsw-dest
	mulu.w		%d4,%d3			# %d3 <- msw*msw
# now use swap and addx to consolidate to two longwords
	clr.l		%d4
	swap		%d6
	add.w		%d5,%d6			# add msw of l*l to lsw of m*l product
	addx.w		%d4,%d3			# add any carry to m*m product
	add.w		%d2,%d6			# add in lsw of other m*l product
	addx.w		%d4,%d3			# add any carry to m*m product
	swap		%d6			# %d6 is low 32 bits of final product
	clr.w		%d5
	clr.w		%d2			# lsw of two mixed products used,
	swap		%d5			# now use msws of longwords
	swap		%d2
	add.l		%d2,%d5
	add.l		%d3,%d5			# %d5 now ms 32 bits of final product
	rts

#########################################################################
# XDEF **************************************************************** #
# _060LSP__imulu64_(): Emulate 64-bit unsigned mul instruction. #
# _060LSP__imuls64_(): Emulate 64-bit signed mul instruction. #
# #
# This is the library version which is accessed as a subroutine #
# and therefore does not work exactly like the 680X0 mul{s,u}.l #
# 64-bit multiply instruction. #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# 0x4(sp) = multiplier #
# 0x8(sp) = multiplicand #
# 0xc(sp) = pointer to location to place 64-bit result #
# #
# OUTPUT ************************************************************** #
# 0xc(sp) = points to location of 64-bit result #
# #
# ALGORITHM *********************************************************** #
# Perform the multiply in pieces using 16x16->32 unsigned #
# multiplies and "add" instructions. #
# Set the condition codes as appropriate before performing an #
# "rts". #
# #
#########################################################################

	set	MUL64_CC,	-4

	global		_060LSP__imulu64_
_060LSP__imulu64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.w		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.w		mulu64_zero		# handle zero separately

#########################################################################
# 63                          32                             0         #
#  ----------------------------                                        #
#  | hi(mplier) * hi(mplicand)|                                        #
#  ----------------------------                                        #
#               -----------------------------                          #
#               | hi(mplier) * lo(mplicand) |                          #
#               -----------------------------                          #
#               -----------------------------                          #
#               | lo(mplier) * hi(mplicand) |                          #
#               -----------------------------                          #
#    |                        -----------------------------            #
#  --|--                      | lo(mplier) * lo(mplicand) |            #
#    |                        -----------------------------            #
#  ========================================================            #
#  --------------------------------------------------------            #
#  |        hi(result)        |         lo(result)        |            #
#  --------------------------------------------------------            #
#########################################################################
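# Note (added, not part of the original Motorola sources): a rough C
# equivalent of the partial-product scheme drawn above; mr/md are
# illustrative names for the 32-bit multiplier and multiplicand:
#
#	uint32_t lo_lo = (mr & 0xffff) * (md & 0xffff);	/* [1] */
#	uint32_t hi_lo = (mr >> 16)    * (md & 0xffff);	/* [2] */
#	uint32_t lo_hi = (mr & 0xffff) * (md >> 16);	/* [3] */
#	uint32_t hi_hi = (mr >> 16)    * (md >> 16);	/* [4] */
#	uint64_t result = ((uint64_t)hi_hi << 32) + (uint64_t)lo_lo
#			+ ((uint64_t)hi_lo << 16) + ((uint64_t)lo_hi << 16);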
mulu64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# [4] + hi([2])
	add.l		%d3,%d1			# [4] + hi([3])

# now, grab the condition codes. only one that can be set is 'N'.
# 'N' CAN be set even though the operation is unsigned, if bit 63 of
# the result is set.
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		mulu64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
mulu64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 and d0. the current strategy is to save
# the values at the location pointed to by 0x10(%a6).
# use movm here to not disturb the condition codes.
mulu64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# save the zero result to the destination and set the 'Z' ccode bit.
mulu64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		mulu64_end

##########
# muls.l #
##########
	global		_060LSP__imuls64_
_060LSP__imuls64_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3c00,-(%sp)		# save d2-d5
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,MUL64_CC(%a6)	# save incoming ccodes

	mov.l		0x8(%a6),%d0		# store multiplier in d0
	beq.b		mulu64_zero		# handle zero separately

	mov.l		0xc(%a6),%d1		# get multiplicand in d1
	beq.b		mulu64_zero		# handle zero separately

	clr.b		%d5			# clear sign tag
	tst.l		%d0			# is multiplier negative?
	bge.b		muls64_chk_md_sgn	# no
	neg.l		%d0			# make multiplier positive

	ori.b		&0x1,%d5		# save multiplier sgn

# the result sign is the exclusive or of the operand sign bits.
muls64_chk_md_sgn:
	tst.l		%d1			# is multiplicand negative?
	bge.b		muls64_alg		# no
	neg.l		%d1			# make multiplicand positive

	eori.b		&0x1,%d5		# calculate correct sign

#########################################################################
# 63                          32                             0         #
#  ----------------------------                                        #
#  | hi(mplier) * hi(mplicand)|                                        #
#  ----------------------------                                        #
#               -----------------------------                          #
#               | hi(mplier) * lo(mplicand) |                          #
#               -----------------------------                          #
#               -----------------------------                          #
#               | lo(mplier) * hi(mplicand) |                          #
#               -----------------------------                          #
#    |                        -----------------------------            #
#  --|--                      | lo(mplier) * lo(mplicand) |            #
#    |                        -----------------------------            #
#  ========================================================            #
#  --------------------------------------------------------            #
#  |        hi(result)        |         lo(result)        |            #
#  --------------------------------------------------------            #
#########################################################################
muls64_alg:
# load temp registers with operands
	mov.l		%d0,%d2			# mr in d2
	mov.l		%d0,%d3			# mr in d3
	mov.l		%d1,%d4			# md in d4
	swap		%d3			# hi(mr) in lo d3
	swap		%d4			# hi(md) in lo d4

# complete necessary multiplies:
	mulu.w		%d1,%d0			# [1] lo(mr) * lo(md)
	mulu.w		%d3,%d1			# [2] hi(mr) * lo(md)
	mulu.w		%d4,%d2			# [3] lo(mr) * hi(md)
	mulu.w		%d4,%d3			# [4] hi(mr) * hi(md)

# add lo portions of [2],[3] to hi portion of [1].
# add carries produced from these adds to [4].
# lo([1]) is the final lo 16 bits of the result.
	clr.l		%d4			# load d4 w/ zero value
	swap		%d0			# hi([1]) <==> lo([1])
	add.w		%d1,%d0			# hi([1]) + lo([2])
	addx.l		%d4,%d3			# [4] + carry
	add.w		%d2,%d0			# hi([1]) + lo([3])
	addx.l		%d4,%d3			# [4] + carry
	swap		%d0			# lo([1]) <==> hi([1])

# lo portions of [2],[3] have been added in to final result.
# now, clear lo, put hi in lo reg, and add to [4]
	clr.w		%d1			# clear lo([2])
	clr.w		%d2			# clear lo([3])
	swap		%d1			# hi([2]) in lo d1
	swap		%d2			# hi([3]) in lo d2
	add.l		%d2,%d1			# [4] + hi([2])
	add.l		%d3,%d1			# [4] + hi([3])

	tst.b		%d5			# should result be signed?
	beq.b		muls64_done		# no

# result should be a signed negative number.
# compute 2's complement of the unsigned number:
#	- negate all bits and add 1
muls64_neg:
	not.l		%d0			# negate lo(result) bits
	not.l		%d1			# negate hi(result) bits
	addq.l		&1,%d0			# add 1 to lo(result)
	addx.l		%d4,%d1			# add carry to hi(result)

muls64_done:
	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4		# keep old 'X' bit
	tst.l		%d1			# may set 'N' bit
	bpl.b		muls64_ddone
	ori.b		&0x8,%d4		# set 'N' bit
muls64_ddone:
	mov.w		%d4,%cc

# here, the result is in d1 and d0. the current strategy is to save
# the values at the location pointed to by 0x10(%a6).
# use movm here to not disturb the condition codes.
muls64_end:
	exg		%d1,%d0
	movm.l		&0x0003,([0x10,%a6])	# save result

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x003c		# restore d2-d5
	unlk		%a6
# EPILOGUE END ##########################################################

	rts

# one or both of the operands is zero so the result is also zero.
# save the zero result to the destination and set the 'Z' ccode bit.
muls64_zero:
	clr.l		%d0
	clr.l		%d1

	mov.w		MUL64_CC(%a6),%d4
	andi.b		&0x10,%d4
	ori.b		&0x4,%d4
	mov.w		%d4,%cc			# set 'Z' ccode bit

	bra.b		muls64_end

#########################################################################
# XDEF **************************************************************** #
# _060LSP__cmp2_Ab_(): Emulate "cmp2.b An,<ea>". #
# _060LSP__cmp2_Aw_(): Emulate "cmp2.w An,<ea>". #
# _060LSP__cmp2_Al_(): Emulate "cmp2.l An,<ea>". #
# _060LSP__cmp2_Db_(): Emulate "cmp2.b Dn,<ea>". #
# _060LSP__cmp2_Dw_(): Emulate "cmp2.w Dn,<ea>". #
# _060LSP__cmp2_Dl_(): Emulate "cmp2.l Dn,<ea>". #
# #
# This is the library version which is accessed as a subroutine #
# and therefore does not work exactly like the 680X0 "cmp2" #
# instruction. #
# #
# XREF **************************************************************** #
# None #
# #
# INPUT *************************************************************** #
# 0x4(sp) = Rn #
# 0x8(sp) = pointer to boundary pair #
# #
# OUTPUT ************************************************************** #
# cc = condition codes are set correctly #
# #
# ALGORITHM *********************************************************** #
# In the interest of simplicity, all operands are converted to #
# longword size whether the operation is byte, word, or long. The #
# bounds are sign extended accordingly. If Rn is a data register, Rn is #
# also sign extended. If Rn is an address register, it need not be sign #
# extended since the full register is always used. #
# The condition codes are set correctly before the final "rts". #
# #
#########################################################################

	set	CMP2_CC,	-4

	global		_060LSP__cmp2_Ab_
_060LSP__cmp2_Ab_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Aw_
_060LSP__cmp2_Aw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Al_
_060LSP__cmp2_Al_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Db_
_060LSP__cmp2_Db_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.b		([0xc,%a6],0x0),%d0
	mov.b		([0xc,%a6],0x1),%d1

	extb.l		%d0			# sign extend lo bnd
	extb.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend byte to long so we can do simple longword compares.
	extb.l		%d2			# sign extend data byte
	bra.w		l_cmp2_cmp		# go do the compare emulation

	global		_060LSP__cmp2_Dw_
_060LSP__cmp2_Dw_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.w		([0xc,%a6],0x0),%d0
	mov.w		([0xc,%a6],0x2),%d1

	ext.l		%d0			# sign extend lo bnd
	ext.l		%d1			# sign extend hi bnd

# operation is a data register compare.
# sign extend word to long so we can do simple longword compares.
	ext.l		%d2			# sign extend data word
	bra.w		l_cmp2_cmp		# go emulate compare

	global		_060LSP__cmp2_Dl_
_060LSP__cmp2_Dl_:

# PROLOGUE BEGIN ########################################################
	link.w		%a6,&-4
	movm.l		&0x3800,-(%sp)		# save d2-d4
#	fmovm.l		&0x0,-(%sp)		# save no fpregs
# PROLOGUE END ##########################################################

	mov.w		%cc,CMP2_CC(%a6)
	mov.l		0x8(%a6),%d2		# get regval

	mov.l		([0xc,%a6],0x0),%d0
	mov.l		([0xc,%a6],0x4),%d1

#
# To set the ccodes correctly:
#	(1) save 'Z' bit from (Rn - lo)
#	(2) save 'Z' and 'C' bits from ((hi - lo) - (Rn - lo))
#	(3) keep 'X', 'N', and 'V' from before instruction
#	(4) combine ccodes
#
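# Note (added, not part of the original Motorola sources): a rough C
# model of the recipe above; rn/lo/hi stand for the already-extended
# values in %d2/%d0/%d1:
#
#	uint32_t diff = rn - lo, range = hi - lo;
#	int Z = (rn == lo) || (diff == range);	/* Rn equals a bound   */
#	int C = (diff > range);			/* Rn is out of bounds */
#	/* new CCR = (old CCR & (X|N|V)) | (Z ? 0x04 : 0) | (C ? 0x01 : 0) */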
l_cmp2_cmp:
	sub.l		%d0,%d2			# (Rn - lo)
	mov.w		%cc,%d3			# fetch resulting ccodes
	andi.b		&0x4,%d3		# keep 'Z' bit
	sub.l		%d0,%d1			# (hi - lo)
	cmp.l		%d1,%d2			# ((hi - lo) - (Rn - lo))

	mov.w		%cc,%d4			# fetch resulting ccodes
	or.b		%d4,%d3			# combine w/ earlier ccodes
	andi.b		&0x5,%d3		# keep 'Z' and 'C'

	mov.w		CMP2_CC(%a6),%d4	# fetch old ccodes
	andi.b		&0x1a,%d4		# keep 'X','N','V' bits
	or.b		%d3,%d4			# insert new ccodes
	mov.w		%d4,%cc			# save new ccodes

# EPILOGUE BEGIN ########################################################
#	fmovm.l		(%sp)+,&0x0		# restore no fpregs
	movm.l		(%sp)+,&0x001c		# restore d2-d4
	unlk		%a6
# EPILOGUE END ##########################################################

	rts