/*
 * This file was generated automatically by gen-template.py for 'armv5te'.
 *
 * --> DO NOT EDIT <--
 */

/* File: armv5te/header.S */
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

| 24 | #if defined(WITH_JIT) |
| 25 | |
| 26 | /* |
| 27 | * ARMv5 definitions and declarations. |
| 28 | */ |
| 29 | |
| 30 | /* |
| 31 | ARM EABI general notes: |
| 32 | |
| 33 | r0-r3 hold first 4 args to a method; they are not preserved across method calls |
| 34 | r4-r8 are available for general use |
| 35 | r9 is given special treatment in some situations, but not for us |
| 36 | r10 (sl) seems to be generally available |
| 37 | r11 (fp) is used by gcc (unless -fomit-frame-pointer is set) |
| 38 | r12 (ip) is scratch -- not preserved across method calls |
| 39 | r13 (sp) should be managed carefully in case a signal arrives |
| 40 | r14 (lr) must be preserved |
| 41 | r15 (pc) can be tinkered with directly |
| 42 | |
| 43 | r0 holds returns of <= 4 bytes |
| 44 | r0-r1 hold returns of 8 bytes, low word in r0 |
| 45 | |
| 46 | Callee must save/restore r4+ (except r12) if it modifies them. |
| 47 | |
| 48 | Stack is "full descending". Only the arguments that don't fit in the first 4 |
| 49 | registers are placed on the stack. "sp" points at the first stacked argument |
| 50 | (i.e. the 5th arg). |
| 51 | |
| 52 | VFP: single-precision results in s0, double-precision results in d0. |
| 53 | |
| 54 | In the EABI, "sp" must be 64-bit aligned on entry to a function, and any |
| 55 | 64-bit quantities (long long, double) must be 64-bit aligned. |
| 56 | */ |
| 57 | |
| 58 | /* |
| 59 | JIT and ARM notes: |
| 60 | |
| 61 | The following registers have fixed assignments: |
| 62 | |
| 63 | reg nick purpose |
| 64 | r5 rFP interpreted frame pointer, used for accessing locals and args |
| 65 | r6 rGLUE MterpGlue pointer |
| 66 | |
| 67 | The following registers have fixed assignments in mterp but are scratch |
| 68 | registers in compiled code |
| 69 | |
| 70 | reg nick purpose |
| 71 | r4 rPC interpreted program counter, used for fetching instructions |
| 72 | r7 rIBASE interpreted instruction base pointer, used for computed goto |
| 73 | r8 rINST first 16-bit code unit of current instruction |
| 74 | |
| 75 | Macros are provided for common operations. Each macro MUST emit only |
| 76 | one instruction to make instruction-counting easier. They MUST NOT alter |
| 77 | unspecified registers or condition codes. |
| 78 | */ |
| 79 | |
| 80 | /* single-purpose registers, given names for clarity */ |
| 81 | #define rPC r4 |
| 82 | #define rFP r5 |
| 83 | #define rGLUE r6 |
| 84 | #define rIBASE r7 |
| 85 | #define rINST r8 |
| 86 | |
| 87 | /* |
| 88 | * Given a frame pointer, find the stack save area. |
| 89 | * |
| 90 | * In C this is "((StackSaveArea*)(_fp) -1)". |
| 91 | */ |
| 92 | #define SAVEAREA_FROM_FP(_reg, _fpreg) \ |
| 93 | sub _reg, _fpreg, #sizeofStackSaveArea |
| 94 | |
| 95 | /* |
| 96 | * This is a #include, not a %include, because we want the C pre-processor |
| 97 | * to expand the macros into assembler assignment statements. |
| 98 | */ |
| 99 | #include "../../../mterp/common/asm-constants.h" |
| 100 | |
| 101 | |
| 102 | /* File: armv5te/platform.S */ |
| 103 | /* |
| 104 | * =========================================================================== |
| 105 | * CPU-version-specific defines |
| 106 | * =========================================================================== |
| 107 | */ |
| 108 | |
| 109 | /* |
| 110 | * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5. |
| 111 | * Jump to subroutine. |
| 112 | * |
| 113 | * May modify IP and LR. |
| 114 | */ |
| 115 | .macro LDR_PC_LR source |
| 116 | mov lr, pc |
| 117 | ldr pc, \source |
| 118 | .endm |
| 119 | |
| 120 | |
| 121 | .global dvmCompilerTemplateStart |
| 122 | .type dvmCompilerTemplateStart, %function |
| 123 | .text |
| 124 | |
| 125 | dvmCompilerTemplateStart: |
| 126 | |
| 127 | /* ------------------------------ */ |
| 128 | .balign 4 |
| 129 | .global dvmCompiler_TEMPLATE_CMP_LONG |
| 130 | dvmCompiler_TEMPLATE_CMP_LONG: |
| 131 | /* File: armv5te/TEMPLATE_CMP_LONG.S */ |
| 132 | /* |
| 133 | * Compare two 64-bit values. Puts 0, 1, or -1 into the destination |
| 134 | * register based on the results of the comparison. |
| 135 | * |
| 136 | * We load the full values with LDM, but in practice many values could |
| 137 | * be resolved by only looking at the high word. This could be made |
| 138 | * faster or slower by splitting the LDM into a pair of LDRs. |
| 139 | * |
| 140 | * If we just wanted to set condition flags, we could do this: |
| 141 | * subs ip, r0, r2 |
| 142 | * sbcs ip, r1, r3 |
| 143 | * subeqs ip, r0, r2 |
| 144 | * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific |
| 145 | * integer value, which we can do with 2 conditional mov/mvn instructions |
| 146 | * (set 1, set -1; if they're equal we already have 0 in ip), giving |
| 147 | * us a constant 5-cycle path plus a branch at the end to the |
| 148 | * instruction epilogue code. The multi-compare approach below needs |
| 149 | * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch |
| 150 | * in the worst case (the 64-bit values are equal). |
| 151 | */ |
| 152 | /* cmp-long vAA, vBB, vCC */ |
| 153 | cmp r1, r3 @ compare (vBB+1, vCC+1) |
| 154 | blt .LTEMPLATE_CMP_LONG_less @ signed compare on high part |
| 155 | bgt .LTEMPLATE_CMP_LONG_greater |
| 156 | subs r0, r0, r2 @ r0<- r0 - r2 |
| 157 | bxeq lr |
| 158 | bhi .LTEMPLATE_CMP_LONG_greater @ unsigned compare on low part |
| 159 | .LTEMPLATE_CMP_LONG_less: |
| 160 | mvn r0, #0 @ r0<- -1 |
| 161 | bx lr |
| 162 | .LTEMPLATE_CMP_LONG_greater: |
| 163 | mov r0, #1 @ r0<- 1 |
| 164 | bx lr |
| 165 | |
| 166 | |
| 167 | /* ------------------------------ */ |
| 168 | .balign 4 |
| 169 | .global dvmCompiler_TEMPLATE_RETURN |
| 170 | dvmCompiler_TEMPLATE_RETURN: |
| 171 | /* File: armv5te/TEMPLATE_RETURN.S */ |
| 172 | /* |
| 173 | * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX. |
| 174 | * If the stored value in returnAddr |
| 175 | * is non-zero, the caller is compiled by the JIT thus return to the |
| 176 | * address in the code cache following the invoke instruction. Otherwise |
| 177 | * return to the special dvmJitToInterpNoChain entry point. |
| 178 | */ |
| 179 | SAVEAREA_FROM_FP(r0, rFP) @ r0<- saveArea (old) |
| 180 | ldr r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame |
| 181 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 182 | ldr rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc |
| 183 | ldr r9, [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret |
| 184 | ldr r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 185 | @ r2<- method we're returning to |
| 186 | ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self |
| 187 | cmp r2, #0 @ break frame? |
| 188 | beq 1f @ bail to interpreter |
| 189 | ldr r0, .LdvmJitToInterpNoChain @ defined in footer.S |
| 190 | mov rFP, r10 @ publish new FP |
| 191 | ldrne r10, [r2, #offMethod_clazz] @ r10<- method->clazz |
| 192 | ldr r8, [r8] @ r8<- suspendCount |
| 193 | |
| 194 | str r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method |
| 195 | ldr r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex |
| 196 | str rFP, [r3, #offThread_curFrame] @ self->curFrame = fp |
| 197 | add rPC, rPC, #6 @ publish new rPC (advance 6 bytes) |
| 198 | str r1, [rGLUE, #offGlue_methodClassDex] |
| 199 | cmp r8, #0 @ check the suspendCount |
| 200 | movne r9, #0 @ clear the chaining cell address |
| 201 | cmp r9, #0 @ chaining cell exists? |
| 202 | blxne r9 @ jump to the chaining cell |
| 203 | mov pc, r0 @ callsite is interpreted |
| 204 | 1: |
| 205 | stmia rGLUE, {rPC, rFP} @ SAVE_PC_FP_TO_GLUE() |
| 206 | ldr r2, .LdvmMterpStdBail @ defined in footer.S |
| 207 | mov r1, #0 @ changeInterp = false |
| 208 | mov r0, rGLUE @ Expecting rGLUE in r0 |
| 209 | blx r2 @ exit the interpreter |
| 210 | |
| 211 | /* ------------------------------ */ |
| 212 | .balign 4 |
| 213 | .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT |
| 214 | dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT: |
| 215 | /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */ |
| 216 | /* |
| 217 | * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC |
| 218 | * into rPC then jump to dvmJitToInterpNoChain to dispatch the |
| 219 | * runtime-resolved callee. |
| 220 | */ |
| 221 | @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite |
| 222 | ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize |
| 223 | ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize |
| 224 | ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd |
| 225 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 226 | add r3, r1, #1 @ Thumb addr is odd |
| 227 | SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area |
| 228 | sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) |
| 229 | SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area |
| 230 | sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) |
| 231 | ldr r8, [r8] @ r3<- suspendCount (int) |
| 232 | cmp r10, r9 @ bottom < interpStackEnd? |
| 233 | bxlt lr @ return to raise stack overflow excep. |
| 234 | @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite |
| 235 | ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz |
| 236 | ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags |
| 237 | str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] |
| 238 | str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] |
| 239 | ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns |
| 240 | |
| 241 | |
| 242 | @ set up newSaveArea |
| 243 | str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] |
| 244 | str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] |
| 245 | str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 246 | cmp r8, #0 @ suspendCount != 0 |
| 247 | bxne lr @ bail to the interpreter |
| 248 | tst r10, #ACC_NATIVE |
| 249 | bne .LinvokeNative |
| 250 | /* |
| 251 | * If we want to punt to the interpreter for native call, swap the bne with |
| 252 | * the following |
| 253 | * bxne lr |
| 254 | */ |
| 255 | |
| 256 | |
| 257 | ldr r10, .LdvmJitToInterpNoChain |
| 258 | ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex |
| 259 | ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self |
| 260 | |
| 261 | @ Update "glue" values for the new method |
| 262 | str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall |
| 263 | str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... |
| 264 | mov rFP, r1 @ fp = newFp |
| 265 | str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp |
| 266 | |
| 267 | @ Start executing the callee |
| 268 | mov pc, r10 @ dvmJitToInterpNoChain |
| 269 | |
| 270 | /* ------------------------------ */ |
| 271 | .balign 4 |
| 272 | .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN |
| 273 | dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN: |
| 274 | /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */ |
| 275 | /* |
| 276 | * For monomorphic callsite, setup the Dalvik frame and return to the |
| 277 | * Thumb code through the link register to transfer control to the callee |
| 278 | * method through a dedicated chaining cell. |
| 279 | */ |
| 280 | @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite |
| 281 | ldrh r7, [r0, #offMethod_registersSize] @ r7<- methodToCall->regsSize |
| 282 | ldrh r2, [r0, #offMethod_outsSize] @ r2<- methodToCall->outsSize |
| 283 | ldr r9, [rGLUE, #offGlue_interpStackEnd] @ r9<- interpStackEnd |
| 284 | ldr r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount |
| 285 | add r3, r1, #1 @ Thumb addr is odd |
| 286 | SAVEAREA_FROM_FP(r1, rFP) @ r1<- stack save area |
| 287 | sub r1, r1, r7, lsl #2 @ r1<- newFp (old savearea - regsSize) |
| 288 | SAVEAREA_FROM_FP(r10, r1) @ r10<- stack save area |
| 289 | add r12, lr, #2 @ setup the punt-to-interp address |
| 290 | sub r10, r10, r2, lsl #2 @ r10<- bottom (newsave - outsSize) |
| 291 | ldr r8, [r8] @ r3<- suspendCount (int) |
| 292 | cmp r10, r9 @ bottom < interpStackEnd? |
| 293 | bxlt r12 @ return to raise stack overflow excep. |
| 294 | @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite |
| 295 | ldr r9, [r0, #offMethod_clazz] @ r9<- method->clazz |
| 296 | ldr r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags |
| 297 | str rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] |
| 298 | str rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] |
| 299 | ldr rPC, [r0, #offMethod_insns] @ rPC<- methodToCall->insns |
| 300 | |
| 301 | |
| 302 | @ set up newSaveArea |
| 303 | str rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] |
| 304 | str r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] |
| 305 | str r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] |
| 306 | cmp r8, #0 @ suspendCount != 0 |
| 307 | bxne r12 @ bail to the interpreter |
| 308 | tst r10, #ACC_NATIVE |
| 309 | bne .LinvokeNative |
| 310 | /* |
| 311 | * If we want to punt to the interpreter for native call, swap the bne with |
| 312 | * the following |
| 313 | * bxne r12 |
| 314 | */ |
| 315 | |
| 316 | |
| 317 | ldr r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex |
| 318 | ldr r2, [rGLUE, #offGlue_self] @ r2<- glue->self |
| 319 | |
| 320 | @ Update "glue" values for the new method |
| 321 | str r0, [rGLUE, #offGlue_method] @ glue->method = methodToCall |
| 322 | str r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ... |
| 323 | mov rFP, r1 @ fp = newFp |
| 324 | str rFP, [r2, #offThread_curFrame] @ self->curFrame = newFp |
| 325 | |
| 326 | bx lr @ return to the callee-chaining cell |
| 327 | |
| 328 | |
| 329 | |
| 330 | /* ------------------------------ */ |
| 331 | .balign 4 |
| 332 | .global dvmCompiler_TEMPLATE_CMPG_DOUBLE |
| 333 | dvmCompiler_TEMPLATE_CMPG_DOUBLE: |
| 334 | /* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */ |
| 335 | /* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ |
| 336 | /* |
| 337 | * For the JIT: incoming arguments are pointers to the arguments in r0/r1 |
| 338 | * result in r0 |
| 339 | * |
| 340 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 341 | * destination register based on the results of the comparison. |
| 342 | * |
| 343 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 344 | * on what value we'd like to return when one of the operands is NaN. |
| 345 | * |
| 346 | * See OP_CMPL_FLOAT for an explanation. |
| 347 | * |
| 348 | * For: cmpl-double, cmpg-double |
| 349 | */ |
| 350 | /* op vAA, vBB, vCC */ |
| 351 | mov r4, lr @ save return address |
| 352 | mov r9, r0 @ save copy of &arg1 |
| 353 | mov r10, r1 @ save copy of &arg2 |
| 354 | ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 |
| 355 | ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 |
| 356 | LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" |
| 357 | bhi .LTEMPLATE_CMPG_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate |
| 358 | mvncc r0, #0 @ (less than) r1<- -1 |
| 359 | moveq r0, #0 @ (equal) r1<- 0, trumps less than |
| 360 | bx r4 |
| 361 | |
| 362 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 363 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 364 | @ make the library call. |
| 365 | .LTEMPLATE_CMPG_DOUBLE_gt_or_nan: |
| 366 | ldmia r10, {r0-r1} @ reverse order |
| 367 | ldmia r9, {r2-r3} |
| 368 | LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < |
| 369 | movcc r0, #1 @ (greater than) r1<- 1 |
| 370 | bxcc r4 |
| 371 | mov r0, #1 @ r1<- 1 or -1 for NaN |
| 372 | bx r4 |
| 373 | |
| 374 | |
| 375 | |
| 376 | /* ------------------------------ */ |
| 377 | .balign 4 |
| 378 | .global dvmCompiler_TEMPLATE_CMPL_DOUBLE |
| 379 | dvmCompiler_TEMPLATE_CMPL_DOUBLE: |
| 380 | /* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */ |
| 381 | /* |
| 382 | * For the JIT: incoming arguments are pointers to the arguments in r0/r1 |
| 383 | * result in r0 |
| 384 | * |
| 385 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 386 | * destination register based on the results of the comparison. |
| 387 | * |
| 388 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 389 | * on what value we'd like to return when one of the operands is NaN. |
| 390 | * |
| 391 | * See OP_CMPL_FLOAT for an explanation. |
| 392 | * |
| 393 | * For: cmpl-double, cmpg-double |
| 394 | */ |
| 395 | /* op vAA, vBB, vCC */ |
| 396 | mov r4, lr @ save return address |
| 397 | mov r9, r0 @ save copy of &arg1 |
| 398 | mov r10, r1 @ save copy of &arg2 |
| 399 | ldmia r9, {r0-r1} @ r0/r1<- vBB/vBB+1 |
| 400 | ldmia r10, {r2-r3} @ r2/r3<- vCC/vCC+1 |
| 401 | LDR_PC_LR ".L__aeabi_cdcmple" @ PIC way of "bl __aeabi_cdcmple" |
| 402 | bhi .LTEMPLATE_CMPL_DOUBLE_gt_or_nan @ C set and Z clear, disambiguate |
| 403 | mvncc r0, #0 @ (less than) r1<- -1 |
| 404 | moveq r0, #0 @ (equal) r1<- 0, trumps less than |
| 405 | bx r4 |
| 406 | |
| 407 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 408 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 409 | @ make the library call. |
| 410 | .LTEMPLATE_CMPL_DOUBLE_gt_or_nan: |
| 411 | ldmia r10, {r0-r1} @ reverse order |
| 412 | ldmia r9, {r2-r3} |
| 413 | LDR_PC_LR ".L__aeabi_cdcmple" @ r0<- Z set if eq, C clear if < |
| 414 | movcc r0, #1 @ (greater than) r1<- 1 |
| 415 | bxcc r4 |
| 416 | mvn r0, #0 @ r1<- 1 or -1 for NaN |
| 417 | bx r4 |
| 418 | |
| 419 | |
| 420 | /* ------------------------------ */ |
| 421 | .balign 4 |
| 422 | .global dvmCompiler_TEMPLATE_CMPG_FLOAT |
| 423 | dvmCompiler_TEMPLATE_CMPG_FLOAT: |
| 424 | /* File: armv5te/TEMPLATE_CMPG_FLOAT.S */ |
| 425 | /* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ |
| 426 | /* |
| 427 | * For the JIT: incoming arguments in r0, r1 |
| 428 | * result in r0 |
| 429 | * |
| 430 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 431 | * destination register based on the results of the comparison. |
| 432 | * |
| 433 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 434 | * on what value we'd like to return when one of the operands is NaN. |
| 435 | * |
| 436 | * The operation we're implementing is: |
| 437 | * if (x == y) |
| 438 | * return 0; |
| 439 | * else if (x < y) |
| 440 | * return -1; |
| 441 | * else if (x > y) |
| 442 | * return 1; |
| 443 | * else |
| 444 | * return {-1,1}; // one or both operands was NaN |
| 445 | * |
| 446 | * The straightforward implementation requires 3 calls to functions |
| 447 | * that return a result in r0. We can do it with two calls if our |
| 448 | * EABI library supports __aeabi_cfcmple (only one if we want to check |
| 449 | * for NaN directly): |
| 450 | * check x <= y |
| 451 | * if <, return -1 |
| 452 | * if ==, return 0 |
| 453 | * check y <= x |
| 454 | * if <, return 1 |
| 455 | * return {-1,1} |
| 456 | * |
| 457 | * for: cmpl-float, cmpg-float |
| 458 | */ |
| 459 | /* op vAA, vBB, vCC */ |
| 460 | mov r4, lr @ save return address |
| 461 | mov r9, r0 @ Save copies - we may need to redo |
| 462 | mov r10, r1 |
| 463 | LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq |
| 464 | bhi .LTEMPLATE_CMPG_FLOAT_gt_or_nan @ C set and Z clear, disambiguate |
| 465 | mvncc r0, #0 @ (less than) r0<- -1 |
| 466 | moveq r0, #0 @ (equal) r0<- 0, trumps less than |
| 467 | bx r4 |
| 468 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 469 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 470 | @ make the library call. |
| 471 | .LTEMPLATE_CMPG_FLOAT_gt_or_nan: |
| 472 | mov r1, r9 @ reverse order |
| 473 | mov r0, r10 |
| 474 | LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < |
| 475 | movcc r0, #1 @ (greater than) r1<- 1 |
| 476 | bxcc r4 |
| 477 | mov r0, #1 @ r1<- 1 or -1 for NaN |
| 478 | bx r4 |
| 479 | |
| 480 | |
| 481 | |
| 482 | |
| 483 | /* ------------------------------ */ |
| 484 | .balign 4 |
| 485 | .global dvmCompiler_TEMPLATE_CMPL_FLOAT |
| 486 | dvmCompiler_TEMPLATE_CMPL_FLOAT: |
| 487 | /* File: armv5te/TEMPLATE_CMPL_FLOAT.S */ |
| 488 | /* |
| 489 | * For the JIT: incoming arguments in r0, r1 |
| 490 | * result in r0 |
| 491 | * |
| 492 | * Compare two floating-point values. Puts 0, 1, or -1 into the |
| 493 | * destination register based on the results of the comparison. |
| 494 | * |
| 495 | * Provide a "naninst" instruction that puts 1 or -1 into r1 depending |
| 496 | * on what value we'd like to return when one of the operands is NaN. |
| 497 | * |
| 498 | * The operation we're implementing is: |
| 499 | * if (x == y) |
| 500 | * return 0; |
| 501 | * else if (x < y) |
| 502 | * return -1; |
| 503 | * else if (x > y) |
| 504 | * return 1; |
| 505 | * else |
| 506 | * return {-1,1}; // one or both operands was NaN |
| 507 | * |
| 508 | * The straightforward implementation requires 3 calls to functions |
| 509 | * that return a result in r0. We can do it with two calls if our |
| 510 | * EABI library supports __aeabi_cfcmple (only one if we want to check |
| 511 | * for NaN directly): |
| 512 | * check x <= y |
| 513 | * if <, return -1 |
| 514 | * if ==, return 0 |
| 515 | * check y <= x |
| 516 | * if <, return 1 |
| 517 | * return {-1,1} |
| 518 | * |
| 519 | * for: cmpl-float, cmpg-float |
| 520 | */ |
| 521 | /* op vAA, vBB, vCC */ |
| 522 | mov r4, lr @ save return address |
| 523 | mov r9, r0 @ Save copies - we may need to redo |
| 524 | mov r10, r1 |
| 525 | LDR_PC_LR ".L__aeabi_cfcmple" @ cmp <=: C clear if <, Z set if eq |
| 526 | bhi .LTEMPLATE_CMPL_FLOAT_gt_or_nan @ C set and Z clear, disambiguate |
| 527 | mvncc r0, #0 @ (less than) r0<- -1 |
| 528 | moveq r0, #0 @ (equal) r0<- 0, trumps less than |
| 529 | bx r4 |
| 530 | @ Test for NaN with a second comparison. EABI forbids testing bit |
| 531 | @ patterns, and we can't represent 0x7fc00000 in immediate form, so |
| 532 | @ make the library call. |
| 533 | .LTEMPLATE_CMPL_FLOAT_gt_or_nan: |
| 534 | mov r1, r9 @ reverse order |
| 535 | mov r0, r10 |
| 536 | LDR_PC_LR ".L__aeabi_cfcmple" @ r0<- Z set if eq, C clear if < |
| 537 | movcc r0, #1 @ (greater than) r1<- 1 |
| 538 | bxcc r4 |
| 539 | mvn r0, #0 @ r1<- 1 or -1 for NaN |
| 540 | bx r4 |
| 541 | |
| 542 | |
| 543 | |
| 544 | /* ------------------------------ */ |
| 545 | .balign 4 |
| 546 | .global dvmCompiler_TEMPLATE_MUL_LONG |
| 547 | dvmCompiler_TEMPLATE_MUL_LONG: |
| 548 | /* File: armv5te/TEMPLATE_MUL_LONG.S */ |
| 549 | /* |
| 550 | * Signed 64-bit integer multiply. |
| 551 | * |
| 552 | * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1 |
| 553 | * |
| 554 | * Consider WXxYZ (r1r0 x r3r2) with a long multiply: |
| 555 | * WX |
| 556 | * x YZ |
| 557 | * -------- |
| 558 | * ZW ZX |
| 559 | * YW YX |
| 560 | * |
| 561 | * The low word of the result holds ZX, the high word holds |
| 562 | * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because |
| 563 | * it doesn't fit in the low 64 bits. |
| 564 | * |
| 565 | * Unlike most ARM math operations, multiply instructions have |
| 566 | * restrictions on using the same register more than once (Rd and Rm |
| 567 | * cannot be the same). |
| 568 | */ |
| 569 | /* mul-long vAA, vBB, vCC */ |
| 570 | mul ip, r2, r1 @ ip<- ZxW |
| 571 | umull r9, r10, r2, r0 @ r9/r10 <- ZxX |
| 572 | mla r2, r0, r3, ip @ r2<- YxX + (ZxW) |
| 573 | add r10, r2, r10 @ r10<- r10 + low(ZxW + (YxX)) |
| 574 | mov r0,r9 |
| 575 | mov r1,r10 |
| 576 | bx lr |
| 577 | |
| 578 | /* ------------------------------ */ |
| 579 | .balign 4 |
| 580 | .global dvmCompiler_TEMPLATE_SHL_LONG |
| 581 | dvmCompiler_TEMPLATE_SHL_LONG: |
| 582 | /* File: armv5te/TEMPLATE_SHL_LONG.S */ |
| 583 | /* |
| 584 | * Long integer shift. This is different from the generic 32/64-bit |
| 585 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 586 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 587 | * 6 bits. |
| 588 | */ |
| 589 | /* shl-long vAA, vBB, vCC */ |
| 590 | and r2, r2, #63 @ r2<- r2 & 0x3f |
| 591 | mov r1, r1, asl r2 @ r1<- r1 << r2 |
| 592 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 593 | orr r1, r1, r0, lsr r3 @ r1<- r1 | (r0 << (32-r2)) |
| 594 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 595 | movpl r1, r0, asl ip @ if r2 >= 32, r1<- r0 << (r2-32) |
| 596 | mov r0, r0, asl r2 @ r0<- r0 << r2 |
| 597 | bx lr |
| 598 | |
| 599 | /* ------------------------------ */ |
| 600 | .balign 4 |
| 601 | .global dvmCompiler_TEMPLATE_SHR_LONG |
| 602 | dvmCompiler_TEMPLATE_SHR_LONG: |
| 603 | /* File: armv5te/TEMPLATE_SHR_LONG.S */ |
| 604 | /* |
| 605 | * Long integer shift. This is different from the generic 32/64-bit |
| 606 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 607 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 608 | * 6 bits. |
| 609 | */ |
| 610 | /* shr-long vAA, vBB, vCC */ |
| 611 | and r2, r2, #63 @ r0<- r0 & 0x3f |
| 612 | mov r0, r0, lsr r2 @ r0<- r2 >> r2 |
| 613 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 614 | orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) |
| 615 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 616 | movpl r0, r1, asr ip @ if r2 >= 32, r0<-r1 >> (r2-32) |
| 617 | mov r1, r1, asr r2 @ r1<- r1 >> r2 |
| 618 | bx lr |
| 619 | |
| 620 | |
| 621 | /* ------------------------------ */ |
| 622 | .balign 4 |
| 623 | .global dvmCompiler_TEMPLATE_USHR_LONG |
| 624 | dvmCompiler_TEMPLATE_USHR_LONG: |
| 625 | /* File: armv5te/TEMPLATE_USHR_LONG.S */ |
| 626 | /* |
| 627 | * Long integer shift. This is different from the generic 32/64-bit |
| 628 | * binary operations because vAA/vBB are 64-bit but vCC (the shift |
| 629 | * distance) is 32-bit. Also, Dalvik requires us to ignore all but the low |
| 630 | * 6 bits. |
| 631 | */ |
| 632 | /* ushr-long vAA, vBB, vCC */ |
| 633 | and r2, r2, #63 @ r0<- r0 & 0x3f |
| 634 | mov r0, r0, lsr r2 @ r0<- r2 >> r2 |
| 635 | rsb r3, r2, #32 @ r3<- 32 - r2 |
| 636 | orr r0, r0, r1, asl r3 @ r0<- r0 | (r1 << (32-r2)) |
| 637 | subs ip, r2, #32 @ ip<- r2 - 32 |
| 638 | movpl r0, r1, lsr ip @ if r2 >= 32, r0<-r1 >>> (r2-32) |
| 639 | mov r1, r1, lsr r2 @ r1<- r1 >>> r2 |
| 640 | bx lr |
| 641 | |
| 642 | |
| 643 | .size dvmCompilerTemplateStart, .-dvmCompilerTemplateStart |
| 644 | /* File: armv5te/footer.S */ |
| 645 | /* |
| 646 | * =========================================================================== |
| 647 | * Common subroutines and data |
| 648 | * =========================================================================== |
| 649 | */ |
| 650 | |
| 651 | .text |
| 652 | .align 2 |
| 653 | .LinvokeNative: |
| 654 | @ Prep for the native call |
| 655 | @ r1 = newFP, r0 = methodToCall |
| 656 | ldr r3, [rGLUE, #offGlue_self] @ r3<- glue->self |
| 657 | ldr r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext |
| 658 | str r1, [r3, #offThread_curFrame] @ self->curFrame = newFp |
| 659 | str r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)] |
| 660 | @ newFp->localRefTop=refNext |
| 661 | mov r9, r3 @ r9<- glue->self (preserve) |
| 662 | SAVEAREA_FROM_FP(r10, r1) @ r10<- new stack save area |
| 663 | |
| 664 | mov r2, r0 @ r2<- methodToCall |
| 665 | mov r0, r1 @ r0<- newFP |
| 666 | add r1, rGLUE, #offGlue_retval @ r1<- &retval |
| 667 | |
| 668 | LDR_PC_LR "[r2, #offMethod_nativeFunc]" |
| 669 | |
| 670 | @ native return; r9=self, r10=newSaveArea |
| 671 | @ equivalent to dvmPopJniLocals |
| 672 | ldr r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret |
| 673 | ldr r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop |
| 674 | ldr r1, [r9, #offThread_exception] @ check for exception |
| 675 | str rFP, [r9, #offThread_curFrame] @ self->curFrame = fp |
| 676 | cmp r1, #0 @ null? |
| 677 | str r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0 |
| 678 | bne .LhandleException @ no, handle exception |
| 679 | bx r2 |
| 680 | |
| 681 | /* FIXME - untested */ |
| 682 | .LhandleException: |
| 683 | ldr rIBASE, .LdvmAsmInstructionStart |
| 684 | ldr rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC |
| 685 | b dvmMterpCommonExceptionThrown |
| 686 | |
| 687 | .align 2 |
| 688 | .LdvmAsmInstructionStart: |
| 689 | .word dvmAsmInstructionStart |
| 690 | .LdvmJitToInterpNoChain: |
| 691 | .word dvmJitToInterpNoChain |
| 692 | .LdvmMterpStdBail: |
| 693 | .word dvmMterpStdBail |
| 694 | .L__aeabi_cdcmple: |
| 695 | .word __aeabi_cdcmple |
| 696 | .L__aeabi_cfcmple: |
| 697 | .word __aeabi_cfcmple |
| 698 | |
| 699 | .global dmvCompilerTemplateEnd |
| 700 | dmvCompilerTemplateEnd: |
| 701 | |
| 702 | #endif /* WITH_JIT */ |
| 703 | |