Blame - arch/xtensa/lib/usercopy.S - kernel/msm-4.9

blob: ace1892a875ea6505c12924474536586e894ef49 [file] [log] [blame]

Chris Zankel	249ac17	2005-06-23 22:01:20 -0700	[diff] [blame]	1	/*
				2	* arch/xtensa/lib/usercopy.S
				3	*
				4	* Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
				5	*
				6	* DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
				7	* It needs to remain separate and distinct. The hal files are part
Matt LaPlante	4b3f686	2006-10-03 22:21:02 +0200	[diff] [blame]	8	* of the Xtensa link-time HAL, and those files may differ per
Chris Zankel	249ac17	2005-06-23 22:01:20 -0700	[diff] [blame]	9	* processor configuration. Patching the kernel for another
				10	* processor configuration includes replacing the hal files, and we
Matt LaPlante	4b3f686	2006-10-03 22:21:02 +0200	[diff] [blame]	11	* could lose the special functionality for accessing user-space
Chris Zankel	249ac17	2005-06-23 22:01:20 -0700	[diff] [blame]	12	* memory during such a patch. We sacrifice a little code space here
				13	* in favor of simpler code maintenance.
				14	*
				15	* This file is subject to the terms and conditions of the GNU General
				16	* Public License. See the file "COPYING" in the main directory of
				17	* this archive for more details.
				18	*
				19	* Copyright (C) 2002 Tensilica Inc.
				20	*/
				21
				22
				23	/*
				24	* size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
				25	*
				26	* The returned value is the number of bytes not copied. Implies zero
				27	* is success.
				28	*
				29	* The general case algorithm is as follows:
				30	* If the destination and source are both aligned,
				31	* do 16B chunks with a loop, and then finish up with
				32	* 8B, 4B, 2B, and 1B copies conditional on the length.
				33	* If destination is aligned and source unaligned,
				34	* do the same, but use SRC to align the source data.
				35	* If destination is unaligned, align it by conditionally
				36	* copying 1B and 2B and then retest.
				37	* This code tries to use fall-through branches for the common
				38	* case of aligned destinations (except for the branches to
				39	* the alignment label).
				40	*
				41	* Register use:
				42	* a0/ return address
				43	* a1/ stack pointer
				44	* a2/ return value
				45	* a3/ src
				46	* a4/ length
				47	* a5/ dst
				48	* a6/ tmp
				49	* a7/ tmp
				50	* a8/ tmp
				51	* a9/ tmp
				52	* a10/ tmp (source unalignment offset in .Lsrcunaligned)
				53	* a11/ original length
				54	*/
				55
Chris Zankel	367b811	2008-11-06 06:40:46 -0800	[diff] [blame]	56	#include <variant/core.h>
Chris Zankel	249ac17	2005-06-23 22:01:20 -0700	[diff] [blame]	57
				58	#ifdef __XTENSA_EB__ /* the two variants differ only in src operand order and ssa8b vs. ssa8l */
				59	#define ALIGN(R, W0, W1) src R, W0, W1
				60	#define SSA8(R) ssa8b R
				61	#else /* !__XTENSA_EB__ */
				62	#define ALIGN(R, W0, W1) src R, W1, W0
				63	#define SSA8(R) ssa8l R
				64	#endif /* __XTENSA_EB__ */
				65
				66	/* Load or store instructions that may cause exceptions use the EX macro: it labels the instruction 9: and emits the word pair (9b, handler) into the __ex_table section. */
				67
				68	#define EX(insn,reg1,reg2,offset,handler) \
				69	9: insn reg1, reg2, offset; \
				70	.section __ex_table, "a"; \
				71	.word 9b, handler; \
				72	.previous
				73
				74
				75	.text
				76	.align 4
				77	.global __xtensa_copy_user
				78	.type __xtensa_copy_user,@function
				79	__xtensa_copy_user:
				80	entry sp, 16 # minimal stack frame
				81	# a2/ dst, a3/ src, a4/ len
				82	mov a5, a2 # work on a copy of dst: a2 keeps the original dst for the fixups and receives the return value on exit
				83	mov a11, a4 # preserve original len for error case
				84	.Lcommon:
				85	bbsi.l a2, 0, .Ldst1mod2 # if dst is 1 mod 2
				86	bbsi.l a2, 1, .Ldst2mod4 # if dst is 2 mod 4
				87	.Ldstaligned: # return here from .Ldst1mod2/.Ldst2mod4 once dst is word-aligned
				88	srli a7, a4, 4 # number of loop iterations with 16B
				89	# per iteration
				90	movi a8, 3 # mask of the two low address bits: if source is also aligned,
				91	bnone a3, a8, .Laligned # then use word copy
				92	SSA8( a3) # set shift amount from byte offset
				93	bnez a4, .Lsrcunaligned # len != 0: copy from the unaligned source
				94	movi a2, 0 # return success for len==0
				95	retw
				96
				97	/*
				98	* Destination is unaligned
				99	*/
				100
				101	.Ldst1mod2: # dst is only byte aligned
				102	bltui a4, 7, .Lbytecopy # fewer than 7 bytes: do short copies byte by byte
				103
				104	# copy 1 byte
				105	EX(l8ui, a6, a3, 0, l_fixup)
				106	addi a3, a3, 1
				107	EX(s8i, a6, a5, 0, s_fixup)
				108	addi a5, a5, 1
				109	addi a4, a4, -1 # one byte less left to copy
				110	bbci.l a5, 1, .Ldstaligned # if dst is now aligned, then
				111	# return to main algorithm
				112	.Ldst2mod4: # dst 16-bit aligned
				113	# copy 2 bytes
				114	bltui a4, 6, .Lbytecopy # fewer than 6 bytes: do short copies byte by byte
				115	EX(l8ui, a6, a3, 0, l_fixup)
				116	EX(l8ui, a7, a3, 1, l_fixup)
				117	addi a3, a3, 2
				118	EX(s8i, a6, a5, 0, s_fixup)
				119	EX(s8i, a7, a5, 1, s_fixup)
				120	addi a5, a5, 2
				121	addi a4, a4, -2 # two bytes less left to copy
				122	j .Ldstaligned # dst is now aligned, return to main algorithm
				123
				124	/*
				125	* Byte by byte copy
				126	*/
				127	.align 4
				128	.byte 0 # 1 mod 4 alignment for LOOPNEZ
				129	# (0 mod 4 alignment for LBEG)
				130	.Lbytecopy:
				131	#if XCHAL_HAVE_LOOPS
				132	loopnez a4, .Lbytecopydone # repeat the body a4 times, skip it when a4 == 0
				133	#else /* !XCHAL_HAVE_LOOPS */
				134	beqz a4, .Lbytecopydone # nothing to copy
				135	add a7, a3, a4 # a7 = end address for source
				136	#endif /* !XCHAL_HAVE_LOOPS */
				137	.Lnextbyte:
				138	EX(l8ui, a6, a3, 0, l_fixup)
				139	addi a3, a3, 1
				140	EX(s8i, a6, a5, 0, s_fixup)
				141	addi a5, a5, 1
				142	#if !XCHAL_HAVE_LOOPS
				143	blt a3, a7, .Lnextbyte # loop until src reaches the end address in a7
				144	#endif /* !XCHAL_HAVE_LOOPS */
				145	.Lbytecopydone:
				146	movi a2, 0 # return success for len bytes copied
				147	retw
				148
				149	/*
				150	* Destination and source are word-aligned.
				151	*/
				152	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
				153	.align 4 # 1 mod 4 alignment for LOOPNEZ
				154	.byte 0 # (0 mod 4 alignment for LBEG)
				155	.Laligned:
				156	#if XCHAL_HAVE_LOOPS
				157	loopnez a7, .Loop1done # a7 = len / 16 iterations (set at .Ldstaligned)
				158	#else /* !XCHAL_HAVE_LOOPS */
				159	beqz a7, .Loop1done # fewer than 16 bytes: go straight to the tail copies
				160	slli a8, a7, 4 # a8 = 16 * number of iterations
				161	add a8, a8, a3 # a8 = end of last 16B source chunk
				162	#endif /* !XCHAL_HAVE_LOOPS */
				163	.Loop1:
				164	EX(l32i, a6, a3, 0, l_fixup)
				165	EX(l32i, a7, a3, 4, l_fixup) # a7 (iteration count) has been consumed above and is reused as a temp
				166	EX(s32i, a6, a5, 0, s_fixup)
				167	EX(l32i, a6, a3, 8, l_fixup)
				168	EX(s32i, a7, a5, 4, s_fixup)
				169	EX(l32i, a7, a3, 12, l_fixup)
				170	EX(s32i, a6, a5, 8, s_fixup)
				171	addi a3, a3, 16
				172	EX(s32i, a7, a5, 12, s_fixup)
				173	addi a5, a5, 16
				174	#if !XCHAL_HAVE_LOOPS
				175	blt a3, a8, .Loop1 # loop until src reaches the end address in a8
				176	#endif /* !XCHAL_HAVE_LOOPS */
				177	.Loop1done:
				178	bbci.l a4, 3, .L2 # bit 3 of len clear: no 8-byte piece
				179	# copy 8 bytes
				180	EX(l32i, a6, a3, 0, l_fixup)
				181	EX(l32i, a7, a3, 4, l_fixup)
				182	addi a3, a3, 8
				183	EX(s32i, a6, a5, 0, s_fixup)
				184	EX(s32i, a7, a5, 4, s_fixup)
				185	addi a5, a5, 8
				186	.L2:
				187	bbci.l a4, 2, .L3 # bit 2 of len clear: no 4-byte piece
				188	# copy 4 bytes
				189	EX(l32i, a6, a3, 0, l_fixup)
				190	addi a3, a3, 4
				191	EX(s32i, a6, a5, 0, s_fixup)
				192	addi a5, a5, 4
				193	.L3:
				194	bbci.l a4, 1, .L4 # bit 1 of len clear: no 2-byte piece
				195	# copy 2 bytes
				196	EX(l16ui, a6, a3, 0, l_fixup)
				197	addi a3, a3, 2
				198	EX(s16i, a6, a5, 0, s_fixup)
				199	addi a5, a5, 2
				200	.L4:
				201	bbci.l a4, 0, .L5 # bit 0 of len clear: no final byte
				202	# copy 1 byte
				203	EX(l8ui, a6, a3, 0, l_fixup)
				204	EX(s8i, a6, a5, 0, s_fixup)
				205	.L5:
				206	movi a2, 0 # return success for len bytes copied
				207	retw
				208
				209	/*
				210	* Destination is aligned, Source is unaligned
				211	*/
				212
				213	.align 4
				214	.byte 0 # 1 mod 4 alignment for LOOPNEZ
				215	# (0 mod 4 alignment for LBEG)
				216	.Lsrcunaligned:
				217	# copy 16 bytes per iteration for word-aligned dst and unaligned src
				218	and a10, a3, a8 # save unalignment offset (a8 == 3 here); a10 must stay intact until .L13
				219	sub a3, a3, a10 # align a3 (to avoid sim warnings only; not needed for hardware)
				220	EX(l32i, a6, a3, 0, l_fixup) # load first word
				221	#if XCHAL_HAVE_LOOPS
				222	loopnez a7, .Loop2done
				223	#else /* !XCHAL_HAVE_LOOPS */
				224	beqz a7, .Loop2done
				225	slli a12, a7, 4 # use a12, not a10: a10 still holds the unalignment offset needed at .L13
				226	add a12, a12, a3 # a12 = end of last 16B source chunk
				227	#endif /* !XCHAL_HAVE_LOOPS */
				228	.Loop2:
				229	EX(l32i, a7, a3, 4, l_fixup)
				230	EX(l32i, a8, a3, 8, l_fixup)
				231	ALIGN( a6, a6, a7) # combine two adjacent source words into one dst word
				232	EX(s32i, a6, a5, 0, s_fixup)
				233	EX(l32i, a9, a3, 12, l_fixup)
				234	ALIGN( a7, a7, a8)
				235	EX(s32i, a7, a5, 4, s_fixup)
				236	EX(l32i, a6, a3, 16, l_fixup) # a6 = first source word for the next iteration or tail
				237	ALIGN( a8, a8, a9)
				238	EX(s32i, a8, a5, 8, s_fixup)
				239	addi a3, a3, 16
				240	ALIGN( a9, a9, a6)
				241	EX(s32i, a9, a5, 12, s_fixup)
				242	addi a5, a5, 16
				243	#if !XCHAL_HAVE_LOOPS
				244	blt a3, a12, .Loop2 # loop until src reaches the end address in a12
				245	#endif /* !XCHAL_HAVE_LOOPS */
				246	.Loop2done:
				247	bbci.l a4, 3, .L12 # bit 3 of len clear: no 8-byte piece
				248	# copy 8 bytes
				249	EX(l32i, a7, a3, 4, l_fixup)
				250	EX(l32i, a8, a3, 8, l_fixup)
				251	ALIGN( a6, a6, a7)
				252	EX(s32i, a6, a5, 0, s_fixup)
				253	addi a3, a3, 8
				254	ALIGN( a7, a7, a8)
				255	EX(s32i, a7, a5, 4, s_fixup)
				256	addi a5, a5, 8
				257	mov a6, a8 # keep the last loaded source word in a6 for the next piece
				258	.L12:
				259	bbci.l a4, 2, .L13 # bit 2 of len clear: no 4-byte piece
				260	# copy 4 bytes
				261	EX(l32i, a7, a3, 4, l_fixup)
				262	addi a3, a3, 4
				263	ALIGN( a6, a6, a7)
				264	EX(s32i, a6, a5, 0, s_fixup)
				265	addi a5, a5, 4
				266	mov a6, a7
				267	.L13:
				268	add a3, a3, a10 # readjust a3 with correct misalignment
				269	bbci.l a4, 1, .L14 # bit 1 of len clear: no 2-byte piece
				270	# copy 2 bytes
				271	EX(l8ui, a6, a3, 0, l_fixup)
				272	EX(l8ui, a7, a3, 1, l_fixup)
				273	addi a3, a3, 2
				274	EX(s8i, a6, a5, 0, s_fixup)
				275	EX(s8i, a7, a5, 1, s_fixup)
				276	addi a5, a5, 2
				277	.L14:
				278	bbci.l a4, 0, .L15 # bit 0 of len clear: no final byte
				279	# copy 1 byte
				280	EX(l8ui, a6, a3, 0, l_fixup)
				281	EX(s8i, a6, a5, 0, s_fixup)
				282	.L15:
				283	movi a2, 0 # return success for len bytes copied
				284	retw
				285
				286
				287	.section .fixup, "ax"
				288	.align 4
				289
				290	/* a2 = original dst; a5 = current dst; a11= original len
				291	* bytes_copied = a5 - a2
				292	* retval = bytes_not_copied = original len - bytes_copied
				293	* retval = a11 - (a5 - a2)
				294	*
				295	* Clearing the remaining pieces of kernel memory plugs security
				296	* holes. This functionality is the equivalent of the *_zeroing
				297	* functions that some architectures provide.
				298	*/
				299
				300	.Lmemset: /* literal holding the address of memset, loaded by the l32r in l_fixup */
				301	.word memset
				302
				303	s_fixup: /* handler named by the EX() entries on stores: only reports bytes not copied */
				304	sub a2, a5, a2 /* a2 <-- bytes copied */
				305	sub a2, a11, a2 /* a2 <-- bytes not copied */
				306	retw
				307
				308	l_fixup: /* handler named by the EX() entries on loads: zeroes the uncopied rest of dst, then reports bytes not copied */
				309	sub a2, a5, a2 /* a2 <-- bytes copied */
				310	sub a2, a11, a2 /* a2 <-- bytes not copied == return value */
				311
				312	/* void *memset(void *s, int c, size_t n); */
				313	mov a6, a5 /* s = current dst */
				314	movi a7, 0 /* c */
				315	mov a8, a2 /* n = bytes not copied */
				316	l32r a4, .Lmemset /* a4 <-- address of memset */
				317	callx4 a4 /* memset(current dst, 0, bytes not copied) */
				318	/* Ignore memset return value in a6. */
				319	/* a2 still contains bytes not copied. */
				320	retw