/*
 * User address space access functions.
 * The non-inlined parts of asm-cris/uaccess.h are here.
 *
 * Copyright (C) 2000, Axis Communications AB.
 *
 * Written by Hans-Peter Nilsson.
 * Pieces used from memcpy, originally by Kenny Ranerup a long time ago.
 */

#include <asm/uaccess.h>
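
/* The __asm_copy_to_user_N, __asm_copy_from_user_N and __asm_clear_N
   helpers used throughout this file are the inlined counterparts from
   asm-cris/uaccess.h (see the header comment above).  As used here, each
   is expected to transfer or clear its fixed number of bytes, advance the
   pointer arguments, and add any bytes that fault to RETN; the comments
   before the trailing-byte loops below rely on that behaviour.  */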

/* Asm:s have been tweaked (within the domain of correctness) to give
   satisfactory results for "gcc version 2.96 20000427 (experimental)".

   Check regularly...

   Note that the PC saved at a bus-fault is the address *after* the
   faulting instruction, which means the branch-target for instructions in
   delay-slots for taken branches.  Note also that the postincrement in
   the instruction is performed regardless of bus-fault; the register is
   seen updated in fault handlers.

   Oh, and on the code formatting issue, to whoever feels like "fixing
   it" to Conformity: I'm too "lazy", but why don't you go ahead and "fix"
   string.c too.  I just don't think too many people will hack this file
   for the code format to be an issue.  */


/* Copy to userspace.  This is based on the memcpy used for
   kernel-to-kernel copying; see "string.c".  */

unsigned long
__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* When src is aligned but not dst, this makes a few extra needless
     cycles.  I believe it would take as many to check that the
     re-alignment was unnecessary.  */
  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes; so we
	 don't have to check further for overflows.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_copy_to_user_1 (dst, src, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_copy_to_user_2 (dst, src, retn);
	  n -= 2;
	}
    }

  /* Decide which copying method to use.  */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.  */
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right, then
	 check the equalities in the first comment.  It should say
	 "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
								\n\
; Since the noted PC of a faulting instruction in a delay-slot of a taken \n\
; branch is that of the branch target, we actually point at the from-movem \n\
; for this case.  There is no ambiguity here; if there was a fault in that \n\
; instruction (meaning a kernel oops), the faulted PC would be the address \n\
; after *that* movem.						\n\
								\n\
0:								\n\
	movem	[$r11+],$r10					\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
1:								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
; To provide a correct count in r10 of bytes that failed to be copied,	\n\
; we jump back into the loop if the loop-branch was taken.  There is no \n\
; performance penalty for sane use; the program will segfault soon enough. \n\
								\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	44,$r10						\n\
	move.d	$r10,[$sp]					\n\
	jump	0b						\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r10						\n\
	addq	44,$r12						\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }

  /* Either we directly start copying, using dword copying in a loop, or
     we copy as much as possible with 'movem' and then the last block (<44
     bytes) is copied here.  This will work since 'movem' will have
     updated SRC, DST and N.  */

  while (n >= 16)
    {
      __asm_copy_to_user_16 (dst, src, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_copy_to_user_4 (dst, src, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_copy_to_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_to_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_to_user_3 (dst, src, retn);
      break;
    }

  return retn;
}
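
/* A minimal usage sketch (illustrative only; the buffer and length names
   below are hypothetical, not from this file): wrappers in the style of
   copy_to_user() treat the return value as the number of bytes that could
   not be written to userspace, e.g.

     if (__copy_user (user_buf, kernel_buf, len) != 0)
       return -EFAULT;
*/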

/* Copy from user to kernel, zeroing the bytes that were inaccessible in
   userland.  The return-value is the number of bytes that were
   inaccessible.  */

unsigned long
__copy_user_zeroing (void *pdst, const void __user *psrc, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pdst;
  register const char *src __asm__ ("r11") = psrc;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  /* The best reason to align src is that we then know that a read-fault
     was for aligned bytes; there are no 1..3 remaining good bytes to
     pickle.  */
  if (((unsigned long) src & 3) != 0)
    {
      if (((unsigned long) src & 1) && n != 0)
	{
	  __asm_copy_from_user_1 (dst, src, retn);
	  n--;
	}

      if (((unsigned long) src & 2) && n >= 2)
	{
	  __asm_copy_from_user_2 (dst, src, retn);
	  n -= 2;
	}

      /* We only need one check after the unalignment-adjustments, because
	 if both adjustments were done, either both or neither reference
	 had an exception.  */
      if (retn != 0)
	goto copy_exception_bytes;
    }

  /* Decide which copying method to use.  */
  if (n >= 44*2)		/* Break even between movem and
				   move16 is at 38.7*2, but modulo 44.
				   FIXME: We use move4 now.  */
    {
      /* For large copies we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 registers; that will move the saving/restoring of those registers
	 to the function prologue/epilogue, and make non-movem sizes
	 suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right, then
	 check the equalities in the first comment.  It should say
	 "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2%3,$r13$r11$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll use in the movem process	\n\
	;; on the stack.					\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	;; Now we've got this:					\n\
	;; r11 - src						\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	44,$r12						\n\
0:								\n\
	movem	[$r11+],$r10					\n\
1:								\n\
	subq	44,$r12						\n\
	bge	0b						\n\
	movem	$r10,[$r13+]					\n\
								\n\
	addq	44,$r12	;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
4:								\n\
	.section .fixup,\"ax\"					\n\
								\n\
;; Do not jump back into the loop if we fail.  For some uses, we get a	\n\
;; page fault somewhere on the line.  Without checking for page limits, \n\
;; we don't know where, but we need to copy accurately and keep an	\n\
;; accurate count; not just clear the whole line.  To do that, we fall	\n\
;; down in the code below, proceeding with smaller amounts.  It should	\n\
;; be kept in mind that we have to cater to code like what at one time	\n\
;; was in fs/super.c:						\n\
;;  i = size - copy_from_user((void *)page, data, size);	\n\
;; which would cause repeated faults while clearing the remainder of	\n\
;; the SIZE bytes at PAGE after the first fault.		\n\
;; A caveat here is that we must not fall through from a failing page	\n\
;; to a valid page.						\n\
								\n\
3:								\n\
	movem	[$sp+],$r10					\n\
	addq	44,$r12 ;; Get back count before faulting point. \n\
	subq	44,$r11 ;; Get back pointer to faulting movem-line. \n\
	jump	4b	;; Fall through, pretending the fault didn't happen. \n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	1b,3b						\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (src), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (src), "2" (n), "3" (retn));

    }

  /* Either we directly start copying here, using dword copying in a loop,
     or we copy as much as possible with 'movem' and then the last block
     (<44 bytes) is copied here.  This will work since 'movem' will have
     updated src, dst and n.  (Except with failing src.)

     Since we want to keep src accurate, we can't use
     __asm_copy_from_user_N with N != (1, 2, 4); it updates dst and
     retn, but not src (by design; its value is ignored elsewhere).  */

  while (n >= 4)
    {
      __asm_copy_from_user_4 (dst, src, retn);
      n -= 4;

      if (retn)
	goto copy_exception_bytes;
    }

  /* If we get here, there were no memory read faults.  */
  switch (n)
    {
      /* These copies are at least "naturally aligned" (so we don't have
	 to check each byte), due to the src alignment code before the
	 movem loop.  The *_3 case *will* get the correct count for retn.  */
    case 0:
      /* This case deliberately left in (if you have doubts check the
	 generated assembly code).  */
      break;
    case 1:
      __asm_copy_from_user_1 (dst, src, retn);
      break;
    case 2:
      __asm_copy_from_user_2 (dst, src, retn);
      break;
    case 3:
      __asm_copy_from_user_3 (dst, src, retn);
      break;
    }

  /* If we get here, retn correctly reflects the number of failing
     bytes.  */
  return retn;

 copy_exception_bytes:
  /* We already have "retn" bytes cleared, and need to clear the
     remaining "n" bytes.  A non-optimized simple byte-for-byte in-line
     memset is preferred here, since this isn't speed-critical code and
     we'd rather have this a leaf-function than calling memset.  */
  {
    char *endp;
    for (endp = dst + n; dst < endp; dst++)
      *dst = 0;
  }

  return retn + n;
}
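
/* Illustrative sketch of the calling convention (the names below are
   hypothetical, not from this file): a copy_from_user()-style wrapper
   returns the number of bytes it could not read, and the tail of the
   kernel destination is already zeroed, so the pattern quoted in the
   fixup comment above works as intended:

     unsigned long not_copied = __copy_user_zeroing (page, data, size);
     unsigned long copied = size - not_copied;
*/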

/* Zero userspace.  */

unsigned long
__do_clear_user (void __user *pto, unsigned long pn)
{
  /* We want the parameters put in special registers.
     Make sure the compiler is able to make something useful of this.
     As it is now: r10 -> r13; r11 -> r11 (nop); r12 -> r12 (nop).

     FIXME: Comment for old gcc version.  Check.
     If gcc was all right, it really would need no temporaries, and no
     stack space to save stuff on.  */

  register char *dst __asm__ ("r13") = pto;
  register int n __asm__ ("r12") = pn;
  register int retn __asm__ ("r10") = 0;

  if (((unsigned long) dst & 3) != 0
      /* Don't align if we wouldn't copy more than a few bytes.  */
      && n >= 3)
    {
      if ((unsigned long) dst & 1)
	{
	  __asm_clear_1 (dst, retn);
	  n--;
	}

      if ((unsigned long) dst & 2)
	{
	  __asm_clear_2 (dst, retn);
	  n -= 2;
	}
    }

  /* Decide which copying method to use.
     FIXME: This number is from the "ordinary" kernel memset.  */
  if (n >= (1*48))
    {
      /* For large clears we use 'movem'.  */

      /* It is not optimal to tell the compiler about clobbering any
	 call-saved registers; that will move the saving/restoring of
	 those registers to the function prologue/epilogue, and make
	 non-movem sizes suboptimal.

	 This method is not foolproof; it assumes that the "asm reg"
	 declarations at the beginning of the function really are used
	 here (beware: they may be moved to temporary registers).
	 This way, we do not have to save/move the registers around into
	 temporaries; we can safely use them straight away.

	 If you want to check that the allocation was right, then
	 check the equalities in the first comment.  It should say
	 something like "r13=r13, r11=r11, r12=r12".  */
      __asm__ volatile ("\
	.ifnc %0%1%2,$r13$r12$r10				\n\
	.err							\n\
	.endif							\n\
								\n\
	;; Save the registers we'll clobber in the movem process \n\
	;; on the stack.  Don't mention them to gcc, it will only be \n\
	;; upset.						\n\
	subq	11*4,$sp					\n\
	movem	$r10,[$sp]					\n\
								\n\
	clear.d	$r0						\n\
	clear.d	$r1						\n\
	clear.d	$r2						\n\
	clear.d	$r3						\n\
	clear.d	$r4						\n\
	clear.d	$r5						\n\
	clear.d	$r6						\n\
	clear.d	$r7						\n\
	clear.d	$r8						\n\
	clear.d	$r9						\n\
	clear.d	$r10						\n\
	clear.d	$r11						\n\
								\n\
	;; Now we've got this:					\n\
	;; r13 - dst						\n\
	;; r12 - n						\n\
								\n\
	;; Update n for the first loop				\n\
	subq	12*4,$r12					\n\
0:								\n\
	subq	12*4,$r12					\n\
	bge	0b						\n\
	movem	$r11,[$r13+]					\n\
1:								\n\
	addq	12*4,$r12 ;; compensate for last loop underflowing n \n\
								\n\
	;; Restore registers from stack				\n\
	movem	[$sp+],$r10					\n\
2:								\n\
	.section .fixup,\"ax\"					\n\
3:								\n\
	move.d	[$sp],$r10					\n\
	addq	12*4,$r10					\n\
	move.d	$r10,[$sp]					\n\
	clear.d	$r10						\n\
	jump	0b						\n\
								\n\
4:								\n\
	movem	[$sp+],$r10					\n\
	addq	12*4,$r10					\n\
	addq	12*4,$r12					\n\
	jump	2b						\n\
								\n\
	.previous						\n\
	.section __ex_table,\"a\"				\n\
	.dword	0b,3b						\n\
	.dword	1b,4b						\n\
	.previous"

	/* Outputs */ : "=r" (dst), "=r" (n), "=r" (retn)
	/* Inputs */ : "0" (dst), "1" (n), "2" (retn)
	/* Clobber */ : "r11");
    }

  while (n >= 16)
    {
      __asm_clear_16 (dst, retn);
      n -= 16;
    }

  /* Having a separate by-four loop cuts down on cache footprint.
     FIXME: Test with and without; increasing switch to be 0..15.  */
  while (n >= 4)
    {
      __asm_clear_4 (dst, retn);
      n -= 4;
    }

  switch (n)
    {
    case 0:
      break;
    case 1:
      __asm_clear_1 (dst, retn);
      break;
    case 2:
      __asm_clear_2 (dst, retn);
      break;
    case 3:
      __asm_clear_3 (dst, retn);
      break;
    }

  return retn;
}
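
/* Illustrative sketch (hypothetical caller, not from this file): a
   clear_user()-style wrapper is expected to return the number of bytes
   that could not be zeroed, e.g.

     if (__do_clear_user (user_buf, len) != 0)
       return -EFAULT;
*/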