Blame - arch/alpha/lib/memcpy.c - kernel/msm-4.9

blob: 64083fc732389419aa55e499532a278af4f8adab [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/arch/alpha/lib/memcpy.c
				3	*
				4	* Copyright (C) 1995 Linus Torvalds
				5	*/
				6
				7	/*
				8	* This is a reasonably optimized memcpy() routine.
				9	*/
				10
				11	/*
				12	* Note that the C code is written to be optimized into good assembly. However,
				13	* at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
				14	* explicit compare against 0 (instead of just using the proper "blt reg, xx" or
				15	* "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
				16	*/
				17
				18	#include <linux/types.h>
				19
				20	/*
				21	* This should be done in one go with ldq_u*2/mask/stq_u. Do it
				22	* with a macro so that we can fix it up later..
				23	*/
				24	#define ALIGN_DEST_TO8_UP(d,s,n) \
				25	while (d & 7) { \
				26	if (n <= 0) return; \
				27	n--; \
				28	(char ) d = (char ) s; \
				29	d++; s++; \
				30	}
				31	#define ALIGN_DEST_TO8_DN(d,s,n) \
				32	while (d & 7) { \
				33	if (n <= 0) return; \
				34	n--; \
				35	d--; s--; \
				36	(char ) d = (char ) s; \
				37	}
				38
				39	/*
				40	* This should similarly be done with ldq_u*2/mask/stq. The destination
				41	* is aligned, but we don't fill in a full quad-word
				42	*/
				43	#define DO_REST_UP(d,s,n) \
				44	while (n > 0) { \
				45	n--; \
				46	(char ) d = (char ) s; \
				47	d++; s++; \
				48	}
				49	#define DO_REST_DN(d,s,n) \
				50	while (n > 0) { \
				51	n--; \
				52	d--; s--; \
				53	(char ) d = (char ) s; \
				54	}
				55
				56	/*
				57	* This should be done with ldq/mask/stq. The source and destination are
				58	* aligned, but we don't fill in a full quad-word
				59	*/
				60	#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
				61	#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)
				62
				63	/*
				64	* This does unaligned memory copies. We want to avoid storing to
				65	* an unaligned address, as that would do a read-modify-write cycle.
				66	* We also want to avoid double-reading the unaligned reads.
				67	*
				68	* Note the ordering to try to avoid load (and address generation) latencies.
				69	*/
				70	static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
				71	long n)
				72	{
				73	ALIGN_DEST_TO8_UP(d,s,n);
				74	n -= 8; /* to avoid compare against 8 in the loop */
				75	if (n >= 0) {
				76	unsigned long low_word, high_word;
				77	__asm__("ldq_u %0,%1":"=r" (low_word):"m" ((unsigned long ) s));
				78	do {
				79	unsigned long tmp;
				80	__asm__("ldq_u %0,%1":"=r" (high_word):"m" ((unsigned long )(s+8)));
				81	n -= 8;
				82	__asm__("extql %1,%2,%0"
				83	:"=r" (low_word)
				84	:"r" (low_word), "r" (s));
				85	__asm__("extqh %1,%2,%0"
				86	:"=r" (tmp)
				87	:"r" (high_word), "r" (s));
				88	s += 8;
				89	(unsigned long ) d = low_word \| tmp;
				90	d += 8;
				91	low_word = high_word;
				92	} while (n >= 0);
				93	}
				94	n += 8;
				95	DO_REST_UP(d,s,n);
				96	}
				97
				98	static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
				99	long n)
				100	{
				101	/* I don't understand AXP assembler well enough for this. -Tim */
				102	s += n;
				103	d += n;
				104	while (n--)
				105	* (char ) --d = (char *) --s;
				106	}
				107
				108	/*
				109	* Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
				110	* for the load-store. I don't know why, but it would seem that using a floating
				111	* point register for the move seems to slow things down (very small difference,
				112	* though).
				113	*
				114	* Note the ordering to try to avoid load (and address generation) latencies.
				115	*/
				116	static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
				117	long n)
				118	{
				119	ALIGN_DEST_TO8_UP(d,s,n);
				120	n -= 8;
				121	while (n >= 0) {
				122	unsigned long tmp;
				123	__asm__("ldq %0,%1":"=r" (tmp):"m" ((unsigned long ) s));
				124	n -= 8;
				125	s += 8;
				126	(unsigned long ) d = tmp;
				127	d += 8;
				128	}
				129	n += 8;
				130	DO_REST_ALIGNED_UP(d,s,n);
				131	}
				132	static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
				133	long n)
				134	{
				135	s += n;
				136	d += n;
				137	ALIGN_DEST_TO8_DN(d,s,n);
				138	n -= 8;
				139	while (n >= 0) {
				140	unsigned long tmp;
				141	s -= 8;
				142	__asm__("ldq %0,%1":"=r" (tmp):"m" ((unsigned long ) s));
				143	n -= 8;
				144	d -= 8;
				145	(unsigned long ) d = tmp;
				146	}
				147	n += 8;
				148	DO_REST_ALIGNED_DN(d,s,n);
				149	}
				150
				151	void * memcpy(void * dest, const void *src, size_t n)
				152	{
				153	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
				154	__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				155	n);
				156	return dest;
				157	}
				158	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
				159	return dest;
				160	}
				161
				162	/* For backward modules compatibility, define __memcpy. */
				163	asm("__memcpy = memcpy; .globl __memcpy");