Blame - arch/alpha/lib/memcpy.c - kernel/msm-4.19

blob: cbac3dc6d9635840f99c8c71c23a94edde76cc24 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame^]	1	// SPDX-License-Identifier: GPL-2.0
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	2	/*
				3	* linux/arch/alpha/lib/memcpy.c
				4	*
				5	* Copyright (C) 1995 Linus Torvalds
				6	*/
				7
				8	/*
				9	* This is a reasonably optimized memcpy() routine.
				10	*/
				11
				12	/*
				13	* Note that the C code is written to be optimized into good assembly. However,
				14	* at this point gcc is unable to sanely compile "if (n >= 0)", resulting in an
				15	* explicit compare against 0 (instead of just using the proper "blt reg, xx" or
				16	* "bge reg, xx"). I hope alpha-gcc will be fixed to notice this eventually..
				17	*/
				18
				19	#include <linux/types.h>
Al Viro	00fc0e0	2016-01-11 09:51:29 -0500	[diff] [blame]	20	#include <linux/export.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	21
				22	/*
				23	* This should be done in one go with ldq_u*2/mask/stq_u. Do it
				24	* with a macro so that we can fix it up later..
				25	*/
				26	#define ALIGN_DEST_TO8_UP(d,s,n) \
				27	while (d & 7) { \
				28	if (n <= 0) return; \
				29	n--; \
				30	(char ) d = (char ) s; \
				31	d++; s++; \
				32	}
				33	#define ALIGN_DEST_TO8_DN(d,s,n) \
				34	while (d & 7) { \
				35	if (n <= 0) return; \
				36	n--; \
				37	d--; s--; \
				38	(char ) d = (char ) s; \
				39	}
				40
				41	/*
				42	* This should similarly be done with ldq_u*2/mask/stq. The destination
				43	* is aligned, but we don't fill in a full quad-word
				44	*/
				45	#define DO_REST_UP(d,s,n) \
				46	while (n > 0) { \
				47	n--; \
				48	(char ) d = (char ) s; \
				49	d++; s++; \
				50	}
				51	#define DO_REST_DN(d,s,n) \
				52	while (n > 0) { \
				53	n--; \
				54	d--; s--; \
				55	(char ) d = (char ) s; \
				56	}
				57
				58	/*
				59	* This should be done with ldq/mask/stq. The source and destination are
				60	* aligned, but we don't fill in a full quad-word
				61	*/
				62	#define DO_REST_ALIGNED_UP(d,s,n) DO_REST_UP(d,s,n)
				63	#define DO_REST_ALIGNED_DN(d,s,n) DO_REST_DN(d,s,n)
				64
				65	/*
				66	* This does unaligned memory copies. We want to avoid storing to
				67	* an unaligned address, as that would do a read-modify-write cycle.
				68	* We also want to avoid double-reading the unaligned reads.
				69	*
				70	* Note the ordering to try to avoid load (and address generation) latencies.
				71	*/
				72	static inline void __memcpy_unaligned_up (unsigned long d, unsigned long s,
				73	long n)
				74	{
				75	ALIGN_DEST_TO8_UP(d,s,n);
				76	n -= 8; /* to avoid compare against 8 in the loop */
				77	if (n >= 0) {
				78	unsigned long low_word, high_word;
				79	__asm__("ldq_u %0,%1":"=r" (low_word):"m" ((unsigned long ) s));
				80	do {
				81	unsigned long tmp;
				82	__asm__("ldq_u %0,%1":"=r" (high_word):"m" ((unsigned long )(s+8)));
				83	n -= 8;
				84	__asm__("extql %1,%2,%0"
				85	:"=r" (low_word)
				86	:"r" (low_word), "r" (s));
				87	__asm__("extqh %1,%2,%0"
				88	:"=r" (tmp)
				89	:"r" (high_word), "r" (s));
				90	s += 8;
				91	(unsigned long ) d = low_word \| tmp;
				92	d += 8;
				93	low_word = high_word;
				94	} while (n >= 0);
				95	}
				96	n += 8;
				97	DO_REST_UP(d,s,n);
				98	}
				99
				100	static inline void __memcpy_unaligned_dn (unsigned long d, unsigned long s,
				101	long n)
				102	{
				103	/* I don't understand AXP assembler well enough for this. -Tim */
				104	s += n;
				105	d += n;
				106	while (n--)
				107	* (char ) --d = (char *) --s;
				108	}
				109
				110	/*
				111	* Hmm.. Strange. The __asm__ here is there to make gcc use an integer register
				112	* for the load-store. I don't know why, but it would seem that using a floating
				113	* point register for the move seems to slow things down (very small difference,
				114	* though).
				115	*
				116	* Note the ordering to try to avoid load (and address generation) latencies.
				117	*/
				118	static inline void __memcpy_aligned_up (unsigned long d, unsigned long s,
				119	long n)
				120	{
				121	ALIGN_DEST_TO8_UP(d,s,n);
				122	n -= 8;
				123	while (n >= 0) {
				124	unsigned long tmp;
				125	__asm__("ldq %0,%1":"=r" (tmp):"m" ((unsigned long ) s));
				126	n -= 8;
				127	s += 8;
				128	(unsigned long ) d = tmp;
				129	d += 8;
				130	}
				131	n += 8;
				132	DO_REST_ALIGNED_UP(d,s,n);
				133	}
				134	static inline void __memcpy_aligned_dn (unsigned long d, unsigned long s,
				135	long n)
				136	{
				137	s += n;
				138	d += n;
				139	ALIGN_DEST_TO8_DN(d,s,n);
				140	n -= 8;
				141	while (n >= 0) {
				142	unsigned long tmp;
				143	s -= 8;
				144	__asm__("ldq %0,%1":"=r" (tmp):"m" ((unsigned long ) s));
				145	n -= 8;
				146	d -= 8;
				147	(unsigned long ) d = tmp;
				148	}
				149	n += 8;
				150	DO_REST_ALIGNED_DN(d,s,n);
				151	}
				152
				153	void * memcpy(void * dest, const void *src, size_t n)
				154	{
				155	if (!(((unsigned long) dest ^ (unsigned long) src) & 7)) {
				156	__memcpy_aligned_up ((unsigned long) dest, (unsigned long) src,
				157	n);
				158	return dest;
				159	}
				160	__memcpy_unaligned_up ((unsigned long) dest, (unsigned long) src, n);
				161	return dest;
				162	}
Al Viro	00fc0e0	2016-01-11 09:51:29 -0500	[diff] [blame]	163	EXPORT_SYMBOL(memcpy);