libc: Add Kryo-specific memcpy routine

Add a Kryo-tuned memcpy (and the matching __memcpy_chk entry point) for
arm64. The main loop copies in 128-byte blocks and issues software
prefetches (prfm) ahead of the loads to hide memory latency on large
copies; the remaining string/memory routines stay on the generic arm64
implementations via the new kryo.mk.
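
For reference (device configuration is assumed here, not part of this
change): these routines are only compiled in when the device selects the
"kryo" CPU variant, which causes the bionic build to pick up
arch-arm64/kryo/kryo.mk. An illustrative board setup:

    # BoardConfig.mk (illustrative)
    TARGET_ARCH := arm64
    TARGET_ARCH_VARIANT := armv8-a
    TARGET_CPU_VARIANT := kryo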

Change-Id: Id3af7bdbc9d621c56cd26cbc04f9ad116f228550
diff --git a/libc/arch-arm64/kryo/bionic/memcpy.S b/libc/arch-arm64/kryo/bionic/memcpy.S
new file mode 100644
index 0000000..87e1b3b
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy.S
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2008 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// Prototype: void *memcpy (void *dst, const void *src, size_t count).
+
+#include <private/bionic_asm.h>
+#include <private/libc_events.h>
+
+ENTRY(__memcpy_chk)
+  cmp   x2, x3
+  b.hi  __memcpy_chk_fail
+
+  // Fall through to memcpy...
+  b memcpy
+END(__memcpy_chk)
+
+        .align  6
+ENTRY(memcpy)
+  #include "memcpy_base.S"
+END(memcpy)
+
+ENTRY_PRIVATE(__memcpy_chk_fail)
+  // Preserve for accurate backtrace.
+  stp  x29, x30, [sp, -16]!
+  .cfi_def_cfa_offset 16
+  .cfi_rel_offset x29, 0
+  .cfi_rel_offset x30, 8
+
+  adrp  x0, error_string
+  add   x0, x0, :lo12:error_string
+  ldr   x1, error_code
+  bl    __fortify_chk_fail
+error_code:
+  .word   BIONIC_EVENT_MEMCPY_BUFFER_OVERFLOW
+END(__memcpy_chk_fail)
+
+  .data
+  .align 2
+error_string:
+  .string "memcpy: prevented write past end of buffer"
diff --git a/libc/arch-arm64/kryo/bionic/memcpy_base.S b/libc/arch-arm64/kryo/bionic/memcpy_base.S
new file mode 100644
index 0000000..0096bb7
--- /dev/null
+++ b/libc/arch-arm64/kryo/bionic/memcpy_base.S
@@ -0,0 +1,244 @@
+/* Copyright (c) 2015 The Linux Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of The Linux Foundation nor the names of its contributors may
+ *       be used to endorse or promote products derived from this software
+ *       without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
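+// Tuning parameters for the 128-byte-block copy loops below:
+//   PLDOFFS   - how far ahead of the load pointer to prefetch, in 128-byte blocks.
+//   PLDTHRESH - copies of at most this many 128-byte blocks skip prefetching entirely.
+//   BBTHRESH  - copies of at most this many 128-byte blocks (2KB) skip the
+//               double-prefetch startup phase.
+//   PLDSIZE   - bytes covered by one prefetch step, issued as two 64-byte prefetches.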
+#ifdef PLDOFFS
+#undef PLDOFFS
+#endif
+#define PLDOFFS		(16)
+
+#ifdef PLDTHRESH
+#undef PLDTHRESH
+#endif
+#define PLDTHRESH (PLDOFFS)
+
+#ifdef BBTHRESH
+#undef BBTHRESH
+#endif
+#define BBTHRESH (2048/128)
+
+#if (PLDOFFS < 1)
+#error Routine does not support offsets less than 1
+#endif
+#if (PLDTHRESH < PLDOFFS)
+#error PLD threshold must be greater than or equal to the PLD offset
+#endif
+
+#ifdef PLDSIZE
+#undef PLDSIZE
+#endif
+#define PLDSIZE	(128)
+
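+// Included from memcpy.S inside ENTRY(memcpy).
+// Register use: x0 = dst (advances), x1 = src (advances), x2 = byte count,
+// x11 = saved original dst, restored into x0 at kryo_bb_exit.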
+kryo_bb_memcpy:
+	mov	x11, x0
+	cmp	x2, #4
+	blo	kryo_bb_lt4
+	cmp	x2, #16
+	blo	kryo_bb_lt16
+	cmp	x2, #32
+	blo	kryo_bb_16
+	cmp	x2, #64
+	blo	kryo_bb_copy_32_a
+	cmp	x2, #128
+	blo	kryo_bb_copy_64_a
+
+	// count >= 128, so the fixup below (at most 127 bytes) can align the source to 128 bytes
+	neg	x3, x1			// calculate count to get SOURCE aligned
+	ands	x3, x3, #0x7F
+	b.eq	kryo_bb_source_aligned	// already aligned
+	// alignment fixup, small to large (favorable alignment)
+	tbz	x3, #0, 1f
+	ldrb	w5, [x1], #1
+	strb	w5, [x0], #1
+1:	tbz	x3, #1, 2f
+	ldrh	w6, [x1], #2
+	strh	w6, [x0], #2
+2:	tbz	x3, #2, 3f
+	ldr	w8, [x1], #4
+	str	w8, [x0], #4
+3:	tbz	x3, #3, 4f
+	ldr	x9, [x1], #8
+	str	x9, [x0], #8
+4:	tbz	x3, #4, 5f
+	ldr	q7, [x1], #16
+	str	q7, [x0], #16
+5:	tbz	x3, #5, 55f
+	ldp	q0, q1, [x1], #32
+	stp	q0, q1, [x0], #32
+55:	tbz	x3, #6, 6f
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+6:	subs	x2, x2, x3		// fixup count after alignment
+	b.eq	kryo_bb_exit
+	cmp	x2, #128
+	blo	kryo_bb_copy_64_a
+kryo_bb_source_aligned:
+	lsr	x12, x2, #7
+	cmp	x12, #PLDTHRESH
+	bls	kryo_bb_copy_128_loop_nopld
+
+	cmp	x12, #BBTHRESH
+	bls	kryo_bb_prime_pump
+
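+	// Large copy: compute x14 = (((dst + 1KB) - (src + 2KB)) mod 2KB) + 2KB and
+	// start the PLDL1KEEP pointer at x10 = (src + x14) & ~0x7F, presumably to put
+	// the prefetch stream at a favorable offset from the destination; the first
+	// blocks are copied in the double-prefetch loop below.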
+	add	x14, x0, #0x400
+	add	x9,  x1, #(PLDOFFS*PLDSIZE)
+	sub	x14, x14, x9
+	lsl	x14, x14, #(21+32)
+	lsr	x14, x14, #(21+32)
+	add	x14, x14, #(PLDOFFS*PLDSIZE)
+	cmp	x12, x14, lsr #7
+	bls	kryo_bb_prime_pump
+
+	mov	x9, #(PLDOFFS)
+	lsr     x13, x14, #7
+	subs    x9, x13, x9
+	bls	kryo_bb_prime_pump
+
+	add	x10, x1, x14
+	bic	x10, x10, #0x7F		// Round to multiple of PLDSIZE
+
+	sub	x12, x12, x14, lsr #7
+	cmp	x9, x12
+	sub     x13, x12, x9
+	csel    x12, x13, x12, LS
+	csel    x9, x12, x9, HI
+	csel    x12, xzr, x12, HI
+
+	prfm	PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE)]
+	prfm	PLDL1STRM, [x1, #((PLDOFFS-1)*PLDSIZE+64)]
+kryo_bb_copy_128_loop_outer_doublepld:
+	prfm	PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)]
+	prfm	PLDL1STRM, [x1, #((PLDOFFS)*PLDSIZE)+64]
+	subs	x9, x9, #1
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	ldp	q4, q5, [x1], #32
+	ldp	q6, q7, [x1], #32
+	prfm	PLDL1KEEP, [x10]
+	prfm	PLDL1KEEP, [x10, #64]
+	add	x10, x10, #128
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+	stp	q4, q5, [x0], #32
+	stp	q6, q7, [x0], #32
+	bne	kryo_bb_copy_128_loop_outer_doublepld
+	cmp	x12, #0
+	beq	kryo_bb_pop_before_nopld
+	cmp	x12, #(448*1024/128)
+	bls	kryo_bb_copy_128_loop_outer
+
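+// More than 448KB left: use a real load (ldr) from the read-ahead pointer
+// instead of prfm hints, presumably a stronger prefetch for copies that
+// stream past the caches.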
+kryo_bb_copy_128_loop_ddr:
+	subs	x12, x12, #1
+	ldr	x3, [x10], #128
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	ldp	q4, q5, [x1], #32
+	ldp	q6, q7, [x1], #32
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+	stp	q4, q5, [x0], #32
+	stp	q6, q7, [x0], #32
+	bne	kryo_bb_copy_128_loop_ddr
+	b	kryo_bb_pop_before_nopld
+
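+// Medium copy: prefetch only the first block ahead, reserve the last PLDOFFS
+// blocks for the no-prefetch tail loop, then fall through to the main
+// prefetching loop (or branch to the DDR loop if more than 448KB remain).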
+kryo_bb_prime_pump:
+	mov	x14, #(PLDOFFS*PLDSIZE)
+	add	x10, x1, #(PLDOFFS*PLDSIZE)
+	bic	x10, x10, #0x7F
+	sub	x12, x12, #PLDOFFS
+	prfm	PLDL1KEEP, [x10, #(-1*PLDSIZE)]
+	prfm	PLDL1KEEP, [x10, #(-1*PLDSIZE+64)]
+	cmp	x12, #(448*1024/128)
+	bhi	kryo_bb_copy_128_loop_ddr
+
+kryo_bb_copy_128_loop_outer:
+	subs	x12, x12, #1
+	prfm	PLDL1KEEP, [x10]
+	prfm	PLDL1KEEP, [x10, #64]
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	ldp	q4, q5, [x1], #32
+	ldp	q6, q7, [x1], #32
+	add	x10, x10, #128
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+	stp	q4, q5, [x0], #32
+	stp	q6, q7, [x0], #32
+	bne	kryo_bb_copy_128_loop_outer
+
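+// Copy the final (x14 >> 7) 128-byte blocks without prefetching, then fall
+// through to handle the sub-128-byte tail.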
+kryo_bb_pop_before_nopld:
+	lsr	x12, x14, #7
+kryo_bb_copy_128_loop_nopld:
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	ldp	q4, q5, [x1], #32
+	ldp	q6, q7, [x1], #32
+	subs	x12, x12, #1
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+	stp	q4, q5, [x0], #32
+	stp	q6, q7, [x0], #32
+	bne	kryo_bb_copy_128_loop_nopld
+	ands	x2, x2, #0x7f
+	beq	kryo_bb_exit
+
+kryo_bb_copy_64_a:
+	tbz	x2, #6, kryo_bb_copy_32_a
+	ldp	q0, q1, [x1], #32
+	ldp	q2, q3, [x1], #32
+	stp	q0, q1, [x0], #32
+	stp	q2, q3, [x0], #32
+kryo_bb_copy_32_a:
+	tbz	x2, #5, kryo_bb_16
+	ldp	q0, q1, [x1], #32
+	stp	q0, q1, [x0], #32
+kryo_bb_16:
+	tbz	x2, #4, kryo_bb_lt16
+	ldr	q7, [x1], #16
+	str	q7, [x0], #16
+	ands	x2, x2, #0x0f
+	beq	kryo_bb_exit
+kryo_bb_lt16:
+	tbz	x2, #3, kryo_bb_lt8
+	ldr	x3, [x1], #8
+	str	x3, [x0], #8
+kryo_bb_lt8:
+	tbz	x2, #2, kryo_bb_lt4
+	ldr	w3, [x1], #4
+	str	w3, [x0], #4
+kryo_bb_lt4:
+	tbz	x2, #1, kryo_bb_lt2
+	ldrh	w3, [x1], #2
+	strh	w3, [x0], #2
+kryo_bb_lt2:
+	tbz	x2, #0, kryo_bb_exit
+	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+kryo_bb_exit:
+	mov	x0, x11
+	ret
+
diff --git a/libc/arch-arm64/kryo/kryo.mk b/libc/arch-arm64/kryo/kryo.mk
new file mode 100644
index 0000000..b5b714e
--- /dev/null
+++ b/libc/arch-arm64/kryo/kryo.mk
@@ -0,0 +1,14 @@
+libc_bionic_src_files_arm64 += \
+    arch-arm64/generic/bionic/memchr.S \
+    arch-arm64/generic/bionic/memcmp.S \
+    arch-arm64/kryo/bionic/memcpy.S \
+    arch-arm64/generic/bionic/memmove.S \
+    arch-arm64/generic/bionic/memset.S \
+    arch-arm64/generic/bionic/stpcpy.S \
+    arch-arm64/generic/bionic/strchr.S \
+    arch-arm64/generic/bionic/strcmp.S \
+    arch-arm64/generic/bionic/strcpy.S \
+    arch-arm64/generic/bionic/strlen.S \
+    arch-arm64/generic/bionic/strncmp.S \
+    arch-arm64/generic/bionic/strnlen.S \
+    arch-arm64/generic/bionic/wmemmove.S