arm64: klib: Optimised atomic bitops

This patch implements the AArch64-specific atomic bitops functions using
exclusive memory accesses to avoid locking.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S
new file mode 100644
index 0000000..fd1e801
--- /dev/null
+++ b/arch/arm64/lib/bitops.S
@@ -0,0 +1,70 @@
+/*
+ * Based on arch/arm/lib/bitops.h
+ *
+ * Copyright (C) 2013 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
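+/*
+ * x0: bits 5:0  bit offset within the 64-bit word (all 64 bits of x0 are used)
+ *     bits 63:6 index of the 64-bit word, relative to x1
+ * x1: base address the word index is applied to
+ */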
+	.macro	bitop, name, instr
+ENTRY(	\name	)
+	mov	x2, #1
+	and	x3, x0, #63		// x3 = bit index inside the 64-bit word
+	lsr	x0, x0, #6		// x0 = index of the 64-bit word
+	lsl	x3, x2, x3		// x3 = single-bit mask
+	add	x1, x1, x0, lsl #3	// x1 = address of the target word
+1:	ldxr	x2, [x1]		// Exclusive load of the current word
+	\instr	x2, x2, x3		// Combine the word with the mask
+	stxr	w0, x2, [x1]		// w0 = 0 when the exclusive store succeeds
+	cbnz	w0, 1b			// Exclusivity lost, retry
+	ret
+ENDPROC(\name	)
+	.endm
+
+	.macro	testop, name, instr
+ENTRY(	\name	)
+	and	x3, x0, #63		// x3 = bit offset within the word
+	eor	x0, x0, x3		// Clear low bits, leaving word index * 64
+	mov	x2, #1
+	add	x1, x1, x0, lsr #3	// x1 = address of the target word
+	lsl	x4, x2, x3		// x4 = single-bit mask
+	smp_dmb	ish			// Barrier ahead of the exclusive sequence
+1:	ldxr	x2, [x1]		// Exclusive load of the current word
+	lsr	x0, x2, x3		// Old bit moved down to bit 0 of x0
+	\instr	x2, x2, x4		// Combine the word with the mask
+	stxr	w5, x2, [x1]		// Status in w5: must differ from the data register
+	cbnz	w5, 1b			// Exclusive store failed, retry
+	smp_dmb	ish			// Barrier after the exclusive sequence
+	and	x0, x0, #1		// Return only the old bit value (0 or 1)
+	ret
+ENDPROC(\name	)
+	.endm
+
+/*
+ * Atomic bit operations.
+ */
+	bitop	change_bit, eor		// XOR with the mask: inverts the bit
+	bitop	clear_bit, bic		// AND with the inverted mask: clears the bit
+	bitop	set_bit, orr		// OR with the mask: sets the bit
+
+	testop	test_and_change_bit, eor	// Inverts the bit, returns its old value
+	testop	test_and_clear_bit, bic		// Clears the bit, returns its old value
+	testop	test_and_set_bit, orr		// Sets the bit, returns its old value