RISC-V: Add dynamic ftrace support for RISC-V platforms

This patch set includes the building blocks of dynamic ftrace features
for RISC-V machines.  I'm afraid I'm not very familiar with ftrace, but
the code looks OK to me.  It's been used to track down a performance
problem in our SPI driver and appears to work acceptably, but we haven't
given it a whole lot of banging yet so there might still be some bugs
lurking around somewhere.
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 04807c7..61dd827 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -114,6 +114,9 @@
 	select 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 
 endchoice
 
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 6719dd3..899226e 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -11,6 +11,9 @@
 LDFLAGS         :=
 OBJCOPYFLAGS    := -O binary
 LDFLAGS_vmlinux :=
+ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+	LDFLAGS_vmlinux := --no-relax
+endif
 KBUILD_AFLAGS_MODULE += -fPIC
 KBUILD_CFLAGS_MODULE += -fPIC
 
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index 66d4175..c6dcc52 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -8,3 +8,59 @@
 #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER)
 #define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#ifndef __ASSEMBLY__
+void _mcount(void);
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+/*
+ * A general call in RISC-V is a pair of insts:
+ * 1) auipc: setting high-20 pc-related bits to ra register
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ *          return address (original pc + 4)
+ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+
+#define MCOUNT_ADDR		((unsigned long)_mcount)
+#define JALR_SIGN_MASK		(0x00000800)
+#define JALR_OFFSET_MASK	(0x00000fff)
+#define AUIPC_OFFSET_MASK	(0xfffff000)
+#define AUIPC_PAD		(0x00001000)
+#define JALR_SHIFT		20
+#define JALR_BASIC		(0x000080e7)
+#define AUIPC_BASIC		(0x00000097)
+#define NOP4			(0x00000013)
+
+#define make_call(caller, callee, call)					\
+do {									\
+	call[0] = to_auipc_insn((unsigned int)((unsigned long)callee -	\
+				(unsigned long)caller));		\
+	call[1] = to_jalr_insn((unsigned int)((unsigned long)callee -	\
+			       (unsigned long)caller));			\
+} while (0)
+
+#define to_jalr_insn(offset)						\
+	(((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
+
+#define to_auipc_insn(offset)						\
+	((offset & JALR_SIGN_MASK) ?					\
+	(((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) :	\
+	((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
+
+/*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+ */
+#define MCOUNT_INSN_SIZE 8
+#endif
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 196f62f..d7bdf88 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -34,7 +34,8 @@
 obj-$(CONFIG_SMP)		+= smpboot.o
 obj-$(CONFIG_SMP)		+= smp.o
 obj-$(CONFIG_MODULES)		+= module.o
-obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER)	+= ftrace.o
+
+obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= mcount-dyn.o
 
 clean:
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index d0de68d..1157b6b 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -6,9 +6,126 @@
  */
 
 #include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+static int ftrace_check_current_call(unsigned long hook_pos,
+				     unsigned int *expected)
+{
+	unsigned int replaced[2];
+	unsigned int nops[2] = {NOP4, NOP4};
+
+	/* we expect nops at the hook position */
+	if (!expected)
+		expected = nops;
+
+	/*
+	 * Read the text we want to modify;
+	 * return must be -EFAULT on read error
+	 */
+	if (probe_kernel_read(replaced, (void *)hook_pos, MCOUNT_INSN_SIZE))
+		return -EFAULT;
+
+	/*
+	 * Make sure it is what we expect it to be;
+	 * return must be -EINVAL on failed comparison
+	 */
+	if (memcmp(expected, replaced, sizeof(replaced))) {
+		pr_err("%p: expected (%08x %08x) but get (%08x %08x)",
+		       (void *)hook_pos, expected[0], expected[1], replaced[0],
+		       replaced[1]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+				bool enable)
+{
+	unsigned int call[2];
+	unsigned int nops[2] = {NOP4, NOP4};
+	int ret = 0;
+
+	make_call(hook_pos, target, call);
+
+	/* replace the auipc-jalr pair at once */
+	ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
+				 MCOUNT_INSN_SIZE);
+	/* return must be -EPERM on write error */
+	if (ret)
+		return -EPERM;
+
+	smp_mb();
+	flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE);
+
+	return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	int ret = ftrace_check_current_call(rec->ip, NULL);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(rec->ip, addr, call);
+	ret = ftrace_check_current_call(rec->ip, call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, false);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+				       (unsigned long)func, true);
+	if (!ret) {
+		ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+					   (unsigned long)func, true);
+	}
+
+	return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+		       unsigned long addr)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(rec->ip, old_addr, call);
+	ret = ftrace_check_current_call(rec->ip, call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call(rec->ip, addr, true);
+}
+#endif
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 /*
- * Most of this file is copied from arm64.
+ * Most of this function is copied from arm64.
  */
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer)
@@ -34,8 +151,62 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 		return;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer, NULL);
+				       frame_pointer, parent);
 	if (err == -EBUSY)
 		return;
 	*parent = return_hooker;
 }
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern void ftrace_graph_call(void);
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	unsigned int call[2];
+	static int init_graph = 1;
+	int ret;
+
+	make_call(&ftrace_graph_call, &ftrace_stub, call);
+
+	/*
+	 * When enabling graph tracer for the first time, ftrace_graph_call
+	 * should contains a call to ftrace_stub.  Once it has been disabled,
+	 * the 8-bytes at the position becomes NOPs.
+	 */
+	if (init_graph) {
+		ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+						call);
+		init_graph = 0;
+	} else {
+		ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+						NULL);
+	}
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+				    (unsigned long)&prepare_ftrace_return, true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	unsigned int call[2];
+	int ret;
+
+	make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
+
+	/*
+	 * This is to make sure that ftrace_enable_ftrace_graph_caller
+	 * did the right thing.
+	 */
+	ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+					call);
+
+	if (ret)
+		return ret;
+
+	return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+				    (unsigned long)&prepare_ftrace_return, false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
new file mode 100644
index 0000000..35a6ed7
--- /dev/null
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2017 Andes Technology Corporation */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/csr.h>
+#include <asm/unistd.h>
+#include <asm/thread_info.h>
+#include <asm/asm-offsets.h>
+#include <asm-generic/export.h>
+#include <asm/ftrace.h>
+
+	.text
+
+	.macro SAVE_ABI_STATE
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	addi    sp, sp, -48
+	sd      s0, 32(sp)
+	sd      ra, 40(sp)
+	addi    s0, sp, 48
+	sd      t0, 24(sp)
+	sd      t1, 16(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	sd      t2, 8(sp)
+#endif
+#else
+	addi	sp, sp, -16
+	sd	s0, 0(sp)
+	sd	ra, 8(sp)
+	addi	s0, sp, 16
+#endif
+	.endm
+
+	.macro RESTORE_ABI_STATE
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	ld	s0, 32(sp)
+	ld	ra, 40(sp)
+	addi	sp, sp, 48
+#else
+	ld	ra, 8(sp)
+	ld	s0, 0(sp)
+	addi	sp, sp, 16
+#endif
+	.endm
+
+	.macro RESTORE_GRAPH_ARGS
+	ld	a0, 24(sp)
+	ld	a1, 16(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	a2, 8(sp)
+#endif
+	.endm
+
+ENTRY(ftrace_graph_caller)
+	addi	sp, sp, -16
+	sd	s0, 0(sp)
+	sd	ra, 8(sp)
+	addi	s0, sp, 16
+ftrace_graph_call:
+	.global ftrace_graph_call
+	/*
+	 * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
+	 * call below.  Check ftrace_modify_all_code for details.
+	 */
+	call	ftrace_stub
+	ld	ra, 8(sp)
+	ld	s0, 0(sp)
+	addi	sp, sp, 16
+	ret
+ENDPROC(ftrace_graph_caller)
+
+ENTRY(ftrace_caller)
+	/*
+	 * a0: the address in the caller when calling ftrace_caller
+	 * a1: the caller's return address
+	 * a2: the address of global variable function_trace_op
+	 */
+	ld	a1, -8(s0)
+	addi	a0, ra, -MCOUNT_INSN_SIZE
+	la	t5, function_trace_op
+	ld	a2, 0(t5)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/*
+	 * the graph tracer (specifically, prepare_ftrace_return) needs these
+	 * arguments but for now the function tracer occupies the regs, so we
+	 * save them in temporary regs to recover later.
+	 */
+	addi	t0, s0, -8
+	mv	t1, a0
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	t2, -16(s0)
+#endif
+#endif
+
+	SAVE_ABI_STATE
+ftrace_call:
+	.global ftrace_call
+	/*
+	 * For the dynamic ftrace to work, here we should reserve at least
+	 * 8 bytes for a functional auipc-jalr pair.  The following call
+	 * serves this purpose.
+	 *
+	 * Calling ftrace_update_ftrace_func would overwrite the nops below.
+	 * Check ftrace_modify_all_code for details.
+	 */
+	call	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_ARGS
+	call	ftrace_graph_caller
+#endif
+
+	RESTORE_ABI_STATE
+	ret
+ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+	.macro SAVE_ALL
+	addi	sp, sp, -(PT_SIZE_ON_STACK+16)
+	sd	s0, (PT_SIZE_ON_STACK)(sp)
+	sd	ra, (PT_SIZE_ON_STACK+8)(sp)
+	addi	s0, sp, (PT_SIZE_ON_STACK+16)
+
+	sd x1,  PT_RA(sp)
+	sd x2,  PT_SP(sp)
+	sd x3,  PT_GP(sp)
+	sd x4,  PT_TP(sp)
+	sd x5,  PT_T0(sp)
+	sd x6,  PT_T1(sp)
+	sd x7,  PT_T2(sp)
+	sd x8,  PT_S0(sp)
+	sd x9,  PT_S1(sp)
+	sd x10, PT_A0(sp)
+	sd x11, PT_A1(sp)
+	sd x12, PT_A2(sp)
+	sd x13, PT_A3(sp)
+	sd x14, PT_A4(sp)
+	sd x15, PT_A5(sp)
+	sd x16, PT_A6(sp)
+	sd x17, PT_A7(sp)
+	sd x18, PT_S2(sp)
+	sd x19, PT_S3(sp)
+	sd x20, PT_S4(sp)
+	sd x21, PT_S5(sp)
+	sd x22, PT_S6(sp)
+	sd x23, PT_S7(sp)
+	sd x24, PT_S8(sp)
+	sd x25, PT_S9(sp)
+	sd x26, PT_S10(sp)
+	sd x27, PT_S11(sp)
+	sd x28, PT_T3(sp)
+	sd x29, PT_T4(sp)
+	sd x30, PT_T5(sp)
+	sd x31, PT_T6(sp)
+	.endm
+
+	.macro RESTORE_ALL
+	ld x1,  PT_RA(sp)
+	ld x2,  PT_SP(sp)
+	ld x3,  PT_GP(sp)
+	ld x4,  PT_TP(sp)
+	ld x5,  PT_T0(sp)
+	ld x6,  PT_T1(sp)
+	ld x7,  PT_T2(sp)
+	ld x8,  PT_S0(sp)
+	ld x9,  PT_S1(sp)
+	ld x10, PT_A0(sp)
+	ld x11, PT_A1(sp)
+	ld x12, PT_A2(sp)
+	ld x13, PT_A3(sp)
+	ld x14, PT_A4(sp)
+	ld x15, PT_A5(sp)
+	ld x16, PT_A6(sp)
+	ld x17, PT_A7(sp)
+	ld x18, PT_S2(sp)
+	ld x19, PT_S3(sp)
+	ld x20, PT_S4(sp)
+	ld x21, PT_S5(sp)
+	ld x22, PT_S6(sp)
+	ld x23, PT_S7(sp)
+	ld x24, PT_S8(sp)
+	ld x25, PT_S9(sp)
+	ld x26, PT_S10(sp)
+	ld x27, PT_S11(sp)
+	ld x28, PT_T3(sp)
+	ld x29, PT_T4(sp)
+	ld x30, PT_T5(sp)
+	ld x31, PT_T6(sp)
+
+	ld	s0, (PT_SIZE_ON_STACK)(sp)
+	ld	ra, (PT_SIZE_ON_STACK+8)(sp)
+	addi	sp, sp, (PT_SIZE_ON_STACK+16)
+	.endm
+
+	.macro RESTORE_GRAPH_REG_ARGS
+	ld	a0, PT_T0(sp)
+	ld	a1, PT_T1(sp)
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	a2, PT_T2(sp)
+#endif
+	.endm
+
+/*
+ * Most of the contents are the same as ftrace_caller.
+ */
+ENTRY(ftrace_regs_caller)
+	/*
+	 * a3: the address of all registers in the stack
+	 */
+	ld	a1, -8(s0)
+	addi	a0, ra, -MCOUNT_INSN_SIZE
+	la	t5, function_trace_op
+	ld	a2, 0(t5)
+	addi	a3, sp, -(PT_SIZE_ON_STACK+16)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	addi	t0, s0, -8
+	mv	t1, a0
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+	ld	t2, -16(s0)
+#endif
+#endif
+	SAVE_ALL
+
+ftrace_regs_call:
+	.global ftrace_regs_call
+	call	ftrace_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	RESTORE_GRAPH_REG_ARGS
+	call	ftrace_graph_caller
+#endif
+
+	RESTORE_ALL
+	ret
+ENDPROC(ftrace_regs_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index c46a778..ce9bdc5 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -32,13 +32,13 @@
 	addi	s0, sp, 32
 	.endm
 
-	.macro STORE_ABI_STATE
+	.macro RESTORE_ABI_STATE
 	ld	ra, 8(sp)
 	ld	s0, 0(sp)
 	addi	sp, sp, 16
 	.endm
 
-	.macro STORE_RET_ABI_STATE
+	.macro RESTORE_RET_ABI_STATE
 	ld	ra, 24(sp)
 	ld	s0, 16(sp)
 	ld	a0, 8(sp)
@@ -46,6 +46,10 @@
 	.endm
 
 ENTRY(ftrace_stub)
+#ifdef CONFIG_DYNAMIC_FTRACE
+       .global _mcount
+       .set    _mcount, ftrace_stub
+#endif
 	ret
 ENDPROC(ftrace_stub)
 
@@ -66,15 +70,15 @@
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	mv	a0, t6
 #endif
-	la	t0, ftrace_return_to_handler
-	jalr	t0
+	call	ftrace_return_to_handler
 	mv	a1, a0
-	STORE_RET_ABI_STATE
+	RESTORE_RET_ABI_STATE
 	jalr	a1
 ENDPROC(return_to_handler)
 EXPORT_SYMBOL(return_to_handler)
 #endif
 
+#ifndef CONFIG_DYNAMIC_FTRACE
 ENTRY(_mcount)
 	la	t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -104,9 +108,8 @@
 	ld	a2, -16(s0)
 #endif
 	SAVE_ABI_STATE
-	la	t0, prepare_ftrace_return
-	jalr	t0
-	STORE_ABI_STATE
+	call	prepare_ftrace_return
+	RESTORE_ABI_STATE
 	ret
 #endif
 
@@ -120,7 +123,8 @@
 
 	SAVE_ABI_STATE
 	jalr	t5
-	STORE_ABI_STATE
+	RESTORE_ABI_STATE
 	ret
 ENDPROC(_mcount)
 EXPORT_SYMBOL(_mcount)
+#endif
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 559aae7..a4b1d943 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -18,6 +18,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/ftrace.h>
 
 #ifdef CONFIG_FRAME_POINTER
 
@@ -63,7 +64,12 @@ static void notrace walk_stackframe(struct task_struct *task,
 		frame = (struct stackframe *)fp - 1;
 		sp = fp;
 		fp = frame->fp;
+#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+		pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+					   (unsigned long *)(fp - 8));
+#else
 		pc = frame->ra - 0x4;
+#endif
 	}
 }
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 2033af7..d44d55d 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -376,6 +376,11 @@
     $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
     $type = ".quad";
     $alignment = 8;
+} elsif ($arch eq "riscv") {
+    $function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:";
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
+    $type = ".quad";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }