ftrace: Remove use of control list and ops

Currently perf has its own list function within the ftrace infrastructure
that seems to be used only to allow for it to have per-cpu disabling as well
as a check to make sure that it's not called while RCU is not watching. It
uses something called the "control_ops" which is used to iterate over ops
under it with the control_list_func().

The problem is that this control_ops and control_list_func unnecessarily
complicates the code. By replacing FTRACE_OPS_FL_CONTROL with two new flags
(FTRACE_OPS_FL_RCU and FTRACE_OPS_FL_PER_CPU) we can remove all the code
that is special with the control ops and add the needed checks within the
generic ftrace_list_func().

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 134f8d4..4736a82 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -76,8 +76,8 @@
  * ENABLED - set/unset when ftrace_ops is registered/unregistered
  * DYNAMIC - set when ftrace_ops is registered to denote dynamically
  *           allocated ftrace_ops which need special care
- * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops
- *           could be controled by following calls:
+ * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops
+ *           could be controlled by following calls:
  *             ftrace_function_local_enable
  *             ftrace_function_local_disable
  * SAVE_REGS - The ftrace_ops wants regs saved at each function called
@@ -121,7 +121,7 @@
 enum {
 	FTRACE_OPS_FL_ENABLED			= 1 << 0,
 	FTRACE_OPS_FL_DYNAMIC			= 1 << 1,
-	FTRACE_OPS_FL_CONTROL			= 1 << 2,
+	FTRACE_OPS_FL_PER_CPU			= 1 << 2,
 	FTRACE_OPS_FL_SAVE_REGS			= 1 << 3,
 	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 4,
 	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 5,
@@ -134,6 +134,7 @@
 	FTRACE_OPS_FL_ALLOC_TRAMP		= 1 << 12,
 	FTRACE_OPS_FL_IPMODIFY			= 1 << 13,
 	FTRACE_OPS_FL_PID			= 1 << 14,
+	FTRACE_OPS_FL_RCU			= 1 << 15,
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -146,11 +147,11 @@
 #endif
 
 /*
- * Note, ftrace_ops can be referenced outside of RCU protection.
- * (Although, for perf, the control ops prevent that). If ftrace_ops is
- * allocated and not part of kernel core data, the unregistering of it will
- * perform a scheduling on all CPUs to make sure that there are no more users.
- * Depending on the load of the system that may take a bit of time.
+ * Note, ftrace_ops can be referenced outside of RCU protection, unless
+ * the RCU flag is set. If ftrace_ops is allocated and not part of kernel
+ * core data, the unregistering of it will perform a scheduling on all CPUs
+ * to make sure that there are no more users. Depending on the load of the
+ * system that may take a bit of time.
  *
  * Any private data added must also take care not to be freed and if private
  * data is added to a ftrace_ops that is in core code, the user of the
@@ -196,34 +197,34 @@
 void clear_ftrace_function(void);
 
 /**
- * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu
+ * ftrace_function_local_enable - enable ftrace_ops on current cpu
  *
  * This function enables tracing on current cpu by decreasing
  * the per cpu control variable.
  * It must be called with preemption disabled and only on ftrace_ops
- * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption
  * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
  */
 static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
 {
-	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)))
 		return;
 
 	(*this_cpu_ptr(ops->disabled))--;
 }
 
 /**
- * ftrace_function_local_disable - enable controlled ftrace_ops on current cpu
+ * ftrace_function_local_disable - disable ftrace_ops on current cpu
  *
- * This function enables tracing on current cpu by decreasing
+ * This function disables tracing on current cpu by increasing
  * the per cpu control variable.
  * It must be called with preemption disabled and only on ftrace_ops
- * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption
  * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
  */
 static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
 {
-	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)))
 		return;
 
 	(*this_cpu_ptr(ops->disabled))++;
@@ -235,12 +236,12 @@
  *
  * This function returns value of ftrace_ops::disabled on current cpu.
  * It must be called with preemption disabled and only on ftrace_ops
- * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption
  * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
  */
 static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
 {
-	WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL));
+	WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU));
 	return *this_cpu_ptr(ops->disabled);
 }
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bf7bebc..bc7f4eb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,8 +62,6 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
-#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_CONTROL)
-
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define INIT_OPS_HASH(opsname)	\
 	.func_hash		= &opsname.local_hash,			\
@@ -113,11 +111,9 @@
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
-static struct ftrace_ops control_ops;
 
 static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
 				   struct ftrace_ops *op, struct pt_regs *regs);
@@ -203,7 +199,7 @@
 	ftrace_trace_function = ftrace_stub;
 }
 
-static void control_ops_disable_all(struct ftrace_ops *ops)
+static void per_cpu_ops_disable_all(struct ftrace_ops *ops)
 {
 	int cpu;
 
@@ -211,16 +207,19 @@
 		*per_cpu_ptr(ops->disabled, cpu) = 1;
 }
 
-static int control_ops_alloc(struct ftrace_ops *ops)
+static int per_cpu_ops_alloc(struct ftrace_ops *ops)
 {
 	int __percpu *disabled;
 
+	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)))
+		return -EINVAL;
+
 	disabled = alloc_percpu(int);
 	if (!disabled)
 		return -ENOMEM;
 
 	ops->disabled = disabled;
-	control_ops_disable_all(ops);
+	per_cpu_ops_disable_all(ops);
 	return 0;
 }
 
@@ -256,10 +255,11 @@
 static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
 {
 	/*
-	 * If this is a dynamic ops or we force list func,
+	 * If this is a dynamic, RCU, or per CPU ops, or we force list func,
 	 * then it needs to call the list anyway.
 	 */
-	if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC)
+	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU |
+			  FTRACE_OPS_FL_RCU) || FTRACE_FORCE_LIST_FUNC)
 		return ftrace_ops_list_func;
 
 	return ftrace_ops_get_func(ops);
@@ -383,26 +383,6 @@
 	return 0;
 }
 
-static void add_ftrace_list_ops(struct ftrace_ops **list,
-				struct ftrace_ops *main_ops,
-				struct ftrace_ops *ops)
-{
-	int first = *list == &ftrace_list_end;
-	add_ftrace_ops(list, ops);
-	if (first)
-		add_ftrace_ops(&ftrace_ops_list, main_ops);
-}
-
-static int remove_ftrace_list_ops(struct ftrace_ops **list,
-				  struct ftrace_ops *main_ops,
-				  struct ftrace_ops *ops)
-{
-	int ret = remove_ftrace_ops(list, ops);
-	if (!ret && *list == &ftrace_list_end)
-		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
-	return ret;
-}
-
 static void ftrace_update_trampoline(struct ftrace_ops *ops);
 
 static int __register_ftrace_function(struct ftrace_ops *ops)
@@ -430,14 +410,12 @@
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
-	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
-		if (control_ops_alloc(ops))
+	if (ops->flags & FTRACE_OPS_FL_PER_CPU) {
+		if (per_cpu_ops_alloc(ops))
 			return -ENOMEM;
-		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
-		/* The control_ops needs the trampoline update */
-		ops = &control_ops;
-	} else
-		add_ftrace_ops(&ftrace_ops_list, ops);
+	}
+
+	add_ftrace_ops(&ftrace_ops_list, ops);
 
 	/* Always save the function, and reset at unregistering */
 	ops->saved_func = ops->func;
@@ -460,11 +438,7 @@
 	if (WARN_ON(!(ops->flags & FTRACE_OPS_FL_ENABLED)))
 		return -EBUSY;
 
-	if (ops->flags & FTRACE_OPS_FL_CONTROL) {
-		ret = remove_ftrace_list_ops(&ftrace_control_list,
-					     &control_ops, ops);
-	} else
-		ret = remove_ftrace_ops(&ftrace_ops_list, ops);
+	ret = remove_ftrace_ops(&ftrace_ops_list, ops);
 
 	if (ret < 0)
 		return ret;
@@ -2630,7 +2604,7 @@
 {
 }
 
-static void control_ops_free(struct ftrace_ops *ops)
+static void per_cpu_ops_free(struct ftrace_ops *ops)
 {
 	free_percpu(ops->disabled);
 }
@@ -2731,13 +2705,13 @@
 
 	if (!command || !ftrace_enabled) {
 		/*
-		 * If these are control ops, they still need their
+		 * If these are per_cpu ops, they still need their
 		 * per_cpu field freed. Since, function tracing is
 		 * not currently active, we can just free them
 		 * without synchronizing all CPUs.
 		 */
-		if (ops->flags & FTRACE_OPS_FL_CONTROL)
-			control_ops_free(ops);
+		if (ops->flags & FTRACE_OPS_FL_PER_CPU)
+			per_cpu_ops_free(ops);
 		return 0;
 	}
 
@@ -2778,7 +2752,7 @@
 	/*
 	 * Dynamic ops may be freed, we must make sure that all
 	 * callers are done before leaving this function.
-	 * The same goes for freeing the per_cpu data of the control
+	 * The same goes for freeing the per_cpu data of the per_cpu
 	 * ops.
 	 *
 	 * Again, normal synchronize_sched() is not good enough.
@@ -2789,13 +2763,13 @@
 	 * infrastructure to do the synchronization, thus we must do it
 	 * ourselves.
 	 */
-	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_CONTROL)) {
+	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) {
 		schedule_on_each_cpu(ftrace_sync);
 
 		arch_ftrace_trampoline_free(ops);
 
-		if (ops->flags & FTRACE_OPS_FL_CONTROL)
-			control_ops_free(ops);
+		if (ops->flags & FTRACE_OPS_FL_PER_CPU)
+			per_cpu_ops_free(ops);
 	}
 
 	return 0;
@@ -5185,44 +5159,6 @@
 	tr->ops->func = ftrace_stub;
 }
 
-static void
-ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
-			struct ftrace_ops *op, struct pt_regs *regs)
-{
-	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
-		return;
-
-	/*
-	 * Some of the ops may be dynamically allocated,
-	 * they must be freed after a synchronize_sched().
-	 */
-	preempt_disable_notrace();
-	trace_recursion_set(TRACE_CONTROL_BIT);
-
-	/*
-	 * Control funcs (perf) uses RCU. Only trace if
-	 * RCU is currently active.
-	 */
-	if (!rcu_is_watching())
-		goto out;
-
-	do_for_each_ftrace_op(op, ftrace_control_list) {
-		if (!(op->flags & FTRACE_OPS_FL_STUB) &&
-		    !ftrace_function_local_disabled(op) &&
-		    ftrace_ops_test(op, ip, regs))
-			op->func(ip, parent_ip, op, regs);
-	} while_for_each_ftrace_op(op);
- out:
-	trace_recursion_clear(TRACE_CONTROL_BIT);
-	preempt_enable_notrace();
-}
-
-static struct ftrace_ops control_ops = {
-	.func	= ftrace_ops_control_func,
-	.flags	= FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED,
-	INIT_OPS_HASH(control_ops)
-};
-
 static inline void
 __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *ignored, struct pt_regs *regs)
@@ -5239,8 +5175,22 @@
 	 * they must be freed after a synchronize_sched().
 	 */
 	preempt_disable_notrace();
+
 	do_for_each_ftrace_op(op, ftrace_ops_list) {
-		if (ftrace_ops_test(op, ip, regs)) {
+		/*
+		 * Check the following for each ops before calling their func:
+		 *  if RCU flag is set, then rcu_is_watching() must be true
+		 *  if PER_CPU is set, then ftrace_function_local_disable()
+		 *                          must be false
+		 *  Otherwise test if the ip matches the ops filter
+		 *
+		 * If any of the above fails then the op->func() is not executed.
+		 */
+		if ((!(op->flags & FTRACE_OPS_FL_RCU) || rcu_is_watching()) &&
+		    (!(op->flags & FTRACE_OPS_FL_PER_CPU) ||
+		     !ftrace_function_local_disabled(op)) &&
+		    ftrace_ops_test(op, ip, regs)) {
+		    
 			if (FTRACE_WARN_ON(!op->func)) {
 				pr_warn("op=%p %pS\n", op, op);
 				goto out;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 919d9d0..d3980b8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -467,8 +467,6 @@
 	TRACE_INTERNAL_IRQ_BIT,
 	TRACE_INTERNAL_SIRQ_BIT,
 
-	TRACE_CONTROL_BIT,
-
 	TRACE_BRANCH_BIT,
 /*
  * Abuse of the trace_recursion.
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index abfc903..2649c85 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -334,7 +334,7 @@
 {
 	struct ftrace_ops *ops = &event->ftrace_ops;
 
-	ops->flags |= FTRACE_OPS_FL_CONTROL;
+	ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU;
 	ops->func = perf_ftrace_function_call;
 	return register_ftrace_function(ops);
 }