Merge "soc: qcom: add microdump collector"
diff --git a/drivers/bus/mhi/core/mhi_init.c b/drivers/bus/mhi/core/mhi_init.c
index 2f8a5eb..6f4c390 100644
--- a/drivers/bus/mhi/core/mhi_init.c
+++ b/drivers/bus/mhi/core/mhi_init.c
@@ -1373,6 +1373,10 @@
 		mutex_init(&mhi_chan->mutex);
 		init_completion(&mhi_chan->completion);
 		rwlock_init(&mhi_chan->lock);
+
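+		/* set BEI when the event ring moderates interrupts */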
+		mhi_event = &mhi_cntrl->mhi_event[mhi_chan->er_index];
+		mhi_chan->bei = !!(mhi_event->intmod);
 	}
 
 	if (mhi_cntrl->bounce_buf) {
diff --git a/drivers/bus/mhi/core/mhi_internal.h b/drivers/bus/mhi/core/mhi_internal.h
index dd173fa..3b82e33 100644
--- a/drivers/bus/mhi/core/mhi_internal.h
+++ b/drivers/bus/mhi/core/mhi_internal.h
@@ -663,7 +663,6 @@
 	struct mhi_ring buf_ring;
 	struct mhi_ring tre_ring;
 	u32 er_index;
-	u32 intmod;
 	enum mhi_ch_type type;
 	enum dma_data_direction dir;
 	struct db_cfg db_cfg;
@@ -671,6 +670,7 @@
 	enum MHI_XFER_TYPE xfer_type;
 	enum MHI_CH_STATE ch_state;
 	enum MHI_EV_CCS ccs;
+	bool bei; /* block event interrupt; set when intmod is non-zero */
 	bool lpm_notify;
 	bool configured;
 	bool offload_ch;
diff --git a/drivers/bus/mhi/core/mhi_main.c b/drivers/bus/mhi/core/mhi_main.c
index 67a87bf..96ea4b6 100644
--- a/drivers/bus/mhi/core/mhi_main.c
+++ b/drivers/bus/mhi/core/mhi_main.c
@@ -396,7 +396,7 @@
 
 	mhi_tre->ptr = MHI_TRE_DATA_PTR(buf_info->p_addr);
 	mhi_tre->dword[0] = MHI_TRE_DATA_DWORD0(buf_info->len);
-	mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(1, 1, 0, 0);
+	mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(mhi_chan->bei, 1, 0, 0);
 
 	MHI_VERB("chan:%d WP:0x%llx TRE:0x%llx 0x%08x 0x%08x\n", mhi_chan->chan,
 		 (u64)mhi_to_physical(tre_ring, mhi_tre), mhi_tre->ptr,
@@ -479,7 +479,7 @@
 	} else {
 		mhi_tre->ptr = MHI_TRE_DATA_PTR(buf_info->p_addr);
 		mhi_tre->dword[0] = MHI_TRE_DATA_DWORD0(buf_info->len);
-		mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(1, 1, 0, 0);
+		mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(mhi_chan->bei, 1, 0, 0);
 	}
 
 	MHI_VERB("chan:%d WP:0x%llx TRE:0x%llx 0x%08x 0x%08x\n", mhi_chan->chan,
@@ -514,7 +514,7 @@
 	struct mhi_ring *buf_ring, *tre_ring;
 	struct mhi_tre *mhi_tre;
 	struct mhi_buf_info *buf_info;
-	int eot, eob, chain, bei;
+	int eot, eob, chain;
 	int ret;
 
 	buf_ring = &mhi_chan->buf_ring;
@@ -534,12 +534,11 @@
 	eob = !!(flags & MHI_EOB);
 	eot = !!(flags & MHI_EOT);
 	chain = !!(flags & MHI_CHAIN);
-	bei = !!(mhi_chan->intmod);
 
 	mhi_tre = tre_ring->wp;
 	mhi_tre->ptr = MHI_TRE_DATA_PTR(buf_info->p_addr);
 	mhi_tre->dword[0] = MHI_TRE_DATA_DWORD0(buf_len);
-	mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(bei, eot, eob, chain);
+	mhi_tre->dword[1] = MHI_TRE_DATA_DWORD1(mhi_chan->bei, eot, eob, chain);
 
 	MHI_VERB("chan:%d WP:0x%llx TRE:0x%llx 0x%08x 0x%08x\n", mhi_chan->chan,
 		 (u64)mhi_to_physical(tre_ring, mhi_tre), mhi_tre->ptr,
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 244be8d..b50ae46 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -214,6 +214,7 @@
 #define ADRENO_PREEMPT_FAULT BIT(4)
 #define ADRENO_GMU_FAULT BIT(5)
 #define ADRENO_CTX_DETATCH_TIMEOUT_FAULT BIT(6)
+#define ADRENO_GMU_FAULT_SKIP_SNAPSHOT BIT(7)
 
 #define ADRENO_SPTP_PC_CTRL 0
 #define ADRENO_LM_CTRL      1
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index 59d4e07..85db248 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -2198,7 +2198,9 @@
 		&adreno_dev->ft_pf_policy) && adreno_dev->cooperative_reset)
 		gmu_core_dev_cooperative_reset(device);
 
-	do_header_and_snapshot(device, fault, hung_rb, cmdobj);
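+	/* the GMU fault path may have already taken an inline snapshot */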
+	if (!(fault & ADRENO_GMU_FAULT_SKIP_SNAPSHOT))
+		do_header_and_snapshot(device, fault, hung_rb, cmdobj);
 
 	/* Turn off the KEEPALIVE vote from the ISR for hard fault */
 	if (gpudev->gpu_keepalive && fault & ADRENO_HARD_FAULT)
diff --git a/drivers/gpu/msm/kgsl_gmu.c b/drivers/gpu/msm/kgsl_gmu.c
index 04cfc06..a818997 100644
--- a/drivers/gpu/msm/kgsl_gmu.c
+++ b/drivers/gpu/msm/kgsl_gmu.c
@@ -537,7 +537,22 @@
 		dev_err_ratelimited(&gmu->pdev->dev,
 			"Failed to set GPU perf idx %d, bw idx %d\n",
 			req.freq, req.bw);
-		gmu_snapshot(device);
+
+		/*
+		 * We can get here in two situations. First, we send a DCVS
+		 * HFI at boot so the GMU knows what level to bring the GPU
+		 * up at; if that fails, it is already handled as part of
+		 * the GMU boot failure handling. Second, we can fail while
+		 * trying to scale an active GPU. In that case we must take
+		 * an inline snapshot and kick off dispatcher based recovery,
+		 * flagging the fault so the dispatcher skips the snapshot.
+		 */
+		if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) {
+			gmu_core_snapshot(device);
+			adreno_set_gpu_fault(adreno_dev, ADRENO_GMU_FAULT |
+				ADRENO_GMU_FAULT_SKIP_SNAPSHOT);
+			adreno_dispatcher_schedule(device);
+		}
 	}
 
 	/* indicate actual clock change */
@@ -1593,7 +1608,10 @@
 			goto error_gmu;
 
 		/* Request default DCVS level */
-		kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+		ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+		if (ret)
+			goto error_gmu;
+
 		msm_bus_scale_client_update_request(gmu->pcl, 0);
 		break;
 
@@ -1613,7 +1631,9 @@
 		if (ret)
 			goto error_gmu;
 
-		kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+		ret = kgsl_pwrctrl_set_default_gpu_pwrlevel(device);
+		if (ret)
+			goto error_gmu;
 		break;
 
 	case KGSL_STATE_RESET:
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 6786ecb4..ddf61de 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -3136,7 +3136,8 @@
  * kgsl_pwrctrl_set_default_gpu_pwrlevel() - Set GPU to default power level
  * @device: Pointer to the kgsl_device struct
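+ * Return: 0 on success, or a negative error code on failure.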
  */
-void kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device)
+int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device)
 {
 	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
 	unsigned int new_level = pwr->default_pwrlevel;
@@ -3158,5 +3158,5 @@
 	pwr->previous_pwrlevel = old_level;
 
 	/* Request adjusted DCVS level */
-	kgsl_clk_set_rate(device, pwr->active_pwrlevel);
+	return kgsl_clk_set_rate(device, pwr->active_pwrlevel);
 }
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
index 6dc7c53..0f4dc72 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -267,7 +267,7 @@
 			struct kgsl_pwr_constraint *pwrc, uint32_t id);
 void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device,
 			unsigned long timeout_us);
-void kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device);
+int kgsl_pwrctrl_set_default_gpu_pwrlevel(struct kgsl_device *device);
 void kgsl_pwrctrl_disable_unused_opp(struct kgsl_device *device,
 		struct device *dev);
 
diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa_hw_stats.c b/drivers/platform/msm/ipa/ipa_v3/ipa_hw_stats.c
index 01164b6..3458de6 100644
--- a/drivers/platform/msm/ipa/ipa_v3/ipa_hw_stats.c
+++ b/drivers/platform/msm/ipa/ipa_v3/ipa_hw_stats.c
@@ -36,7 +36,8 @@
 		teth_stats_init->prod_mask = (
 			IPA_CLIENT_BIT_32(IPA_CLIENT_MHI_PRIME_TETH_PROD) |
 			IPA_CLIENT_BIT_32(IPA_CLIENT_USB_PROD));
-		if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
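+		/* WLAN traffic uses the WLAN2 pipes only with WDI3 over GSI */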
+		if (ipa3_ctx->ipa_wdi3_over_gsi)
 			teth_stats_init->prod_mask |=
 			IPA_CLIENT_BIT_32(IPA_CLIENT_WLAN2_PROD);
 		else
@@ -57,7 +57,7 @@
 			teth_stats_init->dst_ep_mask[ep_index] =
 				IPA_CLIENT_BIT_32(IPA_CLIENT_USB_CONS);
 
-			if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+			if (ipa3_ctx->ipa_wdi3_over_gsi)
 				teth_stats_init->dst_ep_mask[ep_index] |=
 				IPA_CLIENT_BIT_32(IPA_CLIENT_WLAN2_CONS);
 			else
@@ -78,7 +78,7 @@
 			IPA_CLIENT_BIT_32(IPA_CLIENT_Q6_WAN_PROD) |
 			IPA_CLIENT_BIT_32(IPA_CLIENT_USB_PROD));
 
-		if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+		if (ipa3_ctx->ipa_wdi3_over_gsi)
 			teth_stats_init->prod_mask |=
 			IPA_CLIENT_BIT_32(IPA_CLIENT_WLAN2_PROD);
 		else
@@ -102,7 +102,7 @@
 			teth_stats_init->dst_ep_mask[ep_index] =
 			IPA_CLIENT_BIT_32(IPA_CLIENT_USB_CONS);
 
-			if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+			if (ipa3_ctx->ipa_wdi3_over_gsi)
 				teth_stats_init->dst_ep_mask[ep_index] |=
 				IPA_CLIENT_BIT_32(IPA_CLIENT_WLAN2_CONS);
 			else
@@ -131,7 +131,7 @@
 			teth_stats_init->dst_ep_mask[ep_index] =
 				IPA_CLIENT_BIT_32(IPA_CLIENT_USB_CONS);
 
-			if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+			if (ipa3_ctx->ipa_wdi3_over_gsi)
 				teth_stats_init->dst_ep_mask[ep_index] |=
 				IPA_CLIENT_BIT_32(IPA_CLIENT_WLAN2_CONS);
 			else
diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa_mhi_proxy.c b/drivers/platform/msm/ipa/ipa_v3/ipa_mhi_proxy.c
index b768750..18cc101 100644
--- a/drivers/platform/msm/ipa/ipa_v3/ipa_mhi_proxy.c
+++ b/drivers/platform/msm/ipa/ipa_v3/ipa_mhi_proxy.c
@@ -641,7 +641,9 @@
 	 * executed from mhi context.
 	 */
 	if (vote) {
-		ret = mhi_device_get_sync(imp_ctx->md.mhi_dev, MHI_VOTE_BUS);
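+		/* hold the device vote as well so the device stays active */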
+		ret = mhi_device_get_sync(imp_ctx->md.mhi_dev,
+			MHI_VOTE_BUS | MHI_VOTE_DEVICE);
 		if (ret) {
 			IMP_ERR("mhi_sync_get failed %d\n", ret);
 			resp->resp.result = IPA_QMI_RESULT_FAILURE_V01;
@@ -651,7 +652,8 @@
 			return resp;
 		}
 	} else {
-		mhi_device_put(imp_ctx->md.mhi_dev, MHI_VOTE_BUS);
+		mhi_device_put(imp_ctx->md.mhi_dev,
+			MHI_VOTE_BUS | MHI_VOTE_DEVICE);
 	}
 
 	mutex_lock(&imp_ctx->mutex);
diff --git a/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c b/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c
index 1e6aa10..6c58fb2 100644
--- a/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c
+++ b/drivers/platform/msm/ipa/ipa_v3/rmnet_ipa.c
@@ -3441,7 +3441,7 @@
 		}
 	}
 
-	if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+	if (ipa3_ctx->ipa_wdi3_over_gsi)
 		wlan_client = IPA_CLIENT_WLAN2_CONS;
 	else
 		wlan_client = IPA_CLIENT_WLAN1_CONS;
@@ -3524,7 +3524,7 @@
 		return rc;
 	}
 
-	if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+	if (ipa3_ctx->ipa_wdi3_over_gsi)
 		wlan_client = IPA_CLIENT_WLAN2_CONS;
 	else
 		wlan_client = IPA_CLIENT_WLAN1_CONS;
@@ -3649,7 +3649,7 @@
 	/* query WLAN UL stats */
 	memset(con_stats, 0, sizeof(struct ipa_quota_stats_all));
 
-	if (ipa3_ctx->ipa_hw_type == IPA_HW_v4_5)
+	if (ipa3_ctx->ipa_wdi3_over_gsi)
 		rc = ipa_query_teth_stats(IPA_CLIENT_WLAN2_PROD,
 			con_stats, reset);
 	else
diff --git a/drivers/soc/qcom/peripheral-loader.c b/drivers/soc/qcom/peripheral-loader.c
index 724d597..78bb53d 100644
--- a/drivers/soc/qcom/peripheral-loader.c
+++ b/drivers/soc/qcom/peripheral-loader.c
@@ -53,9 +53,20 @@
 #define MAX_LEN 96
 #define NUM_OF_ENCRYPTED_KEY	3
 
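+/* log a PIL milestone to the IPC log if available, else as a trace event */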
+#define pil_log(msg, desc)				\
+	do {						\
+		if (pil_ipc_log)			\
+			pil_ipc("[%s]: %s", (desc)->name, msg); \
+		else					\
+			trace_pil_event(msg, desc);	\
+	} while (0)
+
 static void __iomem *pil_info_base;
 static struct md_global_toc *g_md_toc;
 
+void *pil_ipc_log;
+
 /**
  * proxy_timeout - Override for proxy vote timeouts
  * -1: Use driver-specified timeout
@@ -1245,7 +1256,7 @@
 		goto release_fw;
 	}
 
-	trace_pil_event("before_init_image", desc);
+	pil_log("before_init_image", desc);
 	if (desc->ops->init_image)
 		ret = desc->ops->init_image(desc, fw->data, fw->size);
 	if (ret) {
@@ -1253,7 +1264,7 @@
 		goto err_boot;
 	}
 
-	trace_pil_event("before_mem_setup", desc);
+	pil_log("before_mem_setup", desc);
 	if (desc->ops->mem_setup)
 		ret = desc->ops->mem_setup(desc, priv->region_start,
 				priv->region_end - priv->region_start);
@@ -1269,7 +1280,7 @@
 		 * Also for secure boot devices, modem memory has to be released
 		 * after MBA is booted
 		 */
-		trace_pil_event("before_assign_mem", desc);
+		pil_log("before_assign_mem", desc);
 		if (desc->modem_ssr) {
 			ret = pil_assign_mem_to_linux(desc, priv->region_start,
 				(priv->region_end - priv->region_start));
@@ -1288,7 +1299,7 @@
 		hyp_assign = true;
 	}
 
-	trace_pil_event("before_load_seg", desc);
+	pil_log("before_load_seg", desc);
 
 	/**
 	 * Fallback to serial loading of blobs if the
@@ -1307,7 +1318,7 @@
 	}
 
 	if (desc->subsys_vmid > 0) {
-		trace_pil_event("before_reclaim_mem", desc);
+		pil_log("before_reclaim_mem", desc);
 		ret =  pil_reclaim_mem(desc, priv->region_start,
 				(priv->region_end - priv->region_start),
 				desc->subsys_vmid);
@@ -1319,14 +1330,14 @@
 		hyp_assign = false;
 	}
 
-	trace_pil_event("before_auth_reset", desc);
+	pil_log("before_auth_reset", desc);
 	notify_before_auth_and_reset(desc->dev);
 	ret = desc->ops->auth_and_reset(desc);
 	if (ret) {
 		pil_err(desc, "Failed to bring out of reset(rc:%d)\n", ret);
 		goto err_auth_and_reset;
 	}
-	trace_pil_event("reset_done", desc);
+	pil_log("reset_done", desc);
 	pil_info(desc, "Brought out of reset\n");
 	desc->modem_ssr = false;
 err_auth_and_reset:
@@ -1651,6 +1662,10 @@
 	if (!pil_wq)
 		pr_warn("pil: Defaulting to sequential firmware loading.\n");
 
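+	/* pil_log() falls back to trace_pil_event() if this create fails */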
+	pil_ipc_log = ipc_log_context_create(2, "PIL-IPC", 0);
+	if (!pil_ipc_log)
+		pr_warn("Failed to setup PIL ipc logging\n");
 out:
 	return register_pm_notifier(&pil_pm_notifier);
 }
diff --git a/drivers/soc/qcom/peripheral-loader.h b/drivers/soc/qcom/peripheral-loader.h
index 2c1373d..c83b038 100644
--- a/drivers/soc/qcom/peripheral-loader.h
+++ b/drivers/soc/qcom/peripheral-loader.h
@@ -8,11 +8,22 @@
 #include <linux/mailbox_client.h>
 #include <linux/mailbox/qmp.h>
 #include "minidump_private.h"
+#include <linux/ipc_logging.h>
 
 struct device;
 struct module;
 struct pil_priv;
 
+extern void *pil_ipc_log;
+
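+/* log into the PIL IPC context, tagged with the calling function */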
+#define pil_ipc(__msg, ...) \
+do { \
+	if (pil_ipc_log) \
+		ipc_log_string(pil_ipc_log, \
+			"[%s]: " __msg, __func__, ##__VA_ARGS__); \
+} while (0)
+
 /**
  * struct pil_desc - PIL descriptor
  * @name: string used for pil_get()
diff --git a/drivers/soc/qcom/subsystem_restart.c b/drivers/soc/qcom/subsystem_restart.c
index 067d169..f8fb034 100644
--- a/drivers/soc/qcom/subsystem_restart.c
+++ b/drivers/soc/qcom/subsystem_restart.c
@@ -274,6 +274,8 @@
 
 	for (i = 0; i < ARRAY_SIZE(restart_levels); i++)
 		if (!strncasecmp(buf, restart_levels[i], count)) {
+			pil_ipc("[%s]: change restart level to %d\n",
+				subsys->desc->name, i);
 			subsys->restart_level = i;
 			return orig_count;
 		}
@@ -849,7 +851,7 @@
 		subsys_set_state(subsys, SUBSYS_ONLINE);
 		return 0;
 	}
-
+	pil_ipc("[%s]: before wait_for_err_ready\n", subsys->desc->name);
 	ret = wait_for_err_ready(subsys);
 	if (ret) {
 		/* pil-boot succeeded but we need to shutdown
@@ -865,6 +867,7 @@
 
 	notify_each_subsys_device(&subsys, 1, SUBSYS_AFTER_POWERUP,
 								NULL);
+	pil_ipc("[%s]: exit\n", subsys->desc->name);
 	return ret;
 }
 
@@ -872,6 +875,7 @@
 {
 	const char *name = subsys->desc->name;
 
+	pil_ipc("[%s]: entry\n", subsys->desc->name);
 	notify_each_subsys_device(&subsys, 1, SUBSYS_BEFORE_SHUTDOWN, NULL);
 	reinit_completion(&subsys->shutdown_ack);
 	if (!of_property_read_bool(subsys->desc->dev->of_node,
@@ -890,6 +894,7 @@
 	subsys_set_state(subsys, SUBSYS_OFFLINE);
 	disable_all_irqs(subsys);
 	notify_each_subsys_device(&subsys, 1, SUBSYS_AFTER_SHUTDOWN, NULL);
+	pil_ipc("[%s]: exit\n", subsys->desc->name);
 }
 
 int subsystem_set_fwname(const char *name, const char *fw_name)
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 64d5f87..7f7297e 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,7 @@
 extern unsigned int sysctl_sched_group_upmigrate_pct;
 extern unsigned int sysctl_sched_group_downmigrate_pct;
 extern unsigned int sysctl_sched_conservative_pl;
+extern unsigned int sysctl_sched_many_wakeup_threshold;
 extern unsigned int sysctl_sched_walt_rotate_big_tasks;
 extern unsigned int sysctl_sched_min_task_util_for_boost;
 extern unsigned int sysctl_sched_min_task_util_for_colocation;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 14c1915..ae2c7cc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3871,7 +3871,7 @@
 }
 
 static inline bool
-bias_to_waker_cpu(struct task_struct *p, int cpu, int start_cpu)
+bias_to_this_cpu(struct task_struct *p, int cpu, int start_cpu)
 {
 	bool base_test = cpumask_test_cpu(cpu, &p->cpus_allowed) &&
 			cpu_active(cpu);
@@ -3941,6 +3941,7 @@
 	int fastpath;
 	int start_cpu;
 	bool strict_max;
+	int skip_cpu;
 };
 
 static inline void adjust_cpus_for_packing(struct task_struct *p,
@@ -6809,6 +6810,13 @@
 	return sched_boost() != CONSERVATIVE_BOOST &&
 		get_rtg_status(p) && p->unfilter;
 }
+
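+/* true when the sibling count hint says many tasks wake up together */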
+static inline bool is_many_wakeup(int sibling_count_hint)
+{
+	return sibling_count_hint >= sysctl_sched_many_wakeup_threshold;
+}
+
 #else
 static inline bool get_rtg_status(struct task_struct *p)
 {
@@ -6819,6 +6826,11 @@
 {
 	return false;
 }
+
+static inline bool is_many_wakeup(int sibling_count_hint)
+{
+	return false;
+}
 #endif
 
 static int get_start_cpu(struct task_struct *p)
@@ -6866,6 +6878,7 @@
 	NONE = 0,
 	SYNC_WAKEUP,
 	PREV_CPU_FASTPATH,
+	MANY_WAKEUP,
 };
 
 static void find_best_target(struct sched_domain *sd, cpumask_t *cpus,
@@ -6960,6 +6973,10 @@
 			if (sched_cpu_high_irqload(i))
 				continue;
 
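+			/* skip the cpu the caller asked us to avoid */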
+			if (fbt_env->skip_cpu == i)
+				continue;
+
 			/*
 			 * p's blocked utilization is still accounted for on prev_cpu
 			 * so prev_cpu will receive a negative bias due to the double
@@ -7598,7 +7614,8 @@
  * let's keep things simple by re-using the existing slow path.
  */
 
-static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
+static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu,
+				     int sync, int sibling_count_hint)
 {
 	unsigned long prev_energy = ULONG_MAX, best_energy = ULONG_MAX;
 	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -7635,10 +7652,18 @@
 		sync = 0;
 
 	if (sysctl_sched_sync_hint_enable && sync &&
-				bias_to_waker_cpu(p, cpu, start_cpu)) {
+				bias_to_this_cpu(p, cpu, start_cpu)) {
 		best_energy_cpu = cpu;
 		fbt_env.fastpath = SYNC_WAKEUP;
-		goto sync_wakeup;
+		goto done;
+	}
+
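+	/* many-task wakeup: keep the wakee on its previous cpu */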
+	if (is_many_wakeup(sibling_count_hint) && prev_cpu != cpu &&
+				bias_to_this_cpu(p, prev_cpu, start_cpu)) {
+		best_energy_cpu = prev_cpu;
+		fbt_env.fastpath = MANY_WAKEUP;
+		goto done;
 	}
 
 	rcu_read_lock();
@@ -7668,6 +7692,9 @@
 		fbt_env.boosted = boosted;
 		fbt_env.strict_max = is_rtg &&
 			(task_boost == TASK_BOOST_STRICT_MAX);
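+		/* in a many-wakeup burst, keep wakees off this waking cpu */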
+		fbt_env.skip_cpu = is_many_wakeup(sibling_count_hint) ?
+				   cpu : -1;
 
 		find_best_target(NULL, candidates, p, &fbt_env);
 	} else {
@@ -7732,7 +7758,7 @@
 	    ((prev_energy - best_energy) <= prev_energy >> 4))
 		best_energy_cpu = prev_cpu;
 
-sync_wakeup:
+done:
 
 	trace_sched_task_util(p, cpumask_bits(candidates)[0], best_energy_cpu,
 			sync, need_idle, fbt_env.fastpath, placement_boost,
@@ -7770,7 +7796,8 @@
 
 	if (static_branch_unlikely(&sched_energy_present)) {
 		rcu_read_lock();
-		new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
+		new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync,
+						    sibling_count_hint);
 		if (unlikely(new_cpu < 0))
 			new_cpu = prev_cpu;
 		rcu_read_unlock();
@@ -7784,7 +7811,8 @@
 			if (schedtune_prefer_idle(p) && !sched_feat(EAS_PREFER_IDLE) && !sync)
 				goto sd_loop;
 
-			new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
+			new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync,
+							    sibling_count_hint);
 			if (new_cpu >= 0)
 				return new_cpu;
 			new_cpu = prev_cpu;
@@ -12647,7 +12675,7 @@
 
 		raw_spin_lock(&migration_lock);
 		rcu_read_lock();
-		new_cpu = find_energy_efficient_cpu(p, prev_cpu, 0);
+		new_cpu = find_energy_efficient_cpu(p, prev_cpu, 0, 1);
 		rcu_read_unlock();
 		if ((new_cpu != -1) && (new_cpu != prev_cpu) &&
 		    (capacity_orig_of(new_cpu) > capacity_orig_of(prev_cpu))) {
diff --git a/kernel/sched/walt.c b/kernel/sched/walt.c
index bbed209..ea9166f 100644
--- a/kernel/sched/walt.c
+++ b/kernel/sched/walt.c
@@ -989,6 +989,8 @@
 unsigned int min_possible_efficiency = UINT_MAX;
 
 unsigned int sysctl_sched_conservative_pl;
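+/* sibling count hint at or above this takes the many-wakeup fastpath */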
+unsigned int sysctl_sched_many_wakeup_threshold = 1000;
 
 #define INC_STEP 8
 #define DEC_STEP 2
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 2cba78e..acdccea 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -415,6 +415,15 @@
 		.extra2		= &one,
 	},
 	{
+		.procname	= "sched_many_wakeup_threshold",
+		.data		= &sysctl_sched_many_wakeup_threshold,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &two,
+		.extra2		= &one_thousand,
+	},
+	{
 		.procname	= "sched_walt_rotate_big_tasks",
 		.data		= &sysctl_sched_walt_rotate_big_tasks,
 		.maxlen		= sizeof(unsigned int),
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index e6945b5..f5b3bf0 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -272,9 +272,12 @@
 		goto out;
 	}
 
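+	/* hold event_mutex so a parallel destroy cannot race with init */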
+	mutex_lock(&event_mutex);
 	ret = perf_trace_event_init(tp_event, p_event);
 	if (ret)
 		destroy_local_trace_kprobe(tp_event);
+	mutex_unlock(&event_mutex);
 out:
 	kfree(func);
 	return ret;
@@ -282,8 +284,10 @@
 
 void perf_kprobe_destroy(struct perf_event *p_event)
 {
+	mutex_lock(&event_mutex);
 	perf_trace_event_close(p_event);
 	perf_trace_event_unreg(p_event);
+	mutex_unlock(&event_mutex);
 
 	destroy_local_trace_kprobe(p_event->tp_event);
 }