Blame - arch/x86/events/intel/cqm.c - kernel/msm-4.9

blob: 1b064c43014097ebaefadb7c6cb3f18660d05ef0 [file] [log] [blame]

Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1	/*
				2	* Intel Cache Quality-of-Service Monitoring (CQM) support.
				3	*
				4	* Based very, very heavily on work by Peter Zijlstra.
				5	*/
				6
				7	#include <linux/perf_event.h>
				8	#include <linux/slab.h>
				9	#include <asm/cpu_device_id.h>
Borislav Petkov	27f6d22	2016-02-10 10:55:23 +0100	[diff] [blame^]	10	#include "../perf_event.h"
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	11
				12	#define MSR_IA32_PQR_ASSOC 0x0c8f
				13	#define MSR_IA32_QM_CTR 0x0c8e
				14	#define MSR_IA32_QM_EVTSEL 0x0c8d
				15
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	16	static u32 cqm_max_rmid = -1;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	17	static unsigned int cqm_l3_scale; /* supposedly cacheline size */
				18
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	19	/**
				20	* struct intel_pqr_state - State cache for the PQR MSR
				21	* @rmid: The cached Resource Monitoring ID
				22	* @closid: The cached Class Of Service ID
				23	* @rmid_usecnt: The usage counter for rmid
				24	*
				25	* The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
				26	* lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
				27	* contains both parts, so we need to cache them.
				28	*
				29	* The cache also helps to avoid pointless updates if the value does
				30	* not change.
				31	*/
				32	struct intel_pqr_state {
Thomas Gleixner	b3df4ec	2015-05-19 00:00:51 +0000	[diff] [blame]	33	u32 rmid;
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	34	u32 closid;
				35	int rmid_usecnt;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	36	};
				37
Thomas Gleixner	9e7eaac	2015-05-19 00:00:53 +0000	[diff] [blame]	38	/*
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	39	* The cached intel_pqr_state is strictly per CPU and can never be
Thomas Gleixner	9e7eaac	2015-05-19 00:00:53 +0000	[diff] [blame]	40	* updated from a remote CPU. Both functions which modify the state
				41	* (intel_cqm_event_start and intel_cqm_event_stop) are called with
				42	* interrupts disabled, which is sufficient for the protection.
				43	*/
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	44	static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	45
				46	/*
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	47	* Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
				48	* Also protects event->hw.cqm_rmid
				49	*
				50	* Hold either for stability, both for modification of ->hw.cqm_rmid.
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	51	*/
				52	static DEFINE_MUTEX(cache_mutex);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	53	static DEFINE_RAW_SPINLOCK(cache_lock);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	54
				55	/*
				56	* Groups of events that have the same target(s), one RMID per group.
				57	*/
				58	static LIST_HEAD(cache_groups);
				59
				60	/*
				61	* Mask of CPUs for reading CQM values. We only need one per-socket.
				62	*/
				63	static cpumask_t cqm_cpumask;
				64
				65	#define RMID_VAL_ERROR (1ULL << 63)
				66	#define RMID_VAL_UNAVAIL (1ULL << 62)
				67
				68	#define QOS_L3_OCCUP_EVENT_ID (1 << 0)
				69
				70	#define QOS_EVENT_MASK QOS_L3_OCCUP_EVENT_ID
				71
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	72	/*
				73	* This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
				74	*
				75	* This rmid is always free and is guaranteed to have an associated
				76	* near-zero occupancy value, i.e. no cachelines are tagged with this
				77	* RMID, once __intel_cqm_rmid_rotate() returns.
				78	*/
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	79	static u32 intel_cqm_rotation_rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	80
				81	#define INVALID_RMID (-1)
				82
				83	/*
				84	* Is @rmid valid for programming the hardware?
				85	*
				86	* rmid 0 is reserved by the hardware for all non-monitored tasks, which
				87	* means that we should never come across an rmid with that value.
				88	* Likewise, an rmid value of -1 is used to indicate "no rmid currently
				89	* assigned" and is used as part of the rotation code.
				90	*/
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	91	static inline bool __rmid_valid(u32 rmid)
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	92	{
				93	if (!rmid \|\| rmid == INVALID_RMID)
				94	return false;
				95
				96	return true;
				97	}
				98
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	99	static u64 __rmid_read(u32 rmid)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	100	{
				101	u64 val;
				102
				103	/*
				104	* Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
				105	* it just says that to increase confusion.
				106	*/
				107	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
				108	rdmsrl(MSR_IA32_QM_CTR, val);
				109
				110	/*
				111	* Aside from the ERROR and UNAVAIL bits, assume this thing returns
				112	* the number of cachelines tagged with @rmid.
				113	*/
				114	return val;
				115	}
				116
/* Recycling state of an RMID sitting on the limbo list. */
enum rmid_recycle_state {
	/* Just queued by __put_rmid(); minimum queue time not yet served. */
	RMID_YOUNG = 0,
	/* Queued long enough to be considered for reuse. */
	RMID_AVAILABLE,
	/* Occupancy was found to be above __intel_cqm_threshold. */
	RMID_DIRTY,
};
				122
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	123	struct cqm_rmid_entry {
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	124	u32 rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	125	enum rmid_recycle_state state;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	126	struct list_head list;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	127	unsigned long queue_time;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	128	};
				129
				130	/*
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	131	* cqm_rmid_free_lru - A least recently used list of RMIDs.
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	132	*
				133	* Oldest entry at the head, newest (most recently used) entry at the
				134	* tail. This list is never traversed, it's only used to keep track of
				135	* the lru order. That is, we only pick entries of the head or insert
				136	* them on the tail.
				137	*
				138	* All entries on the list are 'free', and their RMIDs are not currently
				139	* in use. To mark an RMID as in use, remove its entry from the lru
				140	* list.
				141	*
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	142	*
				143	* cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
				144	*
				145	* This list is contains RMIDs that no one is currently using but that
				146	* may have a non-zero occupancy value associated with them. The
				147	* rotation worker moves RMIDs from the limbo list to the free list once
				148	* the occupancy value drops below __intel_cqm_threshold.
				149	*
				150	* Both lists are protected by cache_mutex.
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	151	*/
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	152	static LIST_HEAD(cqm_rmid_free_lru);
				153	static LIST_HEAD(cqm_rmid_limbo_lru);
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	154
				155	/*
				156	* We use a simple array of pointers so that we can lookup a struct
				157	* cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
				158	* and __put_rmid() from having to worry about dealing with struct
				159	* cqm_rmid_entry - they just deal with rmids, i.e. integers.
				160	*
				161	* Once this array is initialized it is read-only. No locks are required
				162	* to access it.
				163	*
				164	* All entries for all RMIDs can be looked up in the this array at all
				165	* times.
				166	*/
				167	static struct cqm_rmid_entry **cqm_rmid_ptrs;
				168
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	169	static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	170	{
				171	struct cqm_rmid_entry *entry;
				172
				173	entry = cqm_rmid_ptrs[rmid];
				174	WARN_ON(entry->rmid != rmid);
				175
				176	return entry;
				177	}
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	178
				179	/*
				180	* Returns < 0 on fail.
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	181	*
				182	* We expect to be called with cache_mutex held.
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	183	*/
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	184	static u32 __get_rmid(void)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	185	{
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	186	struct cqm_rmid_entry *entry;
				187
				188	lockdep_assert_held(&cache_mutex);
				189
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	190	if (list_empty(&cqm_rmid_free_lru))
				191	return INVALID_RMID;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	192
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	193	entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	194	list_del(&entry->list);
				195
				196	return entry->rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	197	}
				198
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	199	static void __put_rmid(u32 rmid)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	200	{
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	201	struct cqm_rmid_entry *entry;
				202
				203	lockdep_assert_held(&cache_mutex);
				204
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	205	WARN_ON(!__rmid_valid(rmid));
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	206	entry = __rmid_entry(rmid);
				207
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	208	entry->queue_time = jiffies;
				209	entry->state = RMID_YOUNG;
				210
				211	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	212	}
				213
				214	static int intel_cqm_setup_rmid_cache(void)
				215	{
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	216	struct cqm_rmid_entry *entry;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	217	unsigned int nr_rmids;
				218	int r = 0;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	219
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	220	nr_rmids = cqm_max_rmid + 1;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	221	cqm_rmid_ptrs = kmalloc(sizeof(struct cqm_rmid_entry )
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	222	nr_rmids, GFP_KERNEL);
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	223	if (!cqm_rmid_ptrs)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	224	return -ENOMEM;
				225
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	226	for (; r <= cqm_max_rmid; r++) {
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	227	struct cqm_rmid_entry *entry;
				228
				229	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
				230	if (!entry)
				231	goto fail;
				232
				233	INIT_LIST_HEAD(&entry->list);
				234	entry->rmid = r;
				235	cqm_rmid_ptrs[r] = entry;
				236
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	237	list_add_tail(&entry->list, &cqm_rmid_free_lru);
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	238	}
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	239
				240	/*
				241	* RMID 0 is special and is always allocated. It's used for all
				242	* tasks that are not monitored.
				243	*/
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	244	entry = __rmid_entry(0);
				245	list_del(&entry->list);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	246
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	247	mutex_lock(&cache_mutex);
				248	intel_cqm_rotation_rmid = __get_rmid();
				249	mutex_unlock(&cache_mutex);
				250
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	251	return 0;
Matt Fleming	35298e5	2015-01-23 18:45:45 +0000	[diff] [blame]	252	fail:
				253	while (r--)
				254	kfree(cqm_rmid_ptrs[r]);
				255
				256	kfree(cqm_rmid_ptrs);
				257	return -ENOMEM;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	258	}
				259
				260	/*
				261	* Determine if @a and @b measure the same set of tasks.
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	262	*
				263	* If @a and @b measure the same set of tasks then we want to share a
				264	* single RMID.
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	265	*/
				266	static bool __match_event(struct perf_event a, struct perf_event b)
				267	{
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	268	/* Per-cpu and task events don't mix */
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	269	if ((a->attach_state & PERF_ATTACH_TASK) !=
				270	(b->attach_state & PERF_ATTACH_TASK))
				271	return false;
				272
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	273	#ifdef CONFIG_CGROUP_PERF
				274	if (a->cgrp != b->cgrp)
				275	return false;
				276	#endif
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	277
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	278	/* If not task event, we're machine wide */
				279	if (!(b->attach_state & PERF_ATTACH_TASK))
				280	return true;
				281
				282	/*
				283	* Events that target same task are placed into the same cache group.
				284	*/
Peter Zijlstra	50f16a8	2015-03-05 22:10:19 +0100	[diff] [blame]	285	if (a->hw.target == b->hw.target)
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	286	return true;
				287
				288	/*
				289	* Are we an inherited event?
				290	*/
				291	if (b->parent == a)
				292	return true;
				293
				294	return false;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	295	}
				296
#ifdef CONFIG_CGROUP_PERF
/*
 * Return the perf cgroup associated with @event.
 *
 * For a task event this is looked up from the target task via
 * perf_cgroup_from_task(); otherwise it is the event's own ->cgrp.
 */
static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
{
	if (event->attach_state & PERF_ATTACH_TASK)
		return perf_cgroup_from_task(event->hw.target, event->ctx);

	return event->cgrp;
}
#endif
				306
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	307	/*
				308	* Determine if @a's tasks intersect with @b's tasks
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	309	*
				310	* There are combinations of events that we explicitly prohibit,
				311	*
				312	* PROHIBITS
				313	* system-wide -> cgroup and task
				314	* cgroup -> system-wide
				315	* -> task in cgroup
				316	* task -> system-wide
				317	* -> task in cgroup
				318	*
				319	* Call this function before allocating an RMID.
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	320	*/
				321	static bool __conflict_event(struct perf_event a, struct perf_event b)
				322	{
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	323	#ifdef CONFIG_CGROUP_PERF
				324	/*
				325	* We can have any number of cgroups but only one system-wide
				326	* event at a time.
				327	*/
				328	if (a->cgrp && b->cgrp) {
				329	struct perf_cgroup *ac = a->cgrp;
				330	struct perf_cgroup *bc = b->cgrp;
				331
				332	/*
				333	* This condition should have been caught in
				334	* __match_event() and we should be sharing an RMID.
				335	*/
				336	WARN_ON_ONCE(ac == bc);
				337
				338	if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) \|\|
				339	cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
				340	return true;
				341
				342	return false;
				343	}
				344
				345	if (a->cgrp \|\| b->cgrp) {
				346	struct perf_cgroup ac, bc;
				347
				348	/*
				349	* cgroup and system-wide events are mutually exclusive
				350	*/
				351	if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) \|\|
				352	(b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
				353	return true;
				354
				355	/*
				356	* Ensure neither event is part of the other's cgroup
				357	*/
				358	ac = event_to_cgroup(a);
				359	bc = event_to_cgroup(b);
				360	if (ac == bc)
				361	return true;
				362
				363	/*
				364	* Must have cgroup and non-intersecting task events.
				365	*/
				366	if (!ac \|\| !bc)
				367	return false;
				368
				369	/*
				370	* We have cgroup and task events, and the task belongs
				371	* to a cgroup. Check for for overlap.
				372	*/
				373	if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) \|\|
				374	cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
				375	return true;
				376
				377	return false;
				378	}
				379	#endif
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	380	/*
				381	* If one of them is not a task, same story as above with cgroups.
				382	*/
				383	if (!(a->attach_state & PERF_ATTACH_TASK) \|\|
				384	!(b->attach_state & PERF_ATTACH_TASK))
				385	return true;
				386
				387	/*
				388	* Must be non-overlapping.
				389	*/
				390	return false;
				391	}
				392
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	393	struct rmid_read {
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	394	u32 rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	395	atomic64_t value;
				396	};
				397
				398	static void __intel_cqm_event_count(void *info);
				399
				400	/*
				401	* Exchange the RMID of a group of events.
				402	*/
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	403	static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	404	{
				405	struct perf_event *event;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	406	struct list_head *head = &group->hw.cqm_group_entry;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	407	u32 old_rmid = group->hw.cqm_rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	408
				409	lockdep_assert_held(&cache_mutex);
				410
				411	/*
				412	* If our RMID is being deallocated, perform a read now.
				413	*/
				414	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
				415	struct rmid_read rr = {
				416	.value = ATOMIC64_INIT(0),
				417	.rmid = old_rmid,
				418	};
				419
				420	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count,
				421	&rr, 1);
				422	local64_set(&group->count, atomic64_read(&rr.value));
				423	}
				424
				425	raw_spin_lock_irq(&cache_lock);
				426
				427	group->hw.cqm_rmid = rmid;
				428	list_for_each_entry(event, head, hw.cqm_group_entry)
				429	event->hw.cqm_rmid = rmid;
				430
				431	raw_spin_unlock_irq(&cache_lock);
				432
				433	return old_rmid;
				434	}
				435
				436	/*
				437	* If we fail to assign a new RMID for intel_cqm_rotation_rmid because
				438	* cachelines are still tagged with RMIDs in limbo, we progressively
				439	* increment the threshold until we find an RMID in limbo with <=
				440	* __intel_cqm_threshold lines tagged. This is designed to mitigate the
				441	* problem where cachelines tagged with an RMID are not steadily being
				442	* evicted.
				443	*
				444	* On successful rotations we decrease the threshold back towards zero.
				445	*
				446	* __intel_cqm_max_threshold provides an upper bound on the threshold,
				447	* and is measured in bytes because it's exposed to userland.
				448	*/
				449	static unsigned int __intel_cqm_threshold;
				450	static unsigned int __intel_cqm_max_threshold;
				451
				452	/*
				453	* Test whether an RMID has a zero occupancy value on this cpu.
				454	*/
				455	static void intel_cqm_stable(void *arg)
				456	{
				457	struct cqm_rmid_entry *entry;
				458
				459	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
				460	if (entry->state != RMID_AVAILABLE)
				461	break;
				462
				463	if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
				464	entry->state = RMID_DIRTY;
				465	}
				466	}
				467
				468	/*
				469	* If we have group events waiting for an RMID that don't conflict with
				470	* events already running, assign @rmid.
				471	*/
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	472	static bool intel_cqm_sched_in_event(u32 rmid)
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	473	{
				474	struct perf_event leader, event;
				475
				476	lockdep_assert_held(&cache_mutex);
				477
				478	leader = list_first_entry(&cache_groups, struct perf_event,
				479	hw.cqm_groups_entry);
				480	event = leader;
				481
				482	list_for_each_entry_continue(event, &cache_groups,
				483	hw.cqm_groups_entry) {
				484	if (__rmid_valid(event->hw.cqm_rmid))
				485	continue;
				486
				487	if (__conflict_event(event, leader))
				488	continue;
				489
				490	intel_cqm_xchg_rmid(event, rmid);
				491	return true;
				492	}
				493
				494	return false;
				495	}
				496
				497	/*
				498	* Initially use this constant for both the limbo queue time and the
				499	* rotation timer interval, pmu::hrtimer_interval_ms.
				500	*
				501	* They don't need to be the same, but the two are related since if you
				502	* rotate faster than you recycle RMIDs, you may run out of available
				503	* RMIDs.
				504	*/
				505	#define RMID_DEFAULT_QUEUE_TIME 250 /* ms */
				506
				507	static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;
				508
				509	/*
				510	* intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
				511	* @nr_available: number of freeable RMIDs on the limbo list
				512	*
				513	* Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
				514	* cachelines are tagged with those RMIDs. After this we can reuse them
				515	* and know that the current set of active RMIDs is stable.
				516	*
				517	* Return %true or %false depending on whether stabilization needs to be
				518	* reattempted.
				519	*
				520	* If we return %true then @nr_available is updated to indicate the
				521	* number of RMIDs on the limbo list that have been queued for the
				522	* minimum queue time (RMID_AVAILABLE), but whose data occupancy values
				523	* are above __intel_cqm_threshold.
				524	*/
				525	static bool intel_cqm_rmid_stabilize(unsigned int *available)
				526	{
				527	struct cqm_rmid_entry entry, tmp;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	528
				529	lockdep_assert_held(&cache_mutex);
				530
				531	*available = 0;
				532	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
				533	unsigned long min_queue_time;
				534	unsigned long now = jiffies;
				535
				536	/*
				537	* We hold RMIDs placed into limbo for a minimum queue
				538	* time. Before the minimum queue time has elapsed we do
				539	* not recycle RMIDs.
				540	*
				541	* The reasoning is that until a sufficient time has
				542	* passed since we stopped using an RMID, any RMID
				543	* placed onto the limbo list will likely still have
				544	* data tagged in the cache, which means we'll probably
				545	* fail to recycle it anyway.
				546	*
				547	* We can save ourselves an expensive IPI by skipping
				548	* any RMIDs that have not been queued for the minimum
				549	* time.
				550	*/
				551	min_queue_time = entry->queue_time +
				552	msecs_to_jiffies(__rmid_queue_time_ms);
				553
				554	if (time_after(min_queue_time, now))
				555	break;
				556
				557	entry->state = RMID_AVAILABLE;
				558	(*available)++;
				559	}
				560
				561	/*
				562	* Fast return if none of the RMIDs on the limbo list have been
				563	* sitting on the queue for the minimum queue time.
				564	*/
				565	if (!*available)
				566	return false;
				567
				568	/*
				569	* Test whether an RMID is free for each package.
				570	*/
				571	on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);
				572
				573	list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
				574	/*
				575	* Exhausted all RMIDs that have waited min queue time.
				576	*/
				577	if (entry->state == RMID_YOUNG)
				578	break;
				579
				580	if (entry->state == RMID_DIRTY)
				581	continue;
				582
				583	list_del(&entry->list); /* remove from limbo */
				584
				585	/*
				586	* The rotation RMID gets priority if it's
				587	* currently invalid. In which case, skip adding
				588	* the RMID to the the free lru.
				589	*/
				590	if (!__rmid_valid(intel_cqm_rotation_rmid)) {
				591	intel_cqm_rotation_rmid = entry->rmid;
				592	continue;
				593	}
				594
				595	/*
				596	* If we have groups waiting for RMIDs, hand
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	597	* them one now provided they don't conflict.
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	598	*/
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	599	if (intel_cqm_sched_in_event(entry->rmid))
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	600	continue;
				601
				602	/*
				603	* Otherwise place it onto the free list.
				604	*/
				605	list_add_tail(&entry->list, &cqm_rmid_free_lru);
				606	}
				607
				608
				609	return __rmid_valid(intel_cqm_rotation_rmid);
				610	}
				611
				612	/*
				613	* Pick a victim group and move it to the tail of the group list.
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	614	* @next: The first group without an RMID
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	615	*/
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	616	static void __intel_cqm_pick_and_rotate(struct perf_event *next)
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	617	{
				618	struct perf_event *rotor;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	619	u32 rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	620
				621	lockdep_assert_held(&cache_mutex);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	622
				623	rotor = list_first_entry(&cache_groups, struct perf_event,
				624	hw.cqm_groups_entry);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	625
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	626	/*
				627	* The group at the front of the list should always have a valid
				628	* RMID. If it doesn't then no groups have RMIDs assigned and we
				629	* don't need to rotate the list.
				630	*/
				631	if (next == rotor)
				632	return;
				633
				634	rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
				635	__put_rmid(rmid);
				636
				637	list_rotate_left(&cache_groups);
				638	}
				639
				640	/*
				641	* Deallocate the RMIDs from any events that conflict with @event, and
				642	* place them on the back of the group list.
				643	*/
				644	static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
				645	{
				646	struct perf_event group, g;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	647	u32 rmid;
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	648
				649	lockdep_assert_held(&cache_mutex);
				650
				651	list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
				652	if (group == event)
				653	continue;
				654
				655	rmid = group->hw.cqm_rmid;
				656
				657	/*
				658	* Skip events that don't have a valid RMID.
				659	*/
				660	if (!__rmid_valid(rmid))
				661	continue;
				662
				663	/*
				664	* No conflict? No problem! Leave the event alone.
				665	*/
				666	if (!__conflict_event(group, event))
				667	continue;
				668
				669	intel_cqm_xchg_rmid(group, INVALID_RMID);
				670	__put_rmid(rmid);
				671	}
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	672	}
				673
				674	/*
				675	* Attempt to rotate the groups and assign new RMIDs.
				676	*
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	677	* We rotate for two reasons,
				678	* 1. To handle the scheduling of conflicting events
				679	* 2. To recycle RMIDs
				680	*
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	681	* Rotating RMIDs is complicated because the hardware doesn't give us
				682	* any clues.
				683	*
				684	* There's problems with the hardware interface; when you change the
				685	* task:RMID map cachelines retain their 'old' tags, giving a skewed
				686	* picture. In order to work around this, we must always keep one free
				687	* RMID - intel_cqm_rotation_rmid.
				688	*
				689	* Rotation works by taking away an RMID from a group (the old RMID),
				690	* and assigning the free RMID to another group (the new RMID). We must
				691	* then wait for the old RMID to not be used (no cachelines tagged).
				692	* This ensure that all cachelines are tagged with 'active' RMIDs. At
				693	* this point we can start reading values for the new RMID and treat the
				694	* old RMID as the free RMID for the next rotation.
				695	*
				696	* Return %true or %false depending on whether we did any rotating.
				697	*/
				698	static bool __intel_cqm_rmid_rotate(void)
				699	{
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	700	struct perf_event group, start = NULL;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	701	unsigned int threshold_limit;
				702	unsigned int nr_needed = 0;
				703	unsigned int nr_available;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	704	bool rotated = false;
				705
				706	mutex_lock(&cache_mutex);
				707
				708	again:
				709	/*
				710	* Fast path through this function if there are no groups and no
				711	* RMIDs that need cleaning.
				712	*/
				713	if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
				714	goto out;
				715
				716	list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
				717	if (!__rmid_valid(group->hw.cqm_rmid)) {
				718	if (!start)
				719	start = group;
				720	nr_needed++;
				721	}
				722	}
				723
				724	/*
				725	* We have some event groups, but they all have RMIDs assigned
				726	* and no RMIDs need cleaning.
				727	*/
				728	if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
				729	goto out;
				730
				731	if (!nr_needed)
				732	goto stabilize;
				733
				734	/*
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	735	* We have more event groups without RMIDs than available RMIDs,
				736	* or we have event groups that conflict with the ones currently
				737	* scheduled.
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	738	*
				739	* We force deallocate the rmid of the group at the head of
				740	* cache_groups. The first event group without an RMID then gets
				741	* assigned intel_cqm_rotation_rmid. This ensures we always make
				742	* forward progress.
				743	*
				744	* Rotate the cache_groups list so the previous head is now the
				745	* tail.
				746	*/
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	747	__intel_cqm_pick_and_rotate(start);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	748
				749	/*
				750	* If the rotation is going to succeed, reduce the threshold so
				751	* that we don't needlessly reuse dirty RMIDs.
				752	*/
				753	if (__rmid_valid(intel_cqm_rotation_rmid)) {
				754	intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	755	intel_cqm_rotation_rmid = __get_rmid();
				756
				757	intel_cqm_sched_out_conflicting_events(start);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	758
				759	if (__intel_cqm_threshold)
				760	__intel_cqm_threshold--;
				761	}
				762
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	763	rotated = true;
				764
				765	stabilize:
				766	/*
				767	* We now need to stabilize the RMID we freed above (if any) to
				768	* ensure that the next time we rotate we have an RMID with zero
				769	* occupancy value.
				770	*
				771	* Alternatively, if we didn't need to perform any rotation,
				772	* we'll have a bunch of RMIDs in limbo that need stabilizing.
				773	*/
				774	threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;
				775
				776	while (intel_cqm_rmid_stabilize(&nr_available) &&
				777	__intel_cqm_threshold < threshold_limit) {
				778	unsigned int steal_limit;
				779
				780	/*
				781	* Don't spin if nobody is actively waiting for an RMID,
				782	* the rotation worker will be kicked as soon as an
				783	* event needs an RMID anyway.
				784	*/
				785	if (!nr_needed)
				786	break;
				787
				788	/* Allow max 25% of RMIDs to be in limbo. */
				789	steal_limit = (cqm_max_rmid + 1) / 4;
				790
				791	/*
				792	* We failed to stabilize any RMIDs so our rotation
				793	* logic is now stuck. In order to make forward progress
				794	* we have a few options:
				795	*
				796	* 1. rotate ("steal") another RMID
				797	* 2. increase the threshold
				798	* 3. do nothing
				799	*
				800	* We do both of 1. and 2. until we hit the steal limit.
				801	*
				802	* The steal limit prevents all RMIDs ending up on the
				803	* limbo list. This can happen if every RMID has a
				804	* non-zero occupancy above threshold_limit, and the
				805	* occupancy values aren't dropping fast enough.
				806	*
				807	* Note that there is prioritisation at work here - we'd
				808	* rather increase the number of RMIDs on the limbo list
				809	* than increase the threshold, because increasing the
				810	* threshold skews the event data (because we reuse
				811	* dirty RMIDs) - threshold bumps are a last resort.
				812	*/
				813	if (nr_available < steal_limit)
				814	goto again;
				815
				816	__intel_cqm_threshold++;
				817	}
				818
				819	out:
				820	mutex_unlock(&cache_mutex);
				821	return rotated;
				822	}
				823
				824	static void intel_cqm_rmid_rotate(struct work_struct *work);
				825
				826	static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);
				827
				828	static struct pmu intel_cqm_pmu;
				829
				830	static void intel_cqm_rmid_rotate(struct work_struct *work)
				831	{
				832	unsigned long delay;
				833
				834	__intel_cqm_rmid_rotate();
				835
				836	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
				837	schedule_delayed_work(&intel_cqm_rmid_work, delay);
				838	}
				839
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	840	/*
				841	* Find a group and setup RMID.
				842	*
				843	* If we're part of a group, we use the group's RMID.
				844	*/
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	845	static void intel_cqm_setup_event(struct perf_event *event,
				846	struct perf_event **group)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	847	{
				848	struct perf_event *iter;
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	849	bool conflict = false;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	850	u32 rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	851
				852	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	853	rmid = iter->hw.cqm_rmid;
				854
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	855	if (__match_event(iter, event)) {
				856	/* All tasks in a group share an RMID */
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	857	event->hw.cqm_rmid = rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	858	*group = iter;
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	859	return;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	860	}
				861
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	862	/*
				863	* We only care about conflicts for events that are
				864	* actually scheduled in (and hence have a valid RMID).
				865	*/
				866	if (__conflict_event(iter, event) && __rmid_valid(rmid))
				867	conflict = true;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	868	}
				869
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	870	if (conflict)
				871	rmid = INVALID_RMID;
				872	else
				873	rmid = __get_rmid();
				874
				875	event->hw.cqm_rmid = rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	876	}
				877
				878	static void intel_cqm_event_read(struct perf_event *event)
				879	{
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	880	unsigned long flags;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	881	u32 rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	882	u64 val;
				883
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	884	/*
				885	* Task events are handled by intel_cqm_event_count().
				886	*/
				887	if (event->cpu == -1)
				888	return;
				889
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	890	raw_spin_lock_irqsave(&cache_lock, flags);
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	891	rmid = event->hw.cqm_rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	892
				893	if (!__rmid_valid(rmid))
				894	goto out;
				895
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	896	val = __rmid_read(rmid);
				897
				898	/*
				899	* Ignore this reading on error states and do not update the value.
				900	*/
				901	if (val & (RMID_VAL_ERROR \| RMID_VAL_UNAVAIL))
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	902	goto out;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	903
				904	local64_set(&event->count, val);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	905	out:
				906	raw_spin_unlock_irqrestore(&cache_lock, flags);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	907	}
				908
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	909	static void __intel_cqm_event_count(void *info)
				910	{
				911	struct rmid_read *rr = info;
				912	u64 val;
				913
				914	val = __rmid_read(rr->rmid);
				915
				916	if (val & (RMID_VAL_ERROR \| RMID_VAL_UNAVAIL))
				917	return;
				918
				919	atomic64_add(val, &rr->value);
				920	}
				921
				922	static inline bool cqm_group_leader(struct perf_event *event)
				923	{
				924	return !list_empty(&event->hw.cqm_groups_entry);
				925	}
				926
				927	static u64 intel_cqm_event_count(struct perf_event *event)
				928	{
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	929	unsigned long flags;
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	930	struct rmid_read rr = {
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	931	.value = ATOMIC64_INIT(0),
				932	};
				933
				934	/*
				935	* We only need to worry about task events. System-wide events
				936	* are handled like usual, i.e. entirely with
				937	* intel_cqm_event_read().
				938	*/
				939	if (event->cpu != -1)
				940	return __perf_event_count(event);
				941
				942	/*
				943	* Only the group leader gets to report values. This stops us
				944	* reporting duplicate values to userspace, and gives us a clear
				945	* rule for which task gets to report the values.
				946	*
				947	* Note that it is impossible to attribute these values to
				948	* specific packages - we forfeit that ability when we create
				949	* task events.
				950	*/
				951	if (!cqm_group_leader(event))
				952	return 0;
				953
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	954	/*
Matt Fleming	2c534c0	2015-07-21 15:55:09 +0100	[diff] [blame]	955	* Getting up-to-date values requires an SMP IPI which is not
				956	* possible if we're being called in interrupt context. Return
				957	* the cached values instead.
				958	*/
				959	if (unlikely(in_interrupt()))
				960	goto out;
				961
				962	/*
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	963	* Notice that we don't perform the reading of an RMID
				964	* atomically, because we can't hold a spin lock across the
				965	* IPIs.
				966	*
				967	* Speculatively perform the read, since @event might be
				968	* assigned a different (possibly invalid) RMID while we're
				969	* busying performing the IPI calls. It's therefore necessary to
				970	* check @event's RMID afterwards, and if it has changed,
				971	* discard the result of the read.
				972	*/
				973	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);
				974
				975	if (!__rmid_valid(rr.rmid))
				976	goto out;
				977
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	978	on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, &rr, 1);
				979
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	980	raw_spin_lock_irqsave(&cache_lock, flags);
				981	if (event->hw.cqm_rmid == rr.rmid)
				982	local64_set(&event->count, atomic64_read(&rr.value));
				983	raw_spin_unlock_irqrestore(&cache_lock, flags);
				984	out:
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	985	return __perf_event_count(event);
				986	}
				987
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	988	static void intel_cqm_event_start(struct perf_event *event, int mode)
				989	{
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	990	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
Thomas Gleixner	b3df4ec	2015-05-19 00:00:51 +0000	[diff] [blame]	991	u32 rmid = event->hw.cqm_rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	992
				993	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
				994	return;
				995
				996	event->hw.cqm_state &= ~PERF_HES_STOPPED;
				997
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	998	if (state->rmid_usecnt++) {
Thomas Gleixner	0bac237	2015-05-19 00:00:55 +0000	[diff] [blame]	999	if (!WARN_ON_ONCE(state->rmid != rmid))
				1000	return;
				1001	} else {
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1002	WARN_ON_ONCE(state->rmid);
Thomas Gleixner	0bac237	2015-05-19 00:00:55 +0000	[diff] [blame]	1003	}
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1004
				1005	state->rmid = rmid;
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1006	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1007	}
				1008
				1009	static void intel_cqm_event_stop(struct perf_event *event, int mode)
				1010	{
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1011	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1012
				1013	if (event->hw.cqm_state & PERF_HES_STOPPED)
				1014	return;
				1015
				1016	event->hw.cqm_state \|= PERF_HES_STOPPED;
				1017
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1018	intel_cqm_event_read(event);
				1019
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1020	if (!--state->rmid_usecnt) {
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1021	state->rmid = 0;
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1022	wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1023	} else {
				1024	WARN_ON_ONCE(!state->rmid);
				1025	}
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1026	}
				1027
				1028	static int intel_cqm_event_add(struct perf_event *event, int mode)
				1029	{
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1030	unsigned long flags;
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	1031	u32 rmid;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1032
				1033	raw_spin_lock_irqsave(&cache_lock, flags);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1034
				1035	event->hw.cqm_state = PERF_HES_STOPPED;
				1036	rmid = event->hw.cqm_rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1037
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1038	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1039	intel_cqm_event_start(event, mode);
				1040
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1041	raw_spin_unlock_irqrestore(&cache_lock, flags);
				1042
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1043	return 0;
				1044	}
				1045
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1046	static void intel_cqm_event_destroy(struct perf_event *event)
				1047	{
				1048	struct perf_event *group_other = NULL;
				1049
				1050	mutex_lock(&cache_mutex);
				1051
				1052	/*
				1053	* If there's another event in this group...
				1054	*/
				1055	if (!list_empty(&event->hw.cqm_group_entry)) {
				1056	group_other = list_first_entry(&event->hw.cqm_group_entry,
				1057	struct perf_event,
				1058	hw.cqm_group_entry);
				1059	list_del(&event->hw.cqm_group_entry);
				1060	}
				1061
				1062	/*
				1063	* And we're the group leader..
				1064	*/
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	1065	if (cqm_group_leader(event)) {
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1066	/*
				1067	* If there was a group_other, make that leader, otherwise
				1068	* destroy the group and return the RMID.
				1069	*/
				1070	if (group_other) {
				1071	list_replace(&event->hw.cqm_groups_entry,
				1072	&group_other->hw.cqm_groups_entry);
				1073	} else {
Matt Fleming	adafa99	2015-05-22 09:59:42 +0100	[diff] [blame]	1074	u32 rmid = event->hw.cqm_rmid;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1075
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1076	if (__rmid_valid(rmid))
				1077	__put_rmid(rmid);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1078	list_del(&event->hw.cqm_groups_entry);
				1079	}
				1080	}
				1081
				1082	mutex_unlock(&cache_mutex);
				1083	}
				1084
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1085	static int intel_cqm_event_init(struct perf_event *event)
				1086	{
				1087	struct perf_event *group = NULL;
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1088	bool rotate = false;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1089
				1090	if (event->attr.type != intel_cqm_pmu.type)
				1091	return -ENOENT;
				1092
				1093	if (event->attr.config & ~QOS_EVENT_MASK)
				1094	return -EINVAL;
				1095
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1096	/* unsupported modes and filters */
				1097	if (event->attr.exclude_user \|\|
				1098	event->attr.exclude_kernel \|\|
				1099	event->attr.exclude_hv \|\|
				1100	event->attr.exclude_idle \|\|
				1101	event->attr.exclude_host \|\|
				1102	event->attr.exclude_guest \|\|
				1103	event->attr.sample_period) /* no sampling */
				1104	return -EINVAL;
				1105
				1106	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
				1107	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);
				1108
				1109	event->destroy = intel_cqm_event_destroy;
				1110
				1111	mutex_lock(&cache_mutex);
				1112
Matt Fleming	bfe1fcd	2015-01-23 18:45:46 +0000	[diff] [blame]	1113	/* Will also set rmid */
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	1114	intel_cqm_setup_event(event, &group);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1115
				1116	if (group) {
				1117	list_add_tail(&event->hw.cqm_group_entry,
				1118	&group->hw.cqm_group_entry);
				1119	} else {
				1120	list_add_tail(&event->hw.cqm_groups_entry,
				1121	&cache_groups);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1122
				1123	/*
				1124	* All RMIDs are either in use or have recently been
				1125	* used. Kick the rotation worker to clean/free some.
				1126	*
				1127	* We only do this for the group leader, rather than for
				1128	* every event in a group to save on needless work.
				1129	*/
				1130	if (!__rmid_valid(event->hw.cqm_rmid))
				1131	rotate = true;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1132	}
				1133
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1134	mutex_unlock(&cache_mutex);
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1135
				1136	if (rotate)
				1137	schedule_delayed_work(&intel_cqm_rmid_work, 0);
				1138
Matt Fleming	59bf7fd	2015-01-23 18:45:48 +0000	[diff] [blame]	1139	return 0;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1140	}
				1141
				1142	EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
				1143	EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
				1144	EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
				1145	EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
				1146	EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");
				1147
				1148	static struct attribute *intel_cqm_events_attr[] = {
				1149	EVENT_PTR(intel_cqm_llc),
				1150	EVENT_PTR(intel_cqm_llc_pkg),
				1151	EVENT_PTR(intel_cqm_llc_unit),
				1152	EVENT_PTR(intel_cqm_llc_scale),
				1153	EVENT_PTR(intel_cqm_llc_snapshot),
				1154	NULL,
				1155	};
				1156
				1157	static struct attribute_group intel_cqm_events_group = {
				1158	.name = "events",
				1159	.attrs = intel_cqm_events_attr,
				1160	};
				1161
				1162	PMU_FORMAT_ATTR(event, "config:0-7");
				1163	static struct attribute *intel_cqm_formats_attr[] = {
				1164	&format_attr_event.attr,
				1165	NULL,
				1166	};
				1167
				1168	static struct attribute_group intel_cqm_format_group = {
				1169	.name = "format",
				1170	.attrs = intel_cqm_formats_attr,
				1171	};
				1172
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1173	static ssize_t
				1174	max_recycle_threshold_show(struct device dev, struct device_attribute attr,
				1175	char *page)
				1176	{
				1177	ssize_t rv;
				1178
				1179	mutex_lock(&cache_mutex);
				1180	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
				1181	mutex_unlock(&cache_mutex);
				1182
				1183	return rv;
				1184	}
				1185
				1186	static ssize_t
				1187	max_recycle_threshold_store(struct device *dev,
				1188	struct device_attribute *attr,
				1189	const char *buf, size_t count)
				1190	{
				1191	unsigned int bytes, cachelines;
				1192	int ret;
				1193
				1194	ret = kstrtouint(buf, 0, &bytes);
				1195	if (ret)
				1196	return ret;
				1197
				1198	mutex_lock(&cache_mutex);
				1199
				1200	__intel_cqm_max_threshold = bytes;
				1201	cachelines = bytes / cqm_l3_scale;
				1202
				1203	/*
				1204	* The new maximum takes effect immediately.
				1205	*/
				1206	if (__intel_cqm_threshold > cachelines)
				1207	__intel_cqm_threshold = cachelines;
				1208
				1209	mutex_unlock(&cache_mutex);
				1210
				1211	return count;
				1212	}
				1213
				1214	static DEVICE_ATTR_RW(max_recycle_threshold);
				1215
				1216	static struct attribute *intel_cqm_attrs[] = {
				1217	&dev_attr_max_recycle_threshold.attr,
				1218	NULL,
				1219	};
				1220
				1221	static const struct attribute_group intel_cqm_group = {
				1222	.attrs = intel_cqm_attrs,
				1223	};
				1224
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1225	static const struct attribute_group *intel_cqm_attr_groups[] = {
				1226	&intel_cqm_events_group,
				1227	&intel_cqm_format_group,
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1228	&intel_cqm_group,
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1229	NULL,
				1230	};
				1231
				1232	static struct pmu intel_cqm_pmu = {
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1233	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
				1234	.attr_groups = intel_cqm_attr_groups,
				1235	.task_ctx_nr = perf_sw_context,
				1236	.event_init = intel_cqm_event_init,
				1237	.add = intel_cqm_event_add,
Thomas Gleixner	43d0c2f	2015-05-19 00:00:56 +0000	[diff] [blame]	1238	.del = intel_cqm_event_stop,
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1239	.start = intel_cqm_event_start,
				1240	.stop = intel_cqm_event_stop,
				1241	.read = intel_cqm_event_read,
				1242	.count = intel_cqm_event_count,
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1243	};
				1244
				1245	static inline void cqm_pick_event_reader(int cpu)
				1246	{
				1247	int phys_id = topology_physical_package_id(cpu);
				1248	int i;
				1249
				1250	for_each_cpu(i, &cqm_cpumask) {
				1251	if (phys_id == topology_physical_package_id(i))
				1252	return; /* already got reader for this socket */
				1253	}
				1254
				1255	cpumask_set_cpu(cpu, &cqm_cpumask);
				1256	}
				1257
Matt Fleming	d7a702f	2015-08-06 13:12:43 +0100	[diff] [blame]	1258	static void intel_cqm_cpu_starting(unsigned int cpu)
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1259	{
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1260	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1261	struct cpuinfo_x86 *c = &cpu_data(cpu);
				1262
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1263	state->rmid = 0;
Thomas Gleixner	bf92673	2015-05-19 00:00:58 +0000	[diff] [blame]	1264	state->closid = 0;
				1265	state->rmid_usecnt = 0;
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1266
				1267	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
				1268	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);
				1269	}
				1270
				1271	static void intel_cqm_cpu_exit(unsigned int cpu)
				1272	{
				1273	int phys_id = topology_physical_package_id(cpu);
				1274	int i;
				1275
				1276	/*
				1277	* Is @cpu a designated cqm reader?
				1278	*/
				1279	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
				1280	return;
				1281
				1282	for_each_online_cpu(i) {
				1283	if (i == cpu)
				1284	continue;
				1285
				1286	if (phys_id == topology_physical_package_id(i)) {
				1287	cpumask_set_cpu(i, &cqm_cpumask);
				1288	break;
				1289	}
				1290	}
				1291	}
				1292
				1293	static int intel_cqm_cpu_notifier(struct notifier_block *nb,
				1294	unsigned long action, void *hcpu)
				1295	{
				1296	unsigned int cpu = (unsigned long)hcpu;
				1297
				1298	switch (action & ~CPU_TASKS_FROZEN) {
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1299	case CPU_DOWN_PREPARE:
				1300	intel_cqm_cpu_exit(cpu);
				1301	break;
				1302	case CPU_STARTING:
Matt Fleming	d7a702f	2015-08-06 13:12:43 +0100	[diff] [blame]	1303	intel_cqm_cpu_starting(cpu);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1304	cqm_pick_event_reader(cpu);
				1305	break;
				1306	}
				1307
				1308	return NOTIFY_OK;
				1309	}
				1310
				1311	static const struct x86_cpu_id intel_cqm_match[] = {
				1312	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
				1313	{}
				1314	};
				1315
				1316	static int __init intel_cqm_init(void)
				1317	{
				1318	char *str, scale[20];
				1319	int i, cpu, ret;
				1320
				1321	if (!x86_match_cpu(intel_cqm_match))
				1322	return -ENODEV;
				1323
				1324	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;
				1325
				1326	/*
				1327	* It's possible that not all resources support the same number
				1328	* of RMIDs. Instead of making scheduling much more complicated
				1329	* (where we have to match a task's RMID to a cpu that supports
				1330	* that many RMIDs) just find the minimum RMIDs supported across
				1331	* all cpus.
				1332	*
				1333	* Also, check that the scales match on all cpus.
				1334	*/
				1335	cpu_notifier_register_begin();
				1336
				1337	for_each_online_cpu(cpu) {
				1338	struct cpuinfo_x86 *c = &cpu_data(cpu);
				1339
				1340	if (c->x86_cache_max_rmid < cqm_max_rmid)
				1341	cqm_max_rmid = c->x86_cache_max_rmid;
				1342
				1343	if (c->x86_cache_occ_scale != cqm_l3_scale) {
				1344	pr_err("Multiple LLC scale values, disabling\n");
				1345	ret = -EINVAL;
				1346	goto out;
				1347	}
				1348	}
				1349
Matt Fleming	bff671d	2015-01-23 18:45:47 +0000	[diff] [blame]	1350	/*
				1351	* A reasonable upper limit on the max threshold is the number
				1352	* of lines tagged per RMID if all RMIDs have the same number of
				1353	* lines tagged in the LLC.
				1354	*
				1355	* For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
				1356	*/
				1357	__intel_cqm_max_threshold =
				1358	boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);
				1359
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1360	snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
				1361	str = kstrdup(scale, GFP_KERNEL);
				1362	if (!str) {
				1363	ret = -ENOMEM;
				1364	goto out;
				1365	}
				1366
				1367	event_attr_intel_cqm_llc_scale.event_str = str;
				1368
				1369	ret = intel_cqm_setup_rmid_cache();
				1370	if (ret)
				1371	goto out;
				1372
				1373	for_each_online_cpu(i) {
Matt Fleming	d7a702f	2015-08-06 13:12:43 +0100	[diff] [blame]	1374	intel_cqm_cpu_starting(i);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1375	cqm_pick_event_reader(i);
				1376	}
				1377
				1378	__perf_cpu_notifier(intel_cqm_cpu_notifier);
				1379
Peter Zijlstra	50f16a8	2015-03-05 22:10:19 +0100	[diff] [blame]	1380	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
Matt Fleming	4afbb24c	2015-01-23 18:45:44 +0000	[diff] [blame]	1381	if (ret)
				1382	pr_err("Intel CQM perf registration failed: %d\n", ret);
				1383	else
				1384	pr_info("Intel CQM monitoring enabled\n");
				1385
				1386	out:
				1387	cpu_notifier_register_done();
				1388
				1389	return ret;
				1390	}
				1391	device_initcall(intel_cqm_init);