/*
 * Intel Cache Quality-of-Service Monitoring (CQM) support.
 *
 * Based very, very heavily on work by Peter Zijlstra.
 */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include <asm/cpu_device_id.h>
#include "../perf_event.h"

#define MSR_IA32_PQR_ASSOC	0x0c8f
#define MSR_IA32_QM_CTR		0x0c8e
#define MSR_IA32_QM_EVTSEL	0x0c8d

#define MBM_CNTR_WIDTH		24
/*
 * Guaranteed time in ms as per SDM where MBM counters will not overflow.
 */
#define MBM_CTR_OVERFLOW_TIME	1000

static u32 cqm_max_rmid = -1;
static unsigned int cqm_l3_scale; /* supposedly cacheline size */
static bool cqm_enabled, mbm_enabled;
unsigned int mbm_socket_max;

/**
 * struct intel_pqr_state - State cache for the PQR MSR
 * @rmid:		The cached Resource Monitoring ID
 * @closid:		The cached Class Of Service ID
 * @rmid_usecnt:	The usage counter for rmid
 *
 * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
 * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
 * contains both parts, so we need to cache them.
 *
 * The cache also helps to avoid pointless updates if the value does
 * not change.
 */
struct intel_pqr_state {
	u32			rmid;
	u32			closid;
	int			rmid_usecnt;
};

/*
 * The cached intel_pqr_state is strictly per CPU and can never be
 * updated from a remote CPU. Both functions which modify the state
 * (intel_cqm_event_start and intel_cqm_event_stop) are called with
 * interrupts disabled, which is sufficient for the protection.
 */
static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
static struct hrtimer *mbm_timers;
/**
 * struct sample - mbm event's (local or total) data
 * @total_bytes:	#bytes since we began monitoring
 * @prev_msr:		previous value of MSR
 */
struct sample {
	u64	total_bytes;
	u64	prev_msr;
};

/*
 * samples profiled for total memory bandwidth type events
 */
static struct sample *mbm_total;
/*
 * samples profiled for local memory bandwidth type events
 */
static struct sample *mbm_local;

#define pkg_id	topology_physical_package_id(smp_processor_id())
/*
 * rmid_2_index returns the index for the rmid in mbm_local/mbm_total array.
 * mbm_total[] and mbm_local[] are linearly indexed by socket# * max number of
 * rmids per socket, an example is given below
 * RMID1 of Socket0:  vrmid =  1
 * RMID1 of Socket1:  vrmid =  1 * (cqm_max_rmid + 1) + 1
 * RMID1 of Socket2:  vrmid =  2 * (cqm_max_rmid + 1) + 1
 */
#define rmid_2_index(rmid)	((pkg_id * (cqm_max_rmid + 1)) + rmid)
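
/*
 * Worked example for the index computation above (illustrative values
 * only): with cqm_max_rmid == 255, RMID 5 on socket 1 maps to index
 * 1 * 256 + 5 == 261 in mbm_local[] and mbm_total[].
 */
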
/*
 * Protects cache_groups, cqm_rmid_free_lru and cqm_rmid_limbo_lru.
 * Also protects event->hw.cqm_rmid
 *
 * Hold either for stability, both for modification of ->hw.cqm_rmid.
 */
static DEFINE_MUTEX(cache_mutex);
static DEFINE_RAW_SPINLOCK(cache_lock);

/*
 * Groups of events that have the same target(s), one RMID per group.
 */
static LIST_HEAD(cache_groups);

/*
 * Mask of CPUs for reading CQM values. We only need one per-socket.
 */
static cpumask_t cqm_cpumask;

#define RMID_VAL_ERROR		(1ULL << 63)
#define RMID_VAL_UNAVAIL	(1ULL << 62)

/*
 * Event IDs are used to program IA32_QM_EVTSEL before reading event
 * counter from IA32_QM_CTR
 */
#define QOS_L3_OCCUP_EVENT_ID	0x01
#define QOS_MBM_TOTAL_EVENT_ID	0x02
#define QOS_MBM_LOCAL_EVENT_ID	0x03

/*
 * This is central to the rotation algorithm in __intel_cqm_rmid_rotate().
 *
 * This rmid is always free and is guaranteed to have an associated
 * near-zero occupancy value, i.e. no cachelines are tagged with this
 * RMID, once __intel_cqm_rmid_rotate() returns.
 */
static u32 intel_cqm_rotation_rmid;

#define INVALID_RMID	(-1)

/*
 * Is @rmid valid for programming the hardware?
 *
 * rmid 0 is reserved by the hardware for all non-monitored tasks, which
 * means that we should never come across an rmid with that value.
 * Likewise, an rmid value of -1 is used to indicate "no rmid currently
 * assigned" and is used as part of the rotation code.
 */
static inline bool __rmid_valid(u32 rmid)
{
	if (!rmid || rmid == INVALID_RMID)
		return false;

	return true;
}

static u64 __rmid_read(u32 rmid)
{
	u64 val;

	/*
	 * Ignore the SDM, this thing is _NOTHING_ like a regular perfcnt,
	 * it just says that to increase confusion.
	 */
	wrmsr(MSR_IA32_QM_EVTSEL, QOS_L3_OCCUP_EVENT_ID, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);

	/*
	 * Aside from the ERROR and UNAVAIL bits, assume this thing returns
	 * the number of cachelines tagged with @rmid.
	 */
	return val;
}
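
/*
 * Usage sketch (illustrative only): callers are expected to mask off the
 * error bits before trusting the returned count, e.g.
 *
 *	u64 val = __rmid_read(rmid);
 *	if (!(val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)))
 *		occupancy_bytes = val * cqm_l3_scale;
 */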

enum rmid_recycle_state {
	RMID_YOUNG = 0,
	RMID_AVAILABLE,
	RMID_DIRTY,
};
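
/*
 * Recycling state sketch (see __put_rmid() and intel_cqm_rmid_stabilize()
 * below): a freed RMID enters the limbo list as RMID_YOUNG, becomes
 * RMID_AVAILABLE once it has sat there for the minimum queue time, and is
 * then either returned to the free list (occupancy at or below
 * __intel_cqm_threshold) or marked RMID_DIRTY and left in limbo for
 * another pass.
 */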

struct cqm_rmid_entry {
	u32 rmid;
	enum rmid_recycle_state state;
	struct list_head list;
	unsigned long queue_time;
};

/*
 * cqm_rmid_free_lru - A least recently used list of RMIDs.
 *
 * Oldest entry at the head, newest (most recently used) entry at the
 * tail. This list is never traversed, it's only used to keep track of
 * the lru order. That is, we only pick entries of the head or insert
 * them on the tail.
 *
 * All entries on the list are 'free', and their RMIDs are not currently
 * in use. To mark an RMID as in use, remove its entry from the lru
 * list.
 *
 * cqm_rmid_limbo_lru - list of currently unused but (potentially) dirty RMIDs.
 *
 * This list contains RMIDs that no one is currently using but that
 * may have a non-zero occupancy value associated with them. The
 * rotation worker moves RMIDs from the limbo list to the free list once
 * the occupancy value drops below __intel_cqm_threshold.
 *
 * Both lists are protected by cache_mutex.
 */
static LIST_HEAD(cqm_rmid_free_lru);
static LIST_HEAD(cqm_rmid_limbo_lru);

/*
 * We use a simple array of pointers so that we can lookup a struct
 * cqm_rmid_entry in O(1). This alleviates the callers of __get_rmid()
 * and __put_rmid() from having to worry about dealing with struct
 * cqm_rmid_entry - they just deal with rmids, i.e. integers.
 *
 * Once this array is initialized it is read-only. No locks are required
 * to access it.
 *
 * All entries for all RMIDs can be looked up in this array at all
 * times.
 */
static struct cqm_rmid_entry **cqm_rmid_ptrs;

static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
{
	struct cqm_rmid_entry *entry;

	entry = cqm_rmid_ptrs[rmid];
	WARN_ON(entry->rmid != rmid);

	return entry;
}

/*
 * Returns INVALID_RMID if no free RMID is available.
 *
 * We expect to be called with cache_mutex held.
 */
static u32 __get_rmid(void)
{
	struct cqm_rmid_entry *entry;

	lockdep_assert_held(&cache_mutex);

	if (list_empty(&cqm_rmid_free_lru))
		return INVALID_RMID;

	entry = list_first_entry(&cqm_rmid_free_lru, struct cqm_rmid_entry, list);
	list_del(&entry->list);

	return entry->rmid;
}

static void __put_rmid(u32 rmid)
{
	struct cqm_rmid_entry *entry;

	lockdep_assert_held(&cache_mutex);

	WARN_ON(!__rmid_valid(rmid));
	entry = __rmid_entry(rmid);

	entry->queue_time = jiffies;
	entry->state = RMID_YOUNG;

	list_add_tail(&entry->list, &cqm_rmid_limbo_lru);
}
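
/*
 * Allocation sketch (illustrative only): callers below take cache_mutex,
 * call __get_rmid(), and fall back to kicking the rotation worker when it
 * returns INVALID_RMID; __put_rmid() parks the RMID in limbo rather than
 * freeing it directly, so that stale cachelines can drain first.
 */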

static void cqm_cleanup(void)
{
	int i;

	if (!cqm_rmid_ptrs)
		return;

	/* Free all nr_rmids entries, i.e. RMIDs 0 .. cqm_max_rmid. */
	for (i = 0; i <= cqm_max_rmid; i++)
		kfree(cqm_rmid_ptrs[i]);

	kfree(cqm_rmid_ptrs);
	cqm_rmid_ptrs = NULL;
	cqm_enabled = false;
}

static int intel_cqm_setup_rmid_cache(void)
{
	struct cqm_rmid_entry *entry;
	unsigned int nr_rmids;
	int r = 0;

	nr_rmids = cqm_max_rmid + 1;
	cqm_rmid_ptrs = kzalloc(sizeof(struct cqm_rmid_entry *) *
				nr_rmids, GFP_KERNEL);
	if (!cqm_rmid_ptrs)
		return -ENOMEM;

	for (; r <= cqm_max_rmid; r++) {
		struct cqm_rmid_entry *entry;

		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry)
			goto fail;

		INIT_LIST_HEAD(&entry->list);
		entry->rmid = r;
		cqm_rmid_ptrs[r] = entry;

		list_add_tail(&entry->list, &cqm_rmid_free_lru);
	}

	/*
	 * RMID 0 is special and is always allocated. It's used for all
	 * tasks that are not monitored.
	 */
	entry = __rmid_entry(0);
	list_del(&entry->list);

	mutex_lock(&cache_mutex);
	intel_cqm_rotation_rmid = __get_rmid();
	mutex_unlock(&cache_mutex);

	return 0;

fail:
	cqm_cleanup();
	return -ENOMEM;
}

/*
 * Determine if @a and @b measure the same set of tasks.
 *
 * If @a and @b measure the same set of tasks then we want to share a
 * single RMID.
 */
static bool __match_event(struct perf_event *a, struct perf_event *b)
{
	/* Per-cpu and task events don't mix */
	if ((a->attach_state & PERF_ATTACH_TASK) !=
	    (b->attach_state & PERF_ATTACH_TASK))
		return false;

#ifdef CONFIG_CGROUP_PERF
	if (a->cgrp != b->cgrp)
		return false;
#endif

	/* If not task event, we're machine wide */
	if (!(b->attach_state & PERF_ATTACH_TASK))
		return true;

	/*
	 * Events that target same task are placed into the same cache group.
	 * Mark it as a multi event group, so that we update ->count
	 * for every event rather than just the group leader later.
	 */
	if (a->hw.target == b->hw.target) {
		b->hw.is_group_event = true;
		return true;
	}

	/*
	 * Are we an inherited event?
	 */
	if (b->parent == a)
		return true;

	return false;
}

#ifdef CONFIG_CGROUP_PERF
static inline struct perf_cgroup *event_to_cgroup(struct perf_event *event)
{
	if (event->attach_state & PERF_ATTACH_TASK)
		return perf_cgroup_from_task(event->hw.target, event->ctx);

	return event->cgrp;
}
#endif

/*
 * Determine if @a's tasks intersect with @b's tasks
 *
 * There are combinations of events that we explicitly prohibit,
 *
 *		  PROHIBITS
 * system-wide -> cgroup and task
 * cgroup      -> system-wide
 *	       -> task in cgroup
 * task	       -> system-wide
 *	       -> task in cgroup
 *
 * Call this function before allocating an RMID.
 */
static bool __conflict_event(struct perf_event *a, struct perf_event *b)
{
#ifdef CONFIG_CGROUP_PERF
	/*
	 * We can have any number of cgroups but only one system-wide
	 * event at a time.
	 */
	if (a->cgrp && b->cgrp) {
		struct perf_cgroup *ac = a->cgrp;
		struct perf_cgroup *bc = b->cgrp;

		/*
		 * This condition should have been caught in
		 * __match_event() and we should be sharing an RMID.
		 */
		WARN_ON_ONCE(ac == bc);

		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
			return true;

		return false;
	}

	if (a->cgrp || b->cgrp) {
		struct perf_cgroup *ac, *bc;

		/*
		 * cgroup and system-wide events are mutually exclusive
		 */
		if ((a->cgrp && !(b->attach_state & PERF_ATTACH_TASK)) ||
		    (b->cgrp && !(a->attach_state & PERF_ATTACH_TASK)))
			return true;

		/*
		 * Ensure neither event is part of the other's cgroup
		 */
		ac = event_to_cgroup(a);
		bc = event_to_cgroup(b);
		if (ac == bc)
			return true;

		/*
		 * Must have cgroup and non-intersecting task events.
		 */
		if (!ac || !bc)
			return false;

		/*
		 * We have cgroup and task events, and the task belongs
		 * to a cgroup. Check for overlap.
		 */
		if (cgroup_is_descendant(ac->css.cgroup, bc->css.cgroup) ||
		    cgroup_is_descendant(bc->css.cgroup, ac->css.cgroup))
			return true;

		return false;
	}
#endif
	/*
	 * If one of them is not a task, same story as above with cgroups.
	 */
	if (!(a->attach_state & PERF_ATTACH_TASK) ||
	    !(b->attach_state & PERF_ATTACH_TASK))
		return true;

	/*
	 * Must be non-overlapping.
	 */
	return false;
}

struct rmid_read {
	u32 rmid;
	u32 evt_type;
	atomic64_t value;
};

static void __intel_cqm_event_count(void *info);
static void init_mbm_sample(u32 rmid, u32 evt_type);
static void __intel_mbm_event_count(void *info);

static bool is_cqm_event(int e)
{
	return (e == QOS_L3_OCCUP_EVENT_ID);
}

static bool is_mbm_event(int e)
{
	return (e >= QOS_MBM_TOTAL_EVENT_ID && e <= QOS_MBM_LOCAL_EVENT_ID);
}

static void cqm_mask_call(struct rmid_read *rr)
{
	if (is_mbm_event(rr->evt_type))
		on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_count, rr, 1);
	else
		on_each_cpu_mask(&cqm_cpumask, __intel_cqm_event_count, rr, 1);
}
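
/*
 * Read sketch (illustrative only): a cross-package read is done by filling
 * in a struct rmid_read and letting cqm_mask_call() IPI one reader CPU per
 * socket, e.g.
 *
 *	struct rmid_read rr = {
 *		.rmid     = rmid,
 *		.evt_type = event->attr.config,
 *		.value    = ATOMIC64_INIT(0),
 *	};
 *	cqm_mask_call(&rr);
 *	total = atomic64_read(&rr.value);
 */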

/*
 * Exchange the RMID of a group of events.
 */
static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
{
	struct perf_event *event;
	struct list_head *head = &group->hw.cqm_group_entry;
	u32 old_rmid = group->hw.cqm_rmid;

	lockdep_assert_held(&cache_mutex);

	/*
	 * If our RMID is being deallocated, perform a read now.
	 */
	if (__rmid_valid(old_rmid) && !__rmid_valid(rmid)) {
		struct rmid_read rr = {
			.rmid = old_rmid,
			.evt_type = group->attr.config,
			.value = ATOMIC64_INIT(0),
		};

		cqm_mask_call(&rr);
		local64_set(&group->count, atomic64_read(&rr.value));
	}

	raw_spin_lock_irq(&cache_lock);

	group->hw.cqm_rmid = rmid;
	list_for_each_entry(event, head, hw.cqm_group_entry)
		event->hw.cqm_rmid = rmid;

	raw_spin_unlock_irq(&cache_lock);

	/*
	 * If the allocation is for mbm, init the mbm stats.
	 * Need to check whether each event in the group is an mbm event
	 * because there could be multiple types of events in the same group.
	 */
	if (__rmid_valid(rmid)) {
		event = group;
		if (is_mbm_event(event->attr.config))
			init_mbm_sample(rmid, event->attr.config);

		list_for_each_entry(event, head, hw.cqm_group_entry) {
			if (is_mbm_event(event->attr.config))
				init_mbm_sample(rmid, event->attr.config);
		}
	}

	return old_rmid;
}

/*
 * If we fail to assign a new RMID for intel_cqm_rotation_rmid because
 * cachelines are still tagged with RMIDs in limbo, we progressively
 * increment the threshold until we find an RMID in limbo with <=
 * __intel_cqm_threshold lines tagged. This is designed to mitigate the
 * problem where cachelines tagged with an RMID are not steadily being
 * evicted.
 *
 * On successful rotations we decrease the threshold back towards zero.
 *
 * __intel_cqm_max_threshold provides an upper bound on the threshold,
 * and is measured in bytes because it's exposed to userland.
 */
static unsigned int __intel_cqm_threshold;
static unsigned int __intel_cqm_max_threshold;

/*
 * Test whether an RMID has a zero occupancy value on this cpu.
 */
static void intel_cqm_stable(void *arg)
{
	struct cqm_rmid_entry *entry;

	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
		if (entry->state != RMID_AVAILABLE)
			break;

		if (__rmid_read(entry->rmid) > __intel_cqm_threshold)
			entry->state = RMID_DIRTY;
	}
}

/*
 * If we have group events waiting for an RMID that don't conflict with
 * events already running, assign @rmid.
 */
static bool intel_cqm_sched_in_event(u32 rmid)
{
	struct perf_event *leader, *event;

	lockdep_assert_held(&cache_mutex);

	leader = list_first_entry(&cache_groups, struct perf_event,
				  hw.cqm_groups_entry);
	event = leader;

	list_for_each_entry_continue(event, &cache_groups,
				     hw.cqm_groups_entry) {
		if (__rmid_valid(event->hw.cqm_rmid))
			continue;

		if (__conflict_event(event, leader))
			continue;

		intel_cqm_xchg_rmid(event, rmid);
		return true;
	}

	return false;
}

/*
 * Initially use this constant for both the limbo queue time and the
 * rotation timer interval, pmu::hrtimer_interval_ms.
 *
 * They don't need to be the same, but the two are related since if you
 * rotate faster than you recycle RMIDs, you may run out of available
 * RMIDs.
 */
#define RMID_DEFAULT_QUEUE_TIME 250	/* ms */

static unsigned int __rmid_queue_time_ms = RMID_DEFAULT_QUEUE_TIME;

/*
 * intel_cqm_rmid_stabilize - move RMIDs from limbo to free list
 * @nr_available: number of freeable RMIDs on the limbo list
 *
 * Quiescent state; wait for all 'freed' RMIDs to become unused, i.e. no
 * cachelines are tagged with those RMIDs. After this we can reuse them
 * and know that the current set of active RMIDs is stable.
 *
 * Return %true or %false depending on whether stabilization needs to be
 * reattempted.
 *
 * If we return %true then @nr_available is updated to indicate the
 * number of RMIDs on the limbo list that have been queued for the
 * minimum queue time (RMID_AVAILABLE), but whose data occupancy values
 * are above __intel_cqm_threshold.
 */
static bool intel_cqm_rmid_stabilize(unsigned int *available)
{
	struct cqm_rmid_entry *entry, *tmp;

	lockdep_assert_held(&cache_mutex);

	*available = 0;
	list_for_each_entry(entry, &cqm_rmid_limbo_lru, list) {
		unsigned long min_queue_time;
		unsigned long now = jiffies;

		/*
		 * We hold RMIDs placed into limbo for a minimum queue
		 * time. Before the minimum queue time has elapsed we do
		 * not recycle RMIDs.
		 *
		 * The reasoning is that until a sufficient time has
		 * passed since we stopped using an RMID, any RMID
		 * placed onto the limbo list will likely still have
		 * data tagged in the cache, which means we'll probably
		 * fail to recycle it anyway.
		 *
		 * We can save ourselves an expensive IPI by skipping
		 * any RMIDs that have not been queued for the minimum
		 * time.
		 */
		min_queue_time = entry->queue_time +
			msecs_to_jiffies(__rmid_queue_time_ms);

		if (time_after(min_queue_time, now))
			break;

		entry->state = RMID_AVAILABLE;
		(*available)++;
	}

	/*
	 * Fast return if none of the RMIDs on the limbo list have been
	 * sitting on the queue for the minimum queue time.
	 */
	if (!*available)
		return false;

	/*
	 * Test whether an RMID is free for each package.
	 */
	on_each_cpu_mask(&cqm_cpumask, intel_cqm_stable, NULL, true);

	list_for_each_entry_safe(entry, tmp, &cqm_rmid_limbo_lru, list) {
		/*
		 * Exhausted all RMIDs that have waited min queue time.
		 */
		if (entry->state == RMID_YOUNG)
			break;

		if (entry->state == RMID_DIRTY)
			continue;

		list_del(&entry->list);	/* remove from limbo */

		/*
		 * The rotation RMID gets priority if it's
		 * currently invalid. In which case, skip adding
		 * the RMID to the free lru.
		 */
		if (!__rmid_valid(intel_cqm_rotation_rmid)) {
			intel_cqm_rotation_rmid = entry->rmid;
			continue;
		}

		/*
		 * If we have groups waiting for RMIDs, hand
		 * them one now provided they don't conflict.
		 */
		if (intel_cqm_sched_in_event(entry->rmid))
			continue;

		/*
		 * Otherwise place it onto the free list.
		 */
		list_add_tail(&entry->list, &cqm_rmid_free_lru);
	}

	return __rmid_valid(intel_cqm_rotation_rmid);
}

/*
 * Pick a victim group and move it to the tail of the group list.
 * @next: The first group without an RMID
 */
static void __intel_cqm_pick_and_rotate(struct perf_event *next)
{
	struct perf_event *rotor;
	u32 rmid;

	lockdep_assert_held(&cache_mutex);

	rotor = list_first_entry(&cache_groups, struct perf_event,
				 hw.cqm_groups_entry);

	/*
	 * The group at the front of the list should always have a valid
	 * RMID. If it doesn't then no groups have RMIDs assigned and we
	 * don't need to rotate the list.
	 */
	if (next == rotor)
		return;

	rmid = intel_cqm_xchg_rmid(rotor, INVALID_RMID);
	__put_rmid(rmid);

	list_rotate_left(&cache_groups);
}

/*
 * Deallocate the RMIDs from any events that conflict with @event, and
 * place them on the back of the group list.
 */
static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
{
	struct perf_event *group, *g;
	u32 rmid;

	lockdep_assert_held(&cache_mutex);

	list_for_each_entry_safe(group, g, &cache_groups, hw.cqm_groups_entry) {
		if (group == event)
			continue;

		rmid = group->hw.cqm_rmid;

		/*
		 * Skip events that don't have a valid RMID.
		 */
		if (!__rmid_valid(rmid))
			continue;

		/*
		 * No conflict? No problem! Leave the event alone.
		 */
		if (!__conflict_event(group, event))
			continue;

		intel_cqm_xchg_rmid(group, INVALID_RMID);
		__put_rmid(rmid);
	}
}

/*
 * Attempt to rotate the groups and assign new RMIDs.
 *
 * We rotate for two reasons,
 *   1. To handle the scheduling of conflicting events
 *   2. To recycle RMIDs
 *
 * Rotating RMIDs is complicated because the hardware doesn't give us
 * any clues.
 *
 * There are problems with the hardware interface; when you change the
 * task:RMID map cachelines retain their 'old' tags, giving a skewed
 * picture. In order to work around this, we must always keep one free
 * RMID - intel_cqm_rotation_rmid.
 *
 * Rotation works by taking away an RMID from a group (the old RMID),
 * and assigning the free RMID to another group (the new RMID). We must
 * then wait for the old RMID to not be used (no cachelines tagged).
 * This ensures that all cachelines are tagged with 'active' RMIDs. At
 * this point we can start reading values for the new RMID and treat the
 * old RMID as the free RMID for the next rotation.
 *
 * Return %true or %false depending on whether we did any rotating.
 */
static bool __intel_cqm_rmid_rotate(void)
{
	struct perf_event *group, *start = NULL;
	unsigned int threshold_limit;
	unsigned int nr_needed = 0;
	unsigned int nr_available;
	bool rotated = false;

	mutex_lock(&cache_mutex);

again:
	/*
	 * Fast path through this function if there are no groups and no
	 * RMIDs that need cleaning.
	 */
	if (list_empty(&cache_groups) && list_empty(&cqm_rmid_limbo_lru))
		goto out;

	list_for_each_entry(group, &cache_groups, hw.cqm_groups_entry) {
		if (!__rmid_valid(group->hw.cqm_rmid)) {
			if (!start)
				start = group;
			nr_needed++;
		}
	}

	/*
	 * We have some event groups, but they all have RMIDs assigned
	 * and no RMIDs need cleaning.
	 */
	if (!nr_needed && list_empty(&cqm_rmid_limbo_lru))
		goto out;

	if (!nr_needed)
		goto stabilize;

	/*
	 * We have more event groups without RMIDs than available RMIDs,
	 * or we have event groups that conflict with the ones currently
	 * scheduled.
	 *
	 * We force deallocate the rmid of the group at the head of
	 * cache_groups. The first event group without an RMID then gets
	 * assigned intel_cqm_rotation_rmid. This ensures we always make
	 * forward progress.
	 *
	 * Rotate the cache_groups list so the previous head is now the
	 * tail.
	 */
	__intel_cqm_pick_and_rotate(start);

	/*
	 * If the rotation is going to succeed, reduce the threshold so
	 * that we don't needlessly reuse dirty RMIDs.
	 */
	if (__rmid_valid(intel_cqm_rotation_rmid)) {
		intel_cqm_xchg_rmid(start, intel_cqm_rotation_rmid);
		intel_cqm_rotation_rmid = __get_rmid();

		intel_cqm_sched_out_conflicting_events(start);

		if (__intel_cqm_threshold)
			__intel_cqm_threshold--;
	}

	rotated = true;

stabilize:
	/*
	 * We now need to stabilize the RMID we freed above (if any) to
	 * ensure that the next time we rotate we have an RMID with zero
	 * occupancy value.
	 *
	 * Alternatively, if we didn't need to perform any rotation,
	 * we'll have a bunch of RMIDs in limbo that need stabilizing.
	 */
	threshold_limit = __intel_cqm_max_threshold / cqm_l3_scale;

	while (intel_cqm_rmid_stabilize(&nr_available) &&
	       __intel_cqm_threshold < threshold_limit) {
		unsigned int steal_limit;

		/*
		 * Don't spin if nobody is actively waiting for an RMID,
		 * the rotation worker will be kicked as soon as an
		 * event needs an RMID anyway.
		 */
		if (!nr_needed)
			break;

		/* Allow max 25% of RMIDs to be in limbo. */
		steal_limit = (cqm_max_rmid + 1) / 4;

		/*
		 * We failed to stabilize any RMIDs so our rotation
		 * logic is now stuck. In order to make forward progress
		 * we have a few options:
		 *
		 * 1. rotate ("steal") another RMID
		 * 2. increase the threshold
		 * 3. do nothing
		 *
		 * We do both of 1. and 2. until we hit the steal limit.
		 *
		 * The steal limit prevents all RMIDs ending up on the
		 * limbo list. This can happen if every RMID has a
		 * non-zero occupancy above threshold_limit, and the
		 * occupancy values aren't dropping fast enough.
		 *
		 * Note that there is prioritisation at work here - we'd
		 * rather increase the number of RMIDs on the limbo list
		 * than increase the threshold, because increasing the
		 * threshold skews the event data (because we reuse
		 * dirty RMIDs) - threshold bumps are a last resort.
		 */
		if (nr_available < steal_limit)
			goto again;

		__intel_cqm_threshold++;
	}

out:
	mutex_unlock(&cache_mutex);
	return rotated;
}

static void intel_cqm_rmid_rotate(struct work_struct *work);

static DECLARE_DELAYED_WORK(intel_cqm_rmid_work, intel_cqm_rmid_rotate);

static struct pmu intel_cqm_pmu;

static void intel_cqm_rmid_rotate(struct work_struct *work)
{
	unsigned long delay;

	__intel_cqm_rmid_rotate();

	delay = msecs_to_jiffies(intel_cqm_pmu.hrtimer_interval_ms);
	schedule_delayed_work(&intel_cqm_rmid_work, delay);
}

static u64 update_sample(unsigned int rmid, u32 evt_type, int first)
{
	struct sample *mbm_current;
	u32 vrmid = rmid_2_index(rmid);
	u64 val, bytes, shift;
	u32 eventid;

	if (evt_type == QOS_MBM_LOCAL_EVENT_ID) {
		mbm_current = &mbm_local[vrmid];
		eventid = QOS_MBM_LOCAL_EVENT_ID;
	} else {
		mbm_current = &mbm_total[vrmid];
		eventid = QOS_MBM_TOTAL_EVENT_ID;
	}

	wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
	rdmsrl(MSR_IA32_QM_CTR, val);
	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return mbm_current->total_bytes;

	if (first) {
		mbm_current->prev_msr = val;
		mbm_current->total_bytes = 0;
		return mbm_current->total_bytes;
	}

	/*
	 * The h/w guarantees that counters will not overflow
	 * so long as we poll them at least once per second.
	 */
	shift = 64 - MBM_CNTR_WIDTH;
	bytes = (val << shift) - (mbm_current->prev_msr << shift);
	bytes >>= shift;

	bytes *= cqm_l3_scale;

	mbm_current->total_bytes += bytes;
	mbm_current->prev_msr = val;

	return mbm_current->total_bytes;
}
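
/*
 * Wrap-around example for the shift arithmetic above (made-up values):
 * with MBM_CNTR_WIDTH == 24, shift == 40. If prev_msr == 0xffff00 and the
 * new MSR value is 0x000100, then
 *	((0x000100 << 40) - (0xffff00 << 40)) >> 40 == 0x000200
 * i.e. the delta is taken modulo 2^24 before being scaled by cqm_l3_scale.
 */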

static u64 rmid_read_mbm(unsigned int rmid, u32 evt_type)
{
	return update_sample(rmid, evt_type, 0);
}

static void __intel_mbm_event_init(void *info)
{
	struct rmid_read *rr = info;

	update_sample(rr->rmid, rr->evt_type, 1);
}

static void init_mbm_sample(u32 rmid, u32 evt_type)
{
	struct rmid_read rr = {
		.rmid = rmid,
		.evt_type = evt_type,
		.value = ATOMIC64_INIT(0),
	};

	/* on each socket, init sample */
	on_each_cpu_mask(&cqm_cpumask, __intel_mbm_event_init, &rr, 1);
}

/*
 * Find a group and setup RMID.
 *
 * If we're part of a group, we use the group's RMID.
 */
static void intel_cqm_setup_event(struct perf_event *event,
				  struct perf_event **group)
{
	struct perf_event *iter;
	bool conflict = false;
	u32 rmid;

	event->hw.is_group_event = false;
	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
		rmid = iter->hw.cqm_rmid;

		if (__match_event(iter, event)) {
			/* All tasks in a group share an RMID */
			event->hw.cqm_rmid = rmid;
			*group = iter;
			if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
				init_mbm_sample(rmid, event->attr.config);
			return;
		}

		/*
		 * We only care about conflicts for events that are
		 * actually scheduled in (and hence have a valid RMID).
		 */
		if (__conflict_event(iter, event) && __rmid_valid(rmid))
			conflict = true;
	}

	if (conflict)
		rmid = INVALID_RMID;
	else
		rmid = __get_rmid();

	if (is_mbm_event(event->attr.config) && __rmid_valid(rmid))
		init_mbm_sample(rmid, event->attr.config);

	event->hw.cqm_rmid = rmid;
}

static void intel_cqm_event_read(struct perf_event *event)
{
	unsigned long flags;
	u32 rmid;
	u64 val;

	/*
	 * Task events are handled by intel_cqm_event_count().
	 */
	if (event->cpu == -1)
		return;

	raw_spin_lock_irqsave(&cache_lock, flags);
	rmid = event->hw.cqm_rmid;

	if (!__rmid_valid(rmid))
		goto out;

	if (is_mbm_event(event->attr.config))
		val = rmid_read_mbm(rmid, event->attr.config);
	else
		val = __rmid_read(rmid);

	/*
	 * Ignore this reading on error states and do not update the value.
	 */
	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		goto out;

	local64_set(&event->count, val);
out:
	raw_spin_unlock_irqrestore(&cache_lock, flags);
}

static void __intel_cqm_event_count(void *info)
{
	struct rmid_read *rr = info;
	u64 val;

	val = __rmid_read(rr->rmid);

	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;

	atomic64_add(val, &rr->value);
}

static inline bool cqm_group_leader(struct perf_event *event)
{
	return !list_empty(&event->hw.cqm_groups_entry);
}

static void __intel_mbm_event_count(void *info)
{
	struct rmid_read *rr = info;
	u64 val;

	val = rmid_read_mbm(rr->rmid, rr->evt_type);
	if (val & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
		return;
	atomic64_add(val, &rr->value);
}

static enum hrtimer_restart mbm_hrtimer_handle(struct hrtimer *hrtimer)
{
	struct perf_event *iter, *iter1;
	int ret = HRTIMER_RESTART;
	struct list_head *head;
	unsigned long flags;
	u32 grp_rmid;

	/*
	 * Need to hold cache_lock as the timer's Event Select MSR reads
	 * can race with the mbm/cqm count() and mbm_init() reads.
	 */
	raw_spin_lock_irqsave(&cache_lock, flags);

	if (list_empty(&cache_groups)) {
		ret = HRTIMER_NORESTART;
		goto out;
	}

	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
		grp_rmid = iter->hw.cqm_rmid;
		if (!__rmid_valid(grp_rmid))
			continue;
		if (is_mbm_event(iter->attr.config))
			update_sample(grp_rmid, iter->attr.config, 0);

		head = &iter->hw.cqm_group_entry;
		if (list_empty(head))
			continue;
		list_for_each_entry(iter1, head, hw.cqm_group_entry) {
			if (!iter1->hw.is_group_event)
				break;
			if (is_mbm_event(iter1->attr.config))
				update_sample(iter1->hw.cqm_rmid,
					      iter1->attr.config, 0);
		}
	}

	hrtimer_forward_now(hrtimer, ms_to_ktime(MBM_CTR_OVERFLOW_TIME));
out:
	raw_spin_unlock_irqrestore(&cache_lock, flags);

	return ret;
}

static void __mbm_start_timer(void *info)
{
	hrtimer_start(&mbm_timers[pkg_id], ms_to_ktime(MBM_CTR_OVERFLOW_TIME),
		      HRTIMER_MODE_REL_PINNED);
}

static void __mbm_stop_timer(void *info)
{
	hrtimer_cancel(&mbm_timers[pkg_id]);
}

static void mbm_start_timers(void)
{
	on_each_cpu_mask(&cqm_cpumask, __mbm_start_timer, NULL, 1);
}

static void mbm_stop_timers(void)
{
	on_each_cpu_mask(&cqm_cpumask, __mbm_stop_timer, NULL, 1);
}

static void mbm_hrtimer_init(void)
{
	struct hrtimer *hr;
	int i;

	for (i = 0; i < mbm_socket_max; i++) {
		hr = &mbm_timers[i];
		hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
		hr->function = mbm_hrtimer_handle;
	}
}
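
/*
 * Note on the timers above: there is one hrtimer per package (indexed by
 * pkg_id), armed pinned on that package's designated reader CPU. They are
 * started when the first event is created and stopped again when the last
 * one is destroyed (see intel_cqm_event_init() and
 * intel_cqm_event_destroy()), firing every MBM_CTR_OVERFLOW_TIME ms so the
 * 24-bit MBM counters are sampled before they can wrap.
 */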

static u64 intel_cqm_event_count(struct perf_event *event)
{
	unsigned long flags;
	struct rmid_read rr = {
		.evt_type = event->attr.config,
		.value = ATOMIC64_INIT(0),
	};

	/*
	 * We only need to worry about task events. System-wide events
	 * are handled like usual, i.e. entirely with
	 * intel_cqm_event_read().
	 */
	if (event->cpu != -1)
		return __perf_event_count(event);

	/*
	 * Only the group leader gets to report values unless there are
	 * multiple events in the same group, in which case we still need
	 * to read the other events. This stops us
	 * reporting duplicate values to userspace, and gives us a clear
	 * rule for which task gets to report the values.
	 *
	 * Note that it is impossible to attribute these values to
	 * specific packages - we forfeit that ability when we create
	 * task events.
	 */
	if (!cqm_group_leader(event) && !event->hw.is_group_event)
		return 0;

	/*
	 * Getting up-to-date values requires an SMP IPI which is not
	 * possible if we're being called in interrupt context. Return
	 * the cached values instead.
	 */
	if (unlikely(in_interrupt()))
		goto out;

	/*
	 * Notice that we don't perform the reading of an RMID
	 * atomically, because we can't hold a spin lock across the
	 * IPIs.
	 *
	 * Speculatively perform the read, since @event might be
	 * assigned a different (possibly invalid) RMID while we're
	 * busy performing the IPI calls. It's therefore necessary to
	 * check @event's RMID afterwards, and if it has changed,
	 * discard the result of the read.
	 */
	rr.rmid = ACCESS_ONCE(event->hw.cqm_rmid);

	if (!__rmid_valid(rr.rmid))
		goto out;

	cqm_mask_call(&rr);

	raw_spin_lock_irqsave(&cache_lock, flags);
	if (event->hw.cqm_rmid == rr.rmid)
		local64_set(&event->count, atomic64_read(&rr.value));
	raw_spin_unlock_irqrestore(&cache_lock, flags);
out:
	return __perf_event_count(event);
}

static void intel_cqm_event_start(struct perf_event *event, int mode)
{
	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
	u32 rmid = event->hw.cqm_rmid;

	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
		return;

	event->hw.cqm_state &= ~PERF_HES_STOPPED;

	if (state->rmid_usecnt++) {
		if (!WARN_ON_ONCE(state->rmid != rmid))
			return;
	} else {
		WARN_ON_ONCE(state->rmid);
	}

	state->rmid = rmid;
	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
}

static void intel_cqm_event_stop(struct perf_event *event, int mode)
{
	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);

	if (event->hw.cqm_state & PERF_HES_STOPPED)
		return;

	event->hw.cqm_state |= PERF_HES_STOPPED;

	intel_cqm_event_read(event);

	if (!--state->rmid_usecnt) {
		state->rmid = 0;
		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
	} else {
		WARN_ON_ONCE(!state->rmid);
	}
}

static int intel_cqm_event_add(struct perf_event *event, int mode)
{
	unsigned long flags;
	u32 rmid;

	raw_spin_lock_irqsave(&cache_lock, flags);

	event->hw.cqm_state = PERF_HES_STOPPED;
	rmid = event->hw.cqm_rmid;

	if (__rmid_valid(rmid) && (mode & PERF_EF_START))
		intel_cqm_event_start(event, mode);

	raw_spin_unlock_irqrestore(&cache_lock, flags);

	return 0;
}

static void intel_cqm_event_destroy(struct perf_event *event)
{
	struct perf_event *group_other = NULL;
	unsigned long flags;

	mutex_lock(&cache_mutex);
	/*
	 * Hold the cache_lock as mbm timer handlers could be
	 * scanning the list of events.
	 */
	raw_spin_lock_irqsave(&cache_lock, flags);

	/*
	 * If there's another event in this group...
	 */
	if (!list_empty(&event->hw.cqm_group_entry)) {
		group_other = list_first_entry(&event->hw.cqm_group_entry,
					       struct perf_event,
					       hw.cqm_group_entry);
		list_del(&event->hw.cqm_group_entry);
	}

	/*
	 * And we're the group leader..
	 */
	if (cqm_group_leader(event)) {
		/*
		 * If there was a group_other, make that leader, otherwise
		 * destroy the group and return the RMID.
		 */
		if (group_other) {
			list_replace(&event->hw.cqm_groups_entry,
				     &group_other->hw.cqm_groups_entry);
		} else {
			u32 rmid = event->hw.cqm_rmid;

			if (__rmid_valid(rmid))
				__put_rmid(rmid);
			list_del(&event->hw.cqm_groups_entry);
		}
	}

	raw_spin_unlock_irqrestore(&cache_lock, flags);

	/*
	 * Stop the mbm overflow timers when the last event is destroyed.
	 */
	if (mbm_enabled && list_empty(&cache_groups))
		mbm_stop_timers();

	mutex_unlock(&cache_mutex);
}

static int intel_cqm_event_init(struct perf_event *event)
{
	struct perf_event *group = NULL;
	bool rotate = false;
	unsigned long flags;

	if (event->attr.type != intel_cqm_pmu.type)
		return -ENOENT;

	if ((event->attr.config < QOS_L3_OCCUP_EVENT_ID) ||
	    (event->attr.config > QOS_MBM_LOCAL_EVENT_ID))
		return -EINVAL;

	if ((is_cqm_event(event->attr.config) && !cqm_enabled) ||
	    (is_mbm_event(event->attr.config) && !mbm_enabled))
		return -EINVAL;

	/* unsupported modes and filters */
	if (event->attr.exclude_user   ||
	    event->attr.exclude_kernel ||
	    event->attr.exclude_hv     ||
	    event->attr.exclude_idle   ||
	    event->attr.exclude_host   ||
	    event->attr.exclude_guest  ||
	    event->attr.sample_period) /* no sampling */
		return -EINVAL;

	INIT_LIST_HEAD(&event->hw.cqm_group_entry);
	INIT_LIST_HEAD(&event->hw.cqm_groups_entry);

	event->destroy = intel_cqm_event_destroy;

	mutex_lock(&cache_mutex);

	/*
	 * Start the mbm overflow timers when the first event is created.
	 */
	if (mbm_enabled && list_empty(&cache_groups))
		mbm_start_timers();

	/* Will also set rmid */
	intel_cqm_setup_event(event, &group);

	/*
	 * Hold the cache_lock as mbm timer handlers could be
	 * scanning the list of events.
	 */
	raw_spin_lock_irqsave(&cache_lock, flags);

	if (group) {
		list_add_tail(&event->hw.cqm_group_entry,
			      &group->hw.cqm_group_entry);
	} else {
		list_add_tail(&event->hw.cqm_groups_entry,
			      &cache_groups);

		/*
		 * All RMIDs are either in use or have recently been
		 * used. Kick the rotation worker to clean/free some.
		 *
		 * We only do this for the group leader, rather than for
		 * every event in a group to save on needless work.
		 */
		if (!__rmid_valid(event->hw.cqm_rmid))
			rotate = true;
	}

	raw_spin_unlock_irqrestore(&cache_lock, flags);
	mutex_unlock(&cache_mutex);

	if (rotate)
		schedule_delayed_work(&intel_cqm_rmid_work, 0);

	return 0;
}

EVENT_ATTR_STR(llc_occupancy, intel_cqm_llc, "event=0x01");
EVENT_ATTR_STR(llc_occupancy.per-pkg, intel_cqm_llc_pkg, "1");
EVENT_ATTR_STR(llc_occupancy.unit, intel_cqm_llc_unit, "Bytes");
EVENT_ATTR_STR(llc_occupancy.scale, intel_cqm_llc_scale, NULL);
EVENT_ATTR_STR(llc_occupancy.snapshot, intel_cqm_llc_snapshot, "1");

EVENT_ATTR_STR(total_bytes, intel_cqm_total_bytes, "event=0x02");
EVENT_ATTR_STR(total_bytes.per-pkg, intel_cqm_total_bytes_pkg, "1");
EVENT_ATTR_STR(total_bytes.unit, intel_cqm_total_bytes_unit, "MB");
EVENT_ATTR_STR(total_bytes.scale, intel_cqm_total_bytes_scale, "1e-6");

EVENT_ATTR_STR(local_bytes, intel_cqm_local_bytes, "event=0x03");
EVENT_ATTR_STR(local_bytes.per-pkg, intel_cqm_local_bytes_pkg, "1");
EVENT_ATTR_STR(local_bytes.unit, intel_cqm_local_bytes_unit, "MB");
EVENT_ATTR_STR(local_bytes.scale, intel_cqm_local_bytes_scale, "1e-6");

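/*
 * Example usage from userspace (illustrative only, assuming the PMU is
 * registered under the name "intel_cqm"):
 *
 *	perf stat -e intel_cqm/llc_occupancy/ -a sleep 1
 *	perf stat -e intel_cqm/total_bytes/,intel_cqm/local_bytes/ -a sleep 1
 *
 * The .unit/.scale strings above let perf print Bytes/MB rather than raw
 * counter values.
 */
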
Matt Fleming4afbb24c2015-01-23 18:45:44 +00001453static struct attribute *intel_cqm_events_attr[] = {
1454 EVENT_PTR(intel_cqm_llc),
1455 EVENT_PTR(intel_cqm_llc_pkg),
1456 EVENT_PTR(intel_cqm_llc_unit),
1457 EVENT_PTR(intel_cqm_llc_scale),
1458 EVENT_PTR(intel_cqm_llc_snapshot),
1459 NULL,
1460};
1461
Vikas Shivappa33c3cc72016-03-10 15:32:09 -08001462static struct attribute *intel_mbm_events_attr[] = {
1463 EVENT_PTR(intel_cqm_total_bytes),
1464 EVENT_PTR(intel_cqm_local_bytes),
1465 EVENT_PTR(intel_cqm_total_bytes_pkg),
1466 EVENT_PTR(intel_cqm_local_bytes_pkg),
1467 EVENT_PTR(intel_cqm_total_bytes_unit),
1468 EVENT_PTR(intel_cqm_local_bytes_unit),
1469 EVENT_PTR(intel_cqm_total_bytes_scale),
1470 EVENT_PTR(intel_cqm_local_bytes_scale),
1471 NULL,
1472};
1473
1474static struct attribute *intel_cmt_mbm_events_attr[] = {
1475 EVENT_PTR(intel_cqm_llc),
1476 EVENT_PTR(intel_cqm_total_bytes),
1477 EVENT_PTR(intel_cqm_local_bytes),
1478 EVENT_PTR(intel_cqm_llc_pkg),
1479 EVENT_PTR(intel_cqm_total_bytes_pkg),
1480 EVENT_PTR(intel_cqm_local_bytes_pkg),
1481 EVENT_PTR(intel_cqm_llc_unit),
1482 EVENT_PTR(intel_cqm_total_bytes_unit),
1483 EVENT_PTR(intel_cqm_local_bytes_unit),
1484 EVENT_PTR(intel_cqm_llc_scale),
1485 EVENT_PTR(intel_cqm_total_bytes_scale),
1486 EVENT_PTR(intel_cqm_local_bytes_scale),
1487 EVENT_PTR(intel_cqm_llc_snapshot),
1488 NULL,
1489};
1490
Matt Fleming4afbb24c2015-01-23 18:45:44 +00001491static struct attribute_group intel_cqm_events_group = {
1492 .name = "events",
Vikas Shivappa33c3cc72016-03-10 15:32:09 -08001493 .attrs = NULL,
Matt Fleming4afbb24c2015-01-23 18:45:44 +00001494};
1495
1496PMU_FORMAT_ATTR(event, "config:0-7");
1497static struct attribute *intel_cqm_formats_attr[] = {
1498 &format_attr_event.attr,
1499 NULL,
1500};
1501
1502static struct attribute_group intel_cqm_format_group = {
1503 .name = "format",
1504 .attrs = intel_cqm_formats_attr,
1505};
1506
static ssize_t
max_recycle_threshold_show(struct device *dev, struct device_attribute *attr,
			   char *page)
{
	ssize_t rv;

	mutex_lock(&cache_mutex);
	rv = snprintf(page, PAGE_SIZE-1, "%u\n", __intel_cqm_max_threshold);
	mutex_unlock(&cache_mutex);

	return rv;
}

static ssize_t
max_recycle_threshold_store(struct device *dev,
			    struct device_attribute *attr,
			    const char *buf, size_t count)
{
	unsigned int bytes, cachelines;
	int ret;

	ret = kstrtouint(buf, 0, &bytes);
	if (ret)
		return ret;

	mutex_lock(&cache_mutex);

	__intel_cqm_max_threshold = bytes;
	cachelines = bytes / cqm_l3_scale;

	/*
	 * The new maximum takes effect immediately.
	 */
	if (__intel_cqm_threshold > cachelines)
		__intel_cqm_threshold = cachelines;

	mutex_unlock(&cache_mutex);

	return count;
}

static DEVICE_ATTR_RW(max_recycle_threshold);

static struct attribute *intel_cqm_attrs[] = {
	&dev_attr_max_recycle_threshold.attr,
	NULL,
};

static const struct attribute_group intel_cqm_group = {
	.attrs = intel_cqm_attrs,
};

static const struct attribute_group *intel_cqm_attr_groups[] = {
	&intel_cqm_events_group,
	&intel_cqm_format_group,
	&intel_cqm_group,
	NULL,
};

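/*
 * The PMU is registered below as "intel_cqm" with a software context, so
 * events can be attached per task as well as system wide.  With the
 * attribute tables above, a typical invocation would be something along
 * the lines of:
 *
 *	perf stat -a -e intel_cqm/local_bytes/ -- sleep 1
 */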
static struct pmu intel_cqm_pmu = {
	.hrtimer_interval_ms = RMID_DEFAULT_QUEUE_TIME,
	.attr_groups	     = intel_cqm_attr_groups,
	.task_ctx_nr	     = perf_sw_context,
	.event_init	     = intel_cqm_event_init,
	.add		     = intel_cqm_event_add,
	.del		     = intel_cqm_event_stop,
	.start		     = intel_cqm_event_start,
	.stop		     = intel_cqm_event_stop,
	.read		     = intel_cqm_event_read,
	.count		     = intel_cqm_event_count,
};

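/*
 * cqm_cpumask holds one "reader" CPU per package; the per-package counter
 * reads are directed to that CPU.
 */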
static inline void cqm_pick_event_reader(int cpu)
{
	int reader;

	/* First online cpu in package becomes the reader */
	reader = cpumask_any_and(&cqm_cpumask, topology_core_cpumask(cpu));
	if (reader >= nr_cpu_ids)
		cpumask_set_cpu(cpu, &cqm_cpumask);
}

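/*
 * Hotplug STARTING callback: reset the per-CPU PQR MSR cache, sanity-check
 * that this CPU agrees with the boot CPU on max RMID and occupancy scale,
 * and elect a reader for the package if it does not have one yet.
 */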
static int intel_cqm_cpu_starting(unsigned int cpu)
{
	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	state->rmid = 0;
	state->closid = 0;
	state->rmid_usecnt = 0;

	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);

	cqm_pick_event_reader(cpu);
	return 0;
}

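/*
 * Hotplug offline callback: if the departing CPU was the package reader,
 * hand the role to any other online CPU in the same package.
 */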
static int intel_cqm_cpu_exit(unsigned int cpu)
{
	int target;

	/* Is @cpu the current cqm reader for this package ? */
	if (!cpumask_test_and_clear_cpu(cpu, &cqm_cpumask))
		return 0;

	/* Find another online reader in this package */
	target = cpumask_any_but(topology_core_cpumask(cpu), cpu);

	if (target < nr_cpu_ids)
		cpumask_set_cpu(target, &cqm_cpumask);

	return 0;
}

static const struct x86_cpu_id intel_cqm_match[] = {
	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_OCCUP_LLC },
	{}
};

static void mbm_cleanup(void)
{
	if (!mbm_enabled)
		return;

	kfree(mbm_local);
	kfree(mbm_total);
	mbm_enabled = false;
}

static const struct x86_cpu_id intel_mbm_local_match[] = {
	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_LOCAL },
	{}
};

static const struct x86_cpu_id intel_mbm_total_match[] = {
	{ .vendor = X86_VENDOR_INTEL, .feature = X86_FEATURE_CQM_MBM_TOTAL },
	{}
};

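/*
 * Allocate the MBM bookkeeping: one struct sample per RMID per socket for
 * each of the local and total bandwidth events, plus one overflow hrtimer
 * per socket, which is initialized here.
 */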
static int intel_mbm_init(void)
{
	int ret = 0, array_size, maxid = cqm_max_rmid + 1;

	mbm_socket_max = topology_max_packages();
	array_size = sizeof(struct sample) * maxid * mbm_socket_max;
	mbm_local = kmalloc(array_size, GFP_KERNEL);
	if (!mbm_local)
		return -ENOMEM;

	mbm_total = kmalloc(array_size, GFP_KERNEL);
	if (!mbm_total) {
		ret = -ENOMEM;
		goto out;
	}

	array_size = sizeof(struct hrtimer) * mbm_socket_max;
	mbm_timers = kmalloc(array_size, GFP_KERNEL);
	if (!mbm_timers) {
		ret = -ENOMEM;
		goto out;
	}
	mbm_hrtimer_init();

out:
	if (ret)
		mbm_cleanup();

	return ret;
}

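/*
 * Probe and set up the driver: detect CQM/MBM via the CPUID feature bits,
 * pick the minimum RMID count and verify a consistent occupancy scale
 * across CPUs, build the RMID cache, choose the matching event table,
 * register the PMU and install the CPU hotplug callbacks.
 */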
static int __init intel_cqm_init(void)
{
	char *str = NULL, scale[20];
	int cpu, ret;

	if (x86_match_cpu(intel_cqm_match))
		cqm_enabled = true;

	if (x86_match_cpu(intel_mbm_local_match) &&
	    x86_match_cpu(intel_mbm_total_match))
		mbm_enabled = true;

	if (!cqm_enabled && !mbm_enabled)
		return -ENODEV;

	cqm_l3_scale = boot_cpu_data.x86_cache_occ_scale;

	/*
	 * It's possible that not all resources support the same number
	 * of RMIDs. Instead of making scheduling much more complicated
	 * (where we have to match a task's RMID to a cpu that supports
	 * that many RMIDs) just find the minimum RMIDs supported across
	 * all cpus.
	 *
	 * Also, check that the scales match on all cpus.
	 */
	get_online_cpus();
	for_each_online_cpu(cpu) {
		struct cpuinfo_x86 *c = &cpu_data(cpu);

		if (c->x86_cache_max_rmid < cqm_max_rmid)
			cqm_max_rmid = c->x86_cache_max_rmid;

		if (c->x86_cache_occ_scale != cqm_l3_scale) {
			pr_err("Multiple LLC scale values, disabling\n");
			ret = -EINVAL;
			goto out;
		}
	}

	/*
	 * A reasonable upper limit on the max threshold is the number
	 * of lines tagged per RMID if all RMIDs have the same number of
	 * lines tagged in the LLC.
	 *
	 * For a 35MB LLC and 56 RMIDs, this is ~1.8% of the LLC.
	 */
	__intel_cqm_max_threshold =
		boot_cpu_data.x86_cache_size * 1024 / (cqm_max_rmid + 1);

	snprintf(scale, sizeof(scale), "%u", cqm_l3_scale);
	str = kstrdup(scale, GFP_KERNEL);
	if (!str) {
		ret = -ENOMEM;
		goto out;
	}

	event_attr_intel_cqm_llc_scale.event_str = str;

	ret = intel_cqm_setup_rmid_cache();
	if (ret)
		goto out;

	if (mbm_enabled)
		ret = intel_mbm_init();
	if (ret && !cqm_enabled)
		goto out;

	if (cqm_enabled && mbm_enabled)
		intel_cqm_events_group.attrs = intel_cmt_mbm_events_attr;
	else if (!cqm_enabled && mbm_enabled)
		intel_cqm_events_group.attrs = intel_mbm_events_attr;
	else if (cqm_enabled && !mbm_enabled)
		intel_cqm_events_group.attrs = intel_cqm_events_attr;

	ret = perf_pmu_register(&intel_cqm_pmu, "intel_cqm", -1);
	if (ret) {
		pr_err("Intel CQM perf registration failed: %d\n", ret);
		goto out;
	}

	if (cqm_enabled)
		pr_info("Intel CQM monitoring enabled\n");
	if (mbm_enabled)
		pr_info("Intel MBM enabled\n");

	/*
	 * Register the CPU hotplug callbacks only once we are sure cqm
	 * is enabled, so a failed probe does not leave them installed.
	 */
	cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
			  "AP_PERF_X86_CQM_STARTING",
			  intel_cqm_cpu_starting, NULL);
	cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "AP_PERF_X86_CQM_ONLINE",
			  NULL, intel_cqm_cpu_exit);

out:
	put_online_cpus();

	if (ret) {
		kfree(str);
		cqm_cleanup();
		mbm_cleanup();
	}

	return ret;
}
device_initcall(intel_cqm_init);