/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>

#include "i915_drv.h"
#include "i915_oa_hsw.h"

/* HW requires this to be a power of two, between 128k and 16M, though the
 * driver is currently designed assuming the largest 16M size is used, such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))
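
/* For illustration: the subtraction wraps modulo the buffer size, so with
 * head = 0xFFFF80 and a tail that has wrapped around to 0x100,
 * OA_TAKEN(0x100, 0xFFFF80) = (0x100 - 0xFFFF80) & 0xFFFFFF = 384 bytes:
 * 128 bytes up to the end of the 16M buffer plus 256 bytes past the wrap.
 */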

/* There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far.
 *
 * Although this can be observed explicitly by checking for a zeroed report-id
 * field in tail reports, it seems preferable to account for this earlier, e.g.
 * as part of the _oa_buffer_is_empty checks, to minimize -EAGAIN polling cycles
 * in this situation.
 *
 * To give time for the most recent reports to land before they may be copied to
 * userspace, the driver operates as if the tail pointer effectively lags behind
 * the HW tail pointer by 'tail_margin' bytes. The margin in bytes is calculated
 * based on this constant in nanoseconds, the current OA sampling exponent
 * and the current report size.
 *
 * There is also a fallback check while reading to simply skip over reports with
 * a zeroed report-id.
 */
#define OA_TAIL_MARGIN_NSEC	100000ULL
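
/* For example (a sketch assuming Haswell's 80ns timestamp period, 256 byte
 * A45_B8_C8 reports and OA exponent 5, i.e. a 5120ns sampling period), the
 * margin computed in i915_oa_stream_init() works out as:
 *
 *	((100000 / 5120) + 1) * 256 = 5120 bytes
 *
 * i.e. the driver ignores roughly the 20 most recent report slots.
 */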

/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static int zero;
static int one = 1;
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31
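
/* For example, with the 12.5MHz (80ns) timestamp frequency used on Haswell,
 * exponent 31 selects a sampling period of 80ns * 2^32 (~5.7 minutes), which
 * matches the interval at which the 32-bit report timestamps themselves wrap.
 */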

#define INVALID_CTX_ID 0xffffffff


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * 160ns is the smallest sampling period we can theoretically program the OA
 * unit with on Haswell, corresponding to 6.25MHz.
 */
static int oa_sample_rate_hard_limit = 6250000;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;
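
/* At the default 100000Hz limit this restricts unprivileged users on Haswell
 * to OA exponents >= 6 (80ns * 2^7 = 10.24us period, ~97.7kHz); exponent 5
 * (5.12us, ~195kHz) would already exceed the limit.
 */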

/* XXX: beware that the current code assumes all reports have a power-of-two
 * size, such that ~(size - 1) can be used as a mask to align the OA tail
 * pointer; future OA HW report formats may break this assumption.
 */
static struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

#define SAMPLE_OA_REPORT      (1<<0)

struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

/* NB: This is either called via fops or the poll check hrtimer (atomic ctx)
 *
 * It's safe to read OA config state here unlocked, assuming that this is only
 * called while the stream is enabled, while the global OA configuration can't
 * be modified.
 *
 * Note: we don't lock around the head/tail reads even though there's the slim
 * possibility of read() fop errors forcing a re-init of the OA buffer
 * pointers. A race here could result in a false positive !empty status which
 * is acceptable.
 */
static bool gen7_oa_buffer_is_empty_fop_unlocked(struct drm_i915_private *dev_priv)
{
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u32 oastatus2 = I915_READ(GEN7_OASTATUS2);
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);
	u32 head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
	u32 tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;

	return OA_TAKEN(tail, head) <
		dev_priv->perf.oa.tail_margin + report_size;
}

/**
 * Appends a status record to a userspace read() buffer.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * Copies a single OA report into a userspace read() buffer.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}

/**
 * Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @head_ptr: (inout): the current oa buffer cpu read position
 * @tail: the current oa buffer gpu write position
 *
 * Returns 0 on success, negative error code on failure.
 *
 * Notably any error condition resulting in a short read (-ENOSPC or
 * -EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the head chases the tail?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset,
				  u32 *head_ptr,
				  u32 tail)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u8 *oa_buf_base = dev_priv->perf.oa.oa_buffer.vaddr;
	int tail_margin = dev_priv->perf.oa.tail_margin;
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	u32 head;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	head = *head_ptr - gtt_offset;
	tail -= gtt_offset;

	/* The OA unit is expected to wrap the tail pointer according to the OA
	 * buffer size and since we should never write a misaligned head
	 * pointer we don't expect to read one back either...
	 */
	if (tail > OA_BUFFER_SIZE || head > OA_BUFFER_SIZE ||
	    head % report_size) {
		DRM_ERROR("Inconsistent OA buffer pointer (head = %u, tail = %u): force restart\n",
			  head, tail);
		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);
		*head_ptr = I915_READ(GEN7_OASTATUS2) &
			GEN7_OASTATUS2_HEAD_MASK;
		return -EIO;
	}


	/* The tail pointer increases in 64 byte increments, not in report_size
	 * steps...
	 */
	tail &= ~(report_size - 1);

	/* Move the tail pointer back by the current tail_margin to account for
	 * the possibility that the latest reports may not have really landed
	 * in memory yet...
	 */

	if (OA_TAKEN(tail, head) < report_size + tail_margin)
		return -EAGAIN;

	tail -= tail_margin;
	tail &= mask;

	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			DRM_ERROR("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	*head_ptr = gtt_offset + head;

	return ret;
}

static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = dev_priv->perf.oa.oa_buffer.format_size;
	u32 oastatus2;
	u32 oastatus1;
	u32 head;
	u32 tail;
	int ret;

	if (WARN_ON(!dev_priv->perf.oa.oa_buffer.vaddr))
		return -EIO;

	oastatus2 = I915_READ(GEN7_OASTATUS2);
	oastatus1 = I915_READ(GEN7_OASTATUS1);

	head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
	tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.oa.gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_ERROR("OA buffer overflow: force restart\n");

		dev_priv->perf.oa.ops.oa_disable(dev_priv);
		dev_priv->perf.oa.ops.oa_enable(dev_priv);

		oastatus2 = I915_READ(GEN7_OASTATUS2);
		oastatus1 = I915_READ(GEN7_OASTATUS1);

		head = oastatus2 & GEN7_OASTATUS2_HEAD_MASK;
		tail = oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.oa.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	ret = gen7_append_oa_reports(stream, buf, count, offset,
				     &head, tail);

	/* All the report sizes are a power of two and the
	 * head should always be incremented by some multiple
	 * of the report size.
	 *
	 * A warning here, but notably if we later read back a
	 * misaligned pointer we will treat that as a bug since
	 * it could lead to a buffer overrun.
	 */
	WARN_ONCE(head & (report_size - 1),
		  "i915: Writing misaligned OA head pointer");

	/* Note: we update the head pointer here even if an error
	 * was returned since the error may represent a short read
	 * where some reports were successfully copied.
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   ((head & GEN7_OASTATUS2_HEAD_MASK) |
		    OA_MEM_SELECT_GGTT));

	return ret;
}

static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!dev_priv->perf.oa.periodic)
		return -EIO;

	/* Note: the oa_buffer_is_empty() condition is ok to run unlocked as it
	 * just performs mmio reads of the OA buffer head + tail pointers and
	 * it's assumed we're handling some operation that implies the stream
	 * can't be destroyed until completion (such as a read()) that ensures
	 * the device + OA buffer can't disappear.
	 */
	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
					!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv));
}

static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	poll_wait(file, &dev_priv->perf.oa.poll_wq, wait);
}

static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.oa.ops.read(stream, buf, count, offset);
}

/* Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_vma *vma;
	int ret;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	/* As the ID is the gtt offset of the context's vma we pin
	 * the vma to ensure the ID remains fixed.
	 *
	 * NB: implied RCS engine...
	 */
	vma = i915_gem_context_pin_legacy(stream->ctx, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto unlock;
	}

	dev_priv->perf.oa.pinned_rcs_vma = vma;

	/* Explicitly track the ID (instead of calling i915_ggtt_offset()
	 * on the fly) considering the difference with gen8+ and
	 * execlists.
	 */
	dev_priv->perf.oa.specific_ctx_id = i915_ggtt_offset(vma);

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);

	return ret;
}

static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->drm.struct_mutex);

	i915_vma_unpin(dev_priv->perf.oa.pinned_rcs_vma);
	dev_priv->perf.oa.pinned_rcs_vma = NULL;

	dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID;

	mutex_unlock(&dev_priv->drm.struct_mutex);
}

static void
free_oa_buffer(struct drm_i915_private *i915)
{
	mutex_lock(&i915->drm.struct_mutex);

	i915_gem_object_unpin_map(i915->perf.oa.oa_buffer.vma->obj);
	i915_vma_unpin(i915->perf.oa.oa_buffer.vma);
	i915_gem_object_put(i915->perf.oa.oa_buffer.vma->obj);

	i915->perf.oa.oa_buffer.vma = NULL;
	i915->perf.oa.oa_buffer.vaddr = NULL;

	mutex_unlock(&i915->drm.struct_mutex);
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.oa.exclusive_stream);

	dev_priv->perf.oa.ops.disable_metric_set(dev_priv);

	free_oa_buffer(dev_priv);

	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	dev_priv->perf.oa.exclusive_stream = NULL;
}

static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv)
{
	u32 gtt_offset = i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */
	I915_WRITE(GEN7_OABUFFER, gtt_offset);
	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.oa.gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(dev_priv->perf.oa.oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	dev_priv->perf.oa.pollin = false;
}

static int alloc_oa_buffer(struct drm_i915_private *dev_priv)
{
	struct drm_i915_gem_object *bo;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(dev_priv->perf.oa.oa_buffer.vma))
		return -ENODEV;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ret;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create(&dev_priv->drm, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		ret = PTR_ERR(bo);
		goto unlock;
	}

	ret = i915_gem_object_set_cache_level(bo, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	dev_priv->perf.oa.oa_buffer.vma = vma;

	dev_priv->perf.oa.oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(dev_priv->perf.oa.oa_buffer.vaddr)) {
		ret = PTR_ERR(dev_priv->perf.oa.oa_buffer.vaddr);
		goto err_unpin;
	}

	dev_priv->perf.oa.ops.init_oa_buffer(dev_priv);

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(dev_priv->perf.oa.oa_buffer.vma),
			 dev_priv->perf.oa.oa_buffer.vaddr);

	goto unlock;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	dev_priv->perf.oa.oa_buffer.vaddr = NULL;
	dev_priv->perf.oa.oa_buffer.vma = NULL;

unlock:
	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   int n_regs)
{
	int i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}

static int hsw_enable_metric_set(struct drm_i915_private *dev_priv)
{
	int ret = i915_oa_select_metric_set_hsw(dev_priv);

	if (ret)
		return ret;

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) |
				      GT_NOA_ENABLE));

	/* PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain. Unit level clock
	 * gating for RCS should also be disabled.
	 */
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
				    ~GEN7_DOP_CLOCK_GATE_ENABLE));
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) |
				  GEN6_CSUNIT_CLOCK_GATE_DISABLE));

	config_oa_regs(dev_priv, dev_priv->perf.oa.mux_regs,
		       dev_priv->perf.oa.mux_regs_len);

	/* It apparently takes a fairly long time for a new MUX
	 * configuration to be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've mischaracterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);

	config_oa_regs(dev_priv, dev_priv->perf.oa.b_counter_regs,
		       dev_priv->perf.oa.b_counter_regs_len);

	return 0;
}

static void hsw_disable_metric_set(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) &
				  ~GEN6_CSUNIT_CLOCK_GATE_DISABLE));
	I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) |
				    GEN7_DOP_CLOCK_GATE_ENABLE));

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen7_update_oacontrol_locked(struct drm_i915_private *dev_priv)
{
	assert_spin_locked(&dev_priv->perf.hook_lock);

	if (dev_priv->perf.oa.exclusive_stream->enabled) {
		struct i915_gem_context *ctx =
			dev_priv->perf.oa.exclusive_stream->ctx;
		u32 ctx_id = dev_priv->perf.oa.specific_ctx_id;

		bool periodic = dev_priv->perf.oa.periodic;
		u32 period_exponent = dev_priv->perf.oa.period_exponent;
		u32 report_format = dev_priv->perf.oa.oa_buffer.format;

		I915_WRITE(GEN7_OACONTROL,
			   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
			   (period_exponent <<
			    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
			   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
			   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
			   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
			   GEN7_OACONTROL_ENABLE);
	} else
		I915_WRITE(GEN7_OACONTROL, 0);
}

static void gen7_oa_enable(struct drm_i915_private *dev_priv)
{
	unsigned long flags;

	/* Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen7_init_oa_buffer(dev_priv);

	spin_lock_irqsave(&dev_priv->perf.hook_lock, flags);
	gen7_update_oacontrol_locked(dev_priv);
	spin_unlock_irqrestore(&dev_priv->perf.hook_lock, flags);
}

static void i915_oa_stream_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_enable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
			      ns_to_ktime(POLL_PERIOD),
			      HRTIMER_MODE_REL_PINNED);
}

static void gen7_oa_disable(struct drm_i915_private *dev_priv)
{
	I915_WRITE(GEN7_OACONTROL, 0);
}

static void i915_oa_stream_disable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	dev_priv->perf.oa.ops.oa_disable(dev_priv);

	if (dev_priv->perf.oa.periodic)
		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
}

static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
{
	return 1000000000ULL * (2ULL << exponent) /
		dev_priv->perf.oa.timestamp_frequency;
}
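
/* E.g. with the 12.5MHz timestamp frequency set for Haswell in
 * i915_perf_init(), exponent 0 gives 1e9 * 2 / 12500000 = 160ns (the
 * minimum period) and each increment of the exponent doubles the period.
 */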

static const struct i915_perf_stream_ops i915_oa_stream_ops = {
	.destroy = i915_oa_stream_destroy,
	.enable = i915_oa_stream_enable,
	.disable = i915_oa_stream_disable,
	.wait_unlocked = i915_oa_wait_unlocked,
	.poll_wait = i915_oa_poll_wait,
	.read = i915_oa_read,
};

static int i915_oa_stream_init(struct i915_perf_stream *stream,
			       struct drm_i915_perf_open_param *param,
			       struct perf_open_properties *props)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int format_size;
	int ret;

	/* If the sysfs metrics/ directory wasn't registered for some
	 * reason then don't let userspace try their luck with config
	 * IDs.
	 */
	if (!dev_priv->perf.metrics_kobj) {
		DRM_ERROR("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
		DRM_ERROR("Only OA report sampling supported\n");
		return -EINVAL;
	}

	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
		DRM_ERROR("OA unit not supported\n");
		return -ENODEV;
	}

	/* To avoid the complexity of having to accurately filter
	 * counter reports and marshal to the appropriate client
	 * we currently only allow exclusive access.
	 */
	if (dev_priv->perf.oa.exclusive_stream) {
		DRM_ERROR("OA unit already in use\n");
		return -EBUSY;
	}

	if (!props->metrics_set) {
		DRM_ERROR("OA metric set not specified\n");
		return -EINVAL;
	}

	if (!props->oa_format) {
		DRM_ERROR("OA report format not specified\n");
		return -EINVAL;
	}

	stream->sample_size = sizeof(struct drm_i915_perf_record_header);

	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;

	stream->sample_flags |= SAMPLE_OA_REPORT;
	stream->sample_size += format_size;

	dev_priv->perf.oa.oa_buffer.format_size = format_size;
	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
		return -EINVAL;

	dev_priv->perf.oa.oa_buffer.format =
		dev_priv->perf.oa.oa_formats[props->oa_format].format;

	dev_priv->perf.oa.metrics_set = props->metrics_set;

	dev_priv->perf.oa.periodic = props->oa_periodic;
	if (dev_priv->perf.oa.periodic) {
		u64 period_ns = oa_exponent_to_ns(dev_priv,
						  props->oa_period_exponent);

		dev_priv->perf.oa.period_exponent = props->oa_period_exponent;

		/* See comment for OA_TAIL_MARGIN_NSEC for details
		 * about this tail_margin...
		 */
		dev_priv->perf.oa.tail_margin =
			((OA_TAIL_MARGIN_NSEC / period_ns) + 1) * format_size;
	}

	if (stream->ctx) {
		ret = oa_get_render_ctx_id(stream);
		if (ret)
			return ret;
	}

	ret = alloc_oa_buffer(dev_priv);
	if (ret)
		goto err_oa_buf_alloc;

	/* PRM - observability performance counters:
	 *
	 * OACONTROL, performance counter enable, note:
	 *
	 * "When this bit is set, in order to have coherent counts,
	 * RC6 power state and trunk clock gating must be disabled.
	 * This can be achieved by programming MMIO registers as
	 * 0xA094=0 and 0xA090[31]=1"
	 *
	 * In our case we are expecting that taking pm + FORCEWAKE
	 * references will effectively disable RC6.
	 */
	intel_runtime_pm_get(dev_priv);
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);

	ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv);
	if (ret)
		goto err_enable;

	stream->ops = &i915_oa_stream_ops;

	dev_priv->perf.oa.exclusive_stream = stream;

	return 0;

err_enable:
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
	intel_runtime_pm_put(dev_priv);
	free_oa_buffer(dev_priv);

err_oa_buf_alloc:
	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	return ret;
}

static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream,
				     struct file *file,
				     char __user *buf,
				     size_t count,
				     loff_t *ppos)
{
	/* Note we keep the offset (aka bytes read) separate from any
	 * error status so that the final check for whether we return
	 * the bytes read with a higher precedence than any error (see
	 * comment below) doesn't need to be handled/duplicated in
	 * stream->ops->read() implementations.
	 */
	size_t offset = 0;
	int ret = stream->ops->read(stream, buf, count, &offset);

	/* If we've successfully copied any data then reporting that
	 * takes precedence over any internal error status, so the
	 * data isn't lost.
	 *
	 * For example ret will be -ENOSPC whenever there is more
	 * buffered data than can be copied to userspace, but that's
	 * only interesting if we weren't able to copy some data
	 * because it implies the userspace buffer is too small to
	 * receive a single record (and we never split records).
	 *
	 * Another case with ret == -EFAULT is more of a grey area
	 * since it would seem like bad form for userspace to ask us
	 * to overrun its buffer, but the user knows best:
	 *
	 *   http://yarchive.net/comp/linux/partial_reads_writes.html
	 */
	return offset ?: (ret ?: -EAGAIN);
}

static ssize_t i915_perf_read(struct file *file,
			      char __user *buf,
			      size_t count,
			      loff_t *ppos)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	ssize_t ret;

	/* To ensure it's handled consistently we simply treat all reads of a
	 * disabled stream as an error. In particular it might otherwise lead
	 * to a deadlock for blocking file descriptors...
	 */
	if (!stream->enabled)
		return -EIO;

	if (!(file->f_flags & O_NONBLOCK)) {
		/* There's the small chance of false positives from
		 * stream->ops->wait_unlocked.
		 *
		 * E.g. with single context filtering, since we only wait
		 * until the oabuffer has >= 1 report, we don't immediately
		 * know whether any reports really belong to the current
		 * context.
		 */
		do {
			ret = stream->ops->wait_unlocked(stream);
			if (ret)
				return ret;

			mutex_lock(&dev_priv->perf.lock);
			ret = i915_perf_read_locked(stream, file,
						    buf, count, ppos);
			mutex_unlock(&dev_priv->perf.lock);
		} while (ret == -EAGAIN);
	} else {
		mutex_lock(&dev_priv->perf.lock);
		ret = i915_perf_read_locked(stream, file, buf, count, ppos);
		mutex_unlock(&dev_priv->perf.lock);
	}

	if (ret >= 0) {
		/* Maybe make ->pollin per-stream state if we support multiple
		 * concurrent streams in the future.
		 */
		dev_priv->perf.oa.pollin = false;
	}

	return ret;
}

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer)
{
	struct drm_i915_private *dev_priv =
		container_of(hrtimer, typeof(*dev_priv),
			     perf.oa.poll_check_timer);

	if (!dev_priv->perf.oa.ops.oa_buffer_is_empty(dev_priv)) {
		dev_priv->perf.oa.pollin = true;
		wake_up(&dev_priv->perf.oa.poll_wq);
	}

	hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD));

	return HRTIMER_RESTART;
}

static unsigned int i915_perf_poll_locked(struct drm_i915_private *dev_priv,
					  struct i915_perf_stream *stream,
					  struct file *file,
					  poll_table *wait)
{
	unsigned int events = 0;

	stream->ops->poll_wait(stream, file, wait);

	/* Note: we don't explicitly check whether there's something to read
	 * here since this path may be very hot depending on what else
	 * userspace is polling, or on the timeout in use. We rely solely on
	 * the hrtimer/oa_poll_check_timer_cb to notify us when there are
	 * samples to read.
	 */
	if (dev_priv->perf.oa.pollin)
		events |= POLLIN;

	return events;
}

static unsigned int i915_perf_poll(struct file *file, poll_table *wait)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_poll_locked(dev_priv, stream, file, wait);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}

static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}

static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx) {
		mutex_lock(&dev_priv->drm.struct_mutex);
		i915_gem_context_put(stream->ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	kfree(stream);
}

static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	return 0;
}


static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
};


static struct i915_gem_context *
lookup_context(struct drm_i915_private *dev_priv,
	       struct drm_i915_file_private *file_priv,
	       u32 ctx_user_handle)
{
	struct i915_gem_context *ctx;
	int ret;

	ret = i915_mutex_lock_interruptible(&dev_priv->drm);
	if (ret)
		return ERR_PTR(ret);

	ctx = i915_gem_context_lookup(file_priv, ctx_user_handle);
	if (!IS_ERR(ctx))
		i915_gem_context_get(ctx);

	mutex_unlock(&dev_priv->drm.struct_mutex);

	return ctx;
}

static int
i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
			    struct drm_i915_perf_open_param *param,
			    struct perf_open_properties *props,
			    struct drm_file *file)
{
	struct i915_gem_context *specific_ctx = NULL;
	struct i915_perf_stream *stream = NULL;
	unsigned long f_flags = 0;
	int stream_fd;
	int ret;

	if (props->single_context) {
		u32 ctx_handle = props->ctx_handle;
		struct drm_i915_file_private *file_priv = file->driver_priv;

		specific_ctx = lookup_context(dev_priv, file_priv, ctx_handle);
		if (IS_ERR(specific_ctx)) {
			ret = PTR_ERR(specific_ctx);
			if (ret != -EINTR)
				DRM_ERROR("Failed to look up context with ID %u for opening perf stream\n",
					  ctx_handle);
			goto err;
		}
	}

	/* Similar to perf's kernel.perf_paranoid_cpu sysctl option
	 * we check a dev.i915.perf_stream_paranoid sysctl option
	 * to determine if it's ok to access system wide OA counters
	 * without CAP_SYS_ADMIN privileges.
	 */
	if (!specific_ctx &&
	    i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_ERROR("Insufficient privileges to open system-wide i915 perf stream\n");
		ret = -EACCES;
		goto err_ctx;
	}

	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
	if (!stream) {
		ret = -ENOMEM;
		goto err_ctx;
	}

	stream->dev_priv = dev_priv;
	stream->ctx = specific_ctx;

	ret = i915_oa_stream_init(stream, param, props);
	if (ret)
		goto err_alloc;

	/* we avoid simply assigning stream->sample_flags = props->sample_flags
	 * to have _stream_init check the combination of sample flags more
	 * thoroughly, but still this is the expected result at this point.
	 */
	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
		ret = -ENODEV;
		goto err_alloc;
	}

	list_add(&stream->link, &dev_priv->perf.streams);

	if (param->flags & I915_PERF_FLAG_FD_CLOEXEC)
		f_flags |= O_CLOEXEC;
	if (param->flags & I915_PERF_FLAG_FD_NONBLOCK)
		f_flags |= O_NONBLOCK;

	stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags);
	if (stream_fd < 0) {
		ret = stream_fd;
		goto err_open;
	}

	if (!(param->flags & I915_PERF_FLAG_DISABLED))
		i915_perf_enable_locked(stream);

	return stream_fd;

err_open:
	list_del(&stream->link);
	if (stream->ops->destroy)
		stream->ops->destroy(stream);
err_alloc:
	kfree(stream);
err_ctx:
	if (specific_ctx) {
		mutex_lock(&dev_priv->drm.struct_mutex);
		i915_gem_context_put(specific_ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}
err:
	return ret;
}

/* Note we copy the properties from userspace outside of the i915 perf
 * mutex to avoid an awkward lockdep with mmap_sem.
 *
 * Note this function only validates properties in isolation; it doesn't
 * validate that the combination of properties makes sense or that all
 * properties necessary for a particular kind of stream have been set.
 */
static int read_properties_unlocked(struct drm_i915_private *dev_priv,
				    u64 __user *uprops,
				    u32 n_props,
				    struct perf_open_properties *props)
{
	u64 __user *uprop = uprops;
	int i;

	memset(props, 0, sizeof(struct perf_open_properties));

	if (!n_props) {
		DRM_ERROR("No i915 perf properties given");
		return -EINVAL;
	}

	/* Considering that ID = 0 is reserved and assuming that we don't
	 * (currently) expect any configurations to ever specify duplicate
	 * values for a particular property ID then the last _PROP_MAX value is
	 * one greater than the maximum number of properties we expect to get
	 * from userspace.
	 */
	if (n_props >= DRM_I915_PERF_PROP_MAX) {
		DRM_ERROR("More i915 perf properties specified than exist");
		return -EINVAL;
	}

	for (i = 0; i < n_props; i++) {
		u64 oa_period, oa_freq_hz;
		u64 id, value;
		int ret;

		ret = get_user(id, uprop);
		if (ret)
			return ret;

		ret = get_user(value, uprop + 1);
		if (ret)
			return ret;

		switch ((enum drm_i915_perf_property_id)id) {
		case DRM_I915_PERF_PROP_CTX_HANDLE:
			props->single_context = 1;
			props->ctx_handle = value;
			break;
		case DRM_I915_PERF_PROP_SAMPLE_OA:
			props->sample_flags |= SAMPLE_OA_REPORT;
			break;
		case DRM_I915_PERF_PROP_OA_METRICS_SET:
			if (value == 0 ||
			    value > dev_priv->perf.oa.n_builtin_sets) {
				DRM_ERROR("Unknown OA metric set ID");
				return -EINVAL;
			}
			props->metrics_set = value;
			break;
		case DRM_I915_PERF_PROP_OA_FORMAT:
			if (value == 0 || value >= I915_OA_FORMAT_MAX) {
				DRM_ERROR("Invalid OA report format\n");
				return -EINVAL;
			}
			if (!dev_priv->perf.oa.oa_formats[value].size) {
				DRM_ERROR("Invalid OA report format\n");
				return -EINVAL;
			}
			props->oa_format = value;
			break;
		case DRM_I915_PERF_PROP_OA_EXPONENT:
			if (value > OA_EXPONENT_MAX) {
				DRM_ERROR("OA timer exponent too high (> %u)\n",
					  OA_EXPONENT_MAX);
				return -EINVAL;
			}

			/* Theoretically we can program the OA unit to sample
			 * every 160ns but don't allow that by default unless
			 * root.
			 *
			 * On Haswell the period is derived from the exponent
			 * as:
			 *
			 *   period = 80ns * 2^(exponent + 1)
			 */
			BUILD_BUG_ON(sizeof(oa_period) != 8);
			oa_period = 80ull * (2ull << value);

			/* This check is primarily to ensure that oa_period <=
			 * UINT32_MAX (before passing to do_div which only
			 * accepts a u32 denominator), but we can also skip
			 * checking anything < 1Hz which implicitly can't be
			 * limited via an integer oa_max_sample_rate.
			 */
			if (oa_period <= NSEC_PER_SEC) {
				u64 tmp = NSEC_PER_SEC;
				do_div(tmp, oa_period);
				oa_freq_hz = tmp;
			} else
				oa_freq_hz = 0;

			if (oa_freq_hz > i915_oa_max_sample_rate &&
			    !capable(CAP_SYS_ADMIN)) {
				DRM_ERROR("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n",
					  i915_oa_max_sample_rate);
				return -EACCES;
			}

			props->oa_periodic = true;
			props->oa_period_exponent = value;
			break;
		default:
			MISSING_CASE(id);
			DRM_ERROR("Unknown i915 perf property ID");
			return -EINVAL;
		}

		uprop += 2;
	}

	return 0;
}

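/* Illustrative userspace sketch (not driver code) of opening a periodic OA
 * stream with the properties validated above; the metrics set ID of 1 is an
 * assumption and would normally be looked up under the metrics/ directory
 * that i915_perf_register() exposes in sysfs:
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, 1,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A45_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 */
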
int i915_perf_open_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_open_param *param = data;
	struct perf_open_properties props;
	u32 known_open_flags;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_ERROR("i915 perf interface not available for this system");
		return -ENOTSUPP;
	}

	known_open_flags = I915_PERF_FLAG_FD_CLOEXEC |
		I915_PERF_FLAG_FD_NONBLOCK |
		I915_PERF_FLAG_DISABLED;
	if (param->flags & ~known_open_flags) {
		DRM_ERROR("Unknown drm_i915_perf_open_param flag\n");
		return -EINVAL;
	}

	ret = read_properties_unlocked(dev_priv,
				       u64_to_user_ptr(param->properties_ptr),
				       param->num_properties,
				       &props);
	if (ret)
		return ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

void i915_perf_register(struct drm_i915_private *dev_priv)
{
	if (!IS_HASWELL(dev_priv))
		return;

	if (!dev_priv->perf.initialized)
		return;

	/* To be sure we're synchronized with an attempted
	 * i915_perf_open_ioctl(), considering that we register after
	 * being exposed to userspace.
	 */
	mutex_lock(&dev_priv->perf.lock);

	dev_priv->perf.metrics_kobj =
		kobject_create_and_add("metrics",
				       &dev_priv->drm.primary->kdev->kobj);
	if (!dev_priv->perf.metrics_kobj)
		goto exit;

	if (i915_perf_register_sysfs_hsw(dev_priv)) {
		kobject_put(dev_priv->perf.metrics_kobj);
		dev_priv->perf.metrics_kobj = NULL;
	}

exit:
	mutex_unlock(&dev_priv->perf.lock);
}

void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!IS_HASWELL(dev_priv))
		return;

	if (!dev_priv->perf.metrics_kobj)
		return;

	i915_perf_unregister_sysfs_hsw(dev_priv);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}

static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &one,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = &zero,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};

void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (!IS_HASWELL(dev_priv))
		return;

	hrtimer_init(&dev_priv->perf.oa.poll_check_timer,
		     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	dev_priv->perf.oa.poll_check_timer.function = oa_poll_check_timer_cb;
	init_waitqueue_head(&dev_priv->perf.oa.poll_wq);

	INIT_LIST_HEAD(&dev_priv->perf.streams);
	mutex_init(&dev_priv->perf.lock);
	spin_lock_init(&dev_priv->perf.hook_lock);

	dev_priv->perf.oa.ops.init_oa_buffer = gen7_init_oa_buffer;
	dev_priv->perf.oa.ops.enable_metric_set = hsw_enable_metric_set;
	dev_priv->perf.oa.ops.disable_metric_set = hsw_disable_metric_set;
	dev_priv->perf.oa.ops.oa_enable = gen7_oa_enable;
	dev_priv->perf.oa.ops.oa_disable = gen7_oa_disable;
	dev_priv->perf.oa.ops.read = gen7_oa_read;
	dev_priv->perf.oa.ops.oa_buffer_is_empty =
		gen7_oa_buffer_is_empty_fop_unlocked;

	dev_priv->perf.oa.timestamp_frequency = 12500000;

	dev_priv->perf.oa.oa_formats = hsw_oa_formats;

	dev_priv->perf.oa.n_builtin_sets =
		i915_oa_n_builtin_metric_sets_hsw;

	dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

	dev_priv->perf.initialized = true;
}

void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.oa.ops, 0, sizeof(dev_priv->perf.oa.ops));
	dev_priv->perf.initialized = false;
}