drm/i915: Add i915 perf infrastructure
Adds base i915 perf infrastructure for Gen performance metrics.
This adds a DRM_IOCTL_I915_PERF_OPEN ioctl that takes an array of uint64
properties to configure a stream of metrics and returns a new fd usable
with standard VFS system calls including read() to read typed and sized
records; ioctl() to enable or disable capture and poll() to wait for
data.
A stream is opened something like:
uint64_t properties[] = {
/* Single context sampling */
DRM_I915_PERF_PROP_CTX_HANDLE, ctx_handle,
/* Include OA reports in samples */
DRM_I915_PERF_PROP_SAMPLE_OA, true,
/* OA unit configuration */
DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
DRM_I915_PERF_PROP_OA_FORMAT, report_format,
DRM_I915_PERF_PROP_OA_EXPONENT, period_exponent,
};
struct drm_i915_perf_open_param parm = {
.flags = I915_PERF_FLAG_FD_CLOEXEC |
I915_PERF_FLAG_FD_NONBLOCK |
I915_PERF_FLAG_DISABLED,
.properties_ptr = (uint64_t)properties,
.num_properties = sizeof(properties) / 16,
};
int fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &parm);
Records read all start with a common { type, size } header with
DRM_I915_PERF_RECORD_SAMPLE being of most interest. Sample records
contain an extensible number of fields and it's the
DRM_I915_PERF_PROP_SAMPLE_xyz properties given when opening that
determine what's included in every sample.
No specific streams are supported yet so any attempt to open a stream
will return an error.
v2:
use i915_gem_context_get() - Chris Wilson
v3:
update read() interface to avoid passing state struct - Chris Wilson
fix some rebase fallout, with i915-perf init/deinit
v4:
s/DRM_IORW/DRM_IOW/ - Emil Velikov
Signed-off-by: Robert Bragg <robert@sixbynine.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Sourab Gupta <sourab.gupta@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20161107194957.3385-2-robert@sixbynine.org
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index c7d5f7a..b69f844 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1797,6 +1797,84 @@ struct intel_wm_config {
bool sprites_scaled;
};
+struct i915_perf_stream;
+
+struct i915_perf_stream_ops {
+ /* Enables the collection of HW samples, either in response to
+ * I915_PERF_IOCTL_ENABLE or implicitly called when stream is
+ * opened without I915_PERF_FLAG_DISABLED.
+ */
+ void (*enable)(struct i915_perf_stream *stream);
+
+ /* Disables the collection of HW samples, either in response to
+ * I915_PERF_IOCTL_DISABLE or implicitly called before
+ * destroying the stream.
+ */
+ void (*disable)(struct i915_perf_stream *stream);
+
+ /* Return: true if any i915 perf records are ready to read()
+ * for this stream.
+ */
+ bool (*can_read)(struct i915_perf_stream *stream);
+
+ /* Call poll_wait, passing a wait queue that will be woken
+ * once there is something ready to read() for the stream.
+ */
+ void (*poll_wait)(struct i915_perf_stream *stream,
+ struct file *file,
+ poll_table *wait);
+
+ /* For handling a blocking read, wait until there is something
+ * ready to read() for the stream. E.g. wait on the same
+ * wait queue that would be passed to poll_wait() until
+ * ->can_read() returns true (if it's safe to call ->can_read()
+ * without the i915 perf lock held).
+ */
+ int (*wait_unlocked)(struct i915_perf_stream *stream);
+
+ /* read - Copy buffered metrics as records to userspace
+ * @buf: the userspace destination buffer
+ * @count: the number of bytes to copy, requested by userspace
+ * @offset: zero at the start of the read, updated as the read
+ * proceeds, it represents how many bytes have been
+ * copied so far and the buffer offset for copying the
+ * next record.
+ *
+ * Copy as many buffered i915 perf samples and records for
+ * this stream to userspace as will fit in the given buffer.
+ *
+ * Only write complete records; returning -ENOSPC if there
+ * isn't room for a complete record.
+ *
+ * Return any error condition that results in a short read
+ * such as -ENOSPC or -EFAULT, even though these may be
+ * squashed before returning to userspace.
+ */
+ int (*read)(struct i915_perf_stream *stream,
+ char __user *buf,
+ size_t count,
+ size_t *offset);
+
+ /* Cleanup any stream specific resources.
+ *
+ * The stream will always be disabled before this is called.
+ */
+ void (*destroy)(struct i915_perf_stream *stream);
+};
+
+struct i915_perf_stream {
+ struct drm_i915_private *dev_priv; /* i915 device instance */
+
+ struct list_head link; /* NOTE(review): presumably a node on dev_priv->perf.streams - confirm */
+
+ u32 sample_flags; /* NOTE(review): presumably selects what each sample includes - confirm */
+
+ struct i915_gem_context *ctx; /* NOTE(review): presumably the sampled context, if any - confirm */
+ bool enabled; /* NOTE(review): presumably tracks ->enable()/->disable() state - confirm */
+
+ struct i915_perf_stream_ops *ops; /* callbacks implementing this stream */
+};
+
struct drm_i915_private {
struct drm_device drm;
@@ -2092,6 +2170,12 @@ struct drm_i915_private {
struct i915_runtime_pm pm;
+ struct {
+ bool initialized;
+ struct mutex lock;
+ struct list_head streams;
+ } perf;
+
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct {
void (*resume)(struct drm_i915_private *);
@@ -3253,6 +3337,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+int i915_perf_open_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
/* i915_gem_evict.c */
int __must_check i915_gem_evict_something(struct i915_address_space *vm,
u64 min_size, u64 alignment,
@@ -3383,6 +3470,10 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_len,
bool is_master);
+/* i915_perf.c */
+extern void i915_perf_init(struct drm_i915_private *dev_priv);
+extern void i915_perf_fini(struct drm_i915_private *dev_priv);
+
/* i915_suspend.c */
extern int i915_save_state(struct drm_device *dev);
extern int i915_restore_state(struct drm_device *dev);