Jamie Gennis | 88848da | 2013-02-21 17:55:28 -0800 | [diff] [blame] | 1 | #undef TRACE_SYSTEM |
| 2 | #define TRACE_SYSTEM gpu |
| 3 | |
| 4 | #if !defined(_TRACE_GPU_H) || defined(TRACE_HEADER_MULTI_READ) |
| 5 | #define _TRACE_GPU_H |
| 6 | |
| 7 | #include <linux/tracepoint.h> |
| 8 | #include <linux/time.h> |
| 9 | |
/*
 * Whole-seconds part of a nanosecond timestamp, for TP_printk.
 *
 * Half a microsecond is added first so that the seconds/microseconds
 * pair produced together with show_usecs_from_ns() is rounded to the
 * nearest microsecond (both helpers must apply the same bias).
 *
 * Bug fix: the statement expression previously ended with the
 * do_div() invocation, so it evaluated to do_div()'s value — the
 * *remainder* — instead of the quotient held in t.  The trailing
 * "t;" makes the quotient (whole seconds) the expression's value.
 */
#define show_secs_from_ns(ns) \
	({ \
		u64 t = (ns) + (NSEC_PER_USEC / 2); \
		do_div(t, NSEC_PER_SEC); \
		t; \
	})
| 15 | |
/*
 * Sub-second part of a nanosecond timestamp, in microseconds, rounded
 * to the nearest microsecond (same NSEC_PER_USEC/2 bias as
 * show_secs_from_ns(), so the two always agree on which second the
 * timestamp falls in).
 *
 * The first do_div() converts ns to whole microseconds; the second
 * strips off the whole seconds, leaving microseconds-within-second in
 * rem.  The trailing "rem;" makes that the expression's value
 * explicitly, instead of relying on the value of the assignment.
 * The macro argument is parenthesized for expansion safety.
 */
#define show_usecs_from_ns(ns) \
	({ \
		u64 t = (ns) + (NSEC_PER_USEC / 2); \
		u32 rem; \
		do_div(t, NSEC_PER_USEC); \
		rem = do_div(t, USEC_PER_SEC); \
		rem; \
	})
| 23 | |
| 24 | /* |
| 25 | * The gpu_sched_switch event indicates that a switch from one GPU context to |
| 26 | * another occurred on one of the GPU hardware blocks. |
| 27 | * |
| 28 | * The gpu_name argument identifies the GPU hardware block. Each independently |
| 29 | * scheduled GPU hardware block should have a different name. This may be used |
| 30 | * in different ways for different GPUs. For example, if a GPU includes |
| 31 | * multiple processing cores it may use names "GPU 0", "GPU 1", etc. If a GPU |
| 32 | * includes a separately scheduled 2D and 3D hardware block, it might use the |
| 33 | * names "2D" and "3D". |
| 34 | * |
| 35 | * The timestamp argument is the timestamp at which the switch occurred on the |
 * GPU.  These timestamps are in units of nanoseconds and must use
 * approximately the same time base as sched_clock, though they need not come from
| 38 | * any CPU clock. The timestamps for a single hardware block must be |
| 39 | * monotonically nondecreasing. This means that if a variable compensation |
| 40 | * offset is used to translate from some other clock to the sched_clock, then |
| 41 | * care must be taken when increasing that offset, and doing so may result in |
| 42 | * multiple events with the same timestamp. |
| 43 | * |
| 44 | * The next_ctx_id argument identifies the next context that was running on |
| 45 | * the GPU hardware block. A value of 0 indicates that the hardware block |
| 46 | * will be idle. |
| 47 | * |
| 48 | * The next_prio argument indicates the priority of the next context at the |
| 49 | * time of the event. The exact numeric values may mean different things for |
| 50 | * different GPUs, but they should follow the rule that lower values indicate a |
| 51 | * higher priority. |
| 52 | * |
| 53 | * The next_job_id argument identifies the batch of work that the GPU will be |
| 54 | * working on. This should correspond to a job_id that was previously traced |
| 55 | * as a gpu_job_enqueue event when the batch of work was created. |
| 56 | */ |
/*
 * Tracepoint fired on a GPU hardware-block context switch; argument
 * semantics are described in the block comment above.
 */
TRACE_EVENT(gpu_sched_switch,

	/* gpu_name: hardware block; timestamp: ns, sched_clock-aligned */
	TP_PROTO(const char *gpu_name, u64 timestamp,
		u32 next_ctx_id, s32 next_prio, u32 next_job_id),

	TP_ARGS(gpu_name, timestamp, next_ctx_id, next_prio, next_job_id),

	/* Ring-buffer record layout; __string stores gpu_name inline. */
	TP_STRUCT__entry(
		__string(       gpu_name,       gpu_name        )
		__field(        u64,            timestamp       )
		__field(        u32,            next_ctx_id     )
		__field(        s32,            next_prio       )
		__field(        u32,            next_job_id     )
	),

	/* Copy arguments into the trace record at emit time. */
	TP_fast_assign(
		__assign_str(gpu_name, gpu_name);
		__entry->timestamp = timestamp;
		__entry->next_ctx_id = next_ctx_id;
		__entry->next_prio = next_prio;
		__entry->next_job_id = next_job_id;
	),

	/* ts printed as seconds.microseconds via the helper macros above. */
	TP_printk("gpu_name=%s ts=%5llu.%06lu next_ctx_id=%lu next_prio=%ld "
		"next_job_id=%lu",
		__get_str(gpu_name),
		(unsigned long long)show_secs_from_ns(__entry->timestamp),
		(unsigned long)show_usecs_from_ns(__entry->timestamp),
		(unsigned long)__entry->next_ctx_id,
		(long)__entry->next_prio,
		(unsigned long)__entry->next_job_id)
);
| 89 | |
| 90 | /* |
| 91 | * The gpu_job_enqueue event indicates that a batch of work has been queued up |
| 92 | * to be processed by the GPU. This event is not intended to indicate that |
| 93 | * the batch of work has been submitted to the GPU hardware, but rather that |
| 94 | * it has been submitted to the GPU kernel driver. |
| 95 | * |
| 96 | * This event should be traced on the thread that initiated the work being |
| 97 | * queued. For example, if a batch of work is submitted to the kernel by a |
| 98 | * userland thread, the event should be traced on that thread. |
| 99 | * |
| 100 | * The ctx_id field identifies the GPU context in which the batch of work |
| 101 | * being queued is to be run. |
| 102 | * |
| 103 | * The job_id field identifies the batch of work being queued within the given |
| 104 | * GPU context. The first batch of work submitted for a given GPU context |
| 105 | * should have a job_id of 0, and each subsequent batch of work should |
| 106 | * increment the job_id by 1. |
| 107 | * |
| 108 | * The type field identifies the type of the job being enqueued. The job |
| 109 | * types may be different for different GPU hardware. For example, a GPU may |
| 110 | * differentiate between "2D", "3D", and "compute" jobs. |
| 111 | */ |
/*
 * Tracepoint fired when a batch of GPU work is queued to the kernel
 * driver; argument semantics are described in the block comment above.
 */
TRACE_EVENT(gpu_job_enqueue,

	/* ctx_id: target GPU context; job_id: per-context counter; type: job kind */
	TP_PROTO(u32 ctx_id, u32 job_id, const char *type),

	TP_ARGS(ctx_id, job_id, type),

	/* Ring-buffer record layout; __string stores type inline. */
	TP_STRUCT__entry(
		__field(        u32,            ctx_id          )
		__field(        u32,            job_id          )
		__string(       type,           type            )
	),

	/* Copy arguments into the trace record at emit time. */
	TP_fast_assign(
		__entry->ctx_id = ctx_id;
		__entry->job_id = job_id;
		__assign_str(type, type);
	),

	TP_printk("ctx_id=%lu job_id=%lu type=%s",
		(unsigned long)__entry->ctx_id,
		(unsigned long)__entry->job_id,
		__get_str(type))
);
| 135 | |
| 136 | #undef show_secs_from_ns |
| 137 | #undef show_usecs_from_ns |
| 138 | |
| 139 | #endif /* _TRACE_GPU_H */ |
| 140 | |
| 141 | /* This part must be outside protection */ |
| 142 | #include <trace/define_trace.h> |