/*
 * Tegra host1x Command DMA
 *
 * Copyright (c) 2010-2013, NVIDIA Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */


#include <asm/cacheflush.h>
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/host1x.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kfifo.h>
#include <linux/slab.h>
#include <trace/events/host1x.h>

#include "cdma.h"
#include "channel.h"
#include "dev.h"
#include "debug.h"
#include "job.h"

/*
 * push_buffer
 *
 * The push buffer is a circular array of words to be fetched by command DMA.
 * Note that it works slightly differently to the sync queue; fence == pos
 * means that the push buffer is full, not empty.
 */

#define HOST1X_PUSHBUFFER_SLOTS	512
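/* each slot holds two 32-bit words: an opcode and its argument */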

/*
 * Clean up push buffer resources
 */
static void host1x_pushbuffer_destroy(struct push_buffer *pb)
{
	struct host1x_cdma *cdma = pb_to_cdma(pb);
	struct host1x *host1x = cdma_to_host1x(cdma);

	if (pb->phys != 0)
		dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
				      pb->mapped, pb->phys);

	pb->mapped = NULL;
	pb->phys = 0;
}

/*
 * Init push buffer resources
 */
static int host1x_pushbuffer_init(struct push_buffer *pb)
{
	struct host1x_cdma *cdma = pb_to_cdma(pb);
	struct host1x *host1x = cdma_to_host1x(cdma);

	pb->mapped = NULL;
	pb->phys = 0;
	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;

	/* initialize buffer pointers */
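	/* the fence starts one slot behind pos, so fence == pos means "full" */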
	pb->fence = pb->size_bytes - 8;
	pb->pos = 0;

	/* allocate and map pushbuffer memory */
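	/*
	 * One extra word is allocated past the end of the buffer; the
	 * HW-specific pushbuffer init is expected to place a wrap-around
	 * (restart) opcode there.
	 */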
	pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
					    &pb->phys, GFP_KERNEL);
	if (!pb->mapped)
		goto fail;

	host1x_hw_pushbuffer_init(host1x, pb);

	return 0;

fail:
	host1x_pushbuffer_destroy(pb);
	return -ENOMEM;
}

/*
 * Push two words to the push buffer
 * Caller must ensure push buffer is not full
 */
static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
{
	u32 pos = pb->pos;
	u32 *p = (u32 *)((void *)pb->mapped + pos);
	WARN_ON(pos == pb->fence);
	*(p++) = op1;
	*(p++) = op2;
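	/* size_bytes is a power of two, so the mask wraps pos back to zero */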
	pb->pos = (pos + 8) & (pb->size_bytes - 1);
}

/*
 * Pop a number of two word slots from the push buffer
 * Caller must ensure push buffer is not empty
 */
static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
{
	/* Advance the next write position */
	pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1);
}

/*
 * Return the number of two word slots free in the push buffer
 */
static u32 host1x_pushbuffer_space(struct push_buffer *pb)
{
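	/* yields 0 when fence == pos, i.e. when the buffer is full */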
	return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8;
}

/*
 * Sleep (if necessary) until the requested event happens
 * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty.
 *   - Returns 1
 * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer
 *   - Returns the amount of space (> 0)
 * Must be called with the cdma lock held.
 */
unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
				     enum cdma_event event)
{
	for (;;) {
		unsigned int space;

		if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
			space = list_empty(&cdma->sync_queue) ? 1 : 0;
		else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
			struct push_buffer *pb = &cdma->push_buffer;
			space = host1x_pushbuffer_space(pb);
		} else {
			WARN_ON(1);
			return -EINVAL;
		}

		if (space)
			return space;

		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
				       event);

		/* If somebody has managed to already start waiting, yield */
		if (cdma->event != CDMA_EVENT_NONE) {
			mutex_unlock(&cdma->lock);
			schedule();
			mutex_lock(&cdma->lock);
			continue;
		}
		cdma->event = event;

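		/*
		 * Drop the lock while sleeping; update_cdma_locked() signals
		 * the semaphore once the requested event has occurred.
		 */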
		mutex_unlock(&cdma->lock);
		down(&cdma->sem);
		mutex_lock(&cdma->lock);
	}
	return 0;
}

/*
 * Start timer that tracks the time spent by the job.
 * Must be called with the cdma lock held.
 */
static void cdma_start_timer_locked(struct host1x_cdma *cdma,
				    struct host1x_job *job)
{
	struct host1x *host = cdma_to_host1x(cdma);

	if (cdma->timeout.client) {
		/* timer already started */
		return;
	}

	cdma->timeout.client = job->client;
	cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
	cdma->timeout.syncpt_val = job->syncpt_end;
	cdma->timeout.start_ktime = ktime_get();

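	/*
	 * The timeout worker, set up by the HW-specific timeout init, handles
	 * the case where the syncpoint does not reach the threshold in time.
	 */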
	schedule_delayed_work(&cdma->timeout.wq,
			      msecs_to_jiffies(job->timeout));
}

/*
 * Stop timer when a buffer submission completes.
 * Must be called with the cdma lock held.
 */
static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
{
	cancel_delayed_work(&cdma->timeout.wq);
	cdma->timeout.client = 0;
}

/*
 * For all sync queue entries that have already finished according to the
 * current sync point registers:
 *  - unpin & unref their mems
 *  - pop their push buffer slots
 *  - remove them from the sync queue
 * This is normally called from the host code's worker thread, but can be
 * called manually if necessary.
 * Must be called with the cdma lock held.
 */
static void update_cdma_locked(struct host1x_cdma *cdma)
{
	bool signal = false;
	struct host1x *host1x = cdma_to_host1x(cdma);
	struct host1x_job *job, *n;

	/* If CDMA is stopped, queue is cleared and we can return */
	if (!cdma->running)
		return;

	/*
	 * Walk the sync queue, reading the sync point registers as necessary,
	 * to consume as many sync queue entries as possible without blocking
	 */
	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
		struct host1x_syncpt *sp =
			host1x_syncpt_get(host1x, job->syncpt_id);

		/* Check whether this syncpt has completed, and bail if not */
		if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
			/* Start timer on next pending syncpt */
			if (job->timeout)
				cdma_start_timer_locked(cdma, job);
			break;
		}

		/* Cancel timeout, when a buffer completes */
		if (cdma->timeout.client)
			stop_cdma_timer_locked(cdma);

		/* Unpin the memory */
		host1x_job_unpin(job);

		/* Pop push buffer slots */
		if (job->num_slots) {
			struct push_buffer *pb = &cdma->push_buffer;
			host1x_pushbuffer_pop(pb, job->num_slots);
			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
				signal = true;
		}

		list_del(&job->list);
		host1x_job_put(job);
	}

	if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY &&
	    list_empty(&cdma->sync_queue))
		signal = true;

	if (signal) {
		cdma->event = CDMA_EVENT_NONE;
		up(&cdma->sem);
	}
}

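/*
 * Handle a channel timeout: CPU-complete the remaining jobs of the client
 * that timed out and restart DMA past them. Expected to be called from the
 * timeout handler, with the channel stopped and the cdma lock held.
 */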
void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
				   struct device *dev)
{
	u32 restart_addr;
	u32 syncpt_incrs;
	struct host1x_job *job = NULL;
	u32 syncpt_val;
	struct host1x *host1x = cdma_to_host1x(cdma);

	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);

	dev_dbg(dev, "%s: starting cleanup (thresh %d)\n",
		__func__, syncpt_val);

	/*
	 * Move the sync_queue read pointer to the first entry that hasn't
	 * completed based on the current HW syncpt value. It's likely there
	 * won't be any (i.e. we're still at the head), but covers the case
	 * where a syncpt incr happens just prior/during the teardown.
	 */

	dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n",
		__func__);

	list_for_each_entry(job, &cdma->sync_queue, list) {
		if (syncpt_val < job->syncpt_end)
			break;

		host1x_job_dump(dev, job);
	}

	/*
	 * Walk the sync_queue, first incrementing with the CPU syncpts that
	 * are partially executed (the first buffer) or fully skipped while
	 * still in the current context (slots are also NOP-ed).
	 *
	 * At the point contexts are interleaved, syncpt increments must be
	 * done inline with the pushbuffer from a GATHER buffer to maintain
	 * the order (slots are modified to be a GATHER of syncpt incrs).
	 *
	 * Note: save in restart_addr the location where the timed-out buffer
	 * started in the PB, so we can start the refetch from there (with the
	 * modified NOP-ed PB slots). This makes the buffer appear to have
	 * completed properly, so its resources can be freed.
	 */

	dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n",
		__func__);


	if (!list_empty(&cdma->sync_queue))
		restart_addr = job->first_get;
	else
		restart_addr = cdma->last_pos;

	/* do CPU increments as long as this context continues */
	list_for_each_entry_from(job, &cdma->sync_queue, list) {
		/* different context, gets us out of this loop */
		if (job->client != cdma->timeout.client)
			break;

		/* won't need a timeout when replayed */
		job->timeout = 0;

		syncpt_incrs = job->syncpt_end - syncpt_val;
		dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs);

		host1x_job_dump(dev, job);

		/* safe to use CPU to incr syncpts */
		host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get,
						syncpt_incrs, job->syncpt_end,
						job->num_slots);

		syncpt_val += syncpt_incrs;
	}

	/*
	 * The following submits from the same client may be dependent on the
	 * failed submit and therefore they may fail. Force a small timeout
	 * to make the queue cleanup faster.
	 */
	list_for_each_entry_from(job, &cdma->sync_queue, list)
		if (job->client == cdma->timeout.client)
			job->timeout = min_t(unsigned int, job->timeout, 500);

	dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);

	/* roll back DMAGET and start up channel again */
	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}

/*
 * Create a cdma
 */
int host1x_cdma_init(struct host1x_cdma *cdma)
{
	int err;

	mutex_init(&cdma->lock);
	sema_init(&cdma->sem, 0);

	INIT_LIST_HEAD(&cdma->sync_queue);

	cdma->event = CDMA_EVENT_NONE;
	cdma->running = false;
	cdma->torndown = false;

	err = host1x_pushbuffer_init(&cdma->push_buffer);
	if (err)
		return err;
	return 0;
}

/*
 * Destroy a cdma
 */
int host1x_cdma_deinit(struct host1x_cdma *cdma)
{
	struct push_buffer *pb = &cdma->push_buffer;
	struct host1x *host1x = cdma_to_host1x(cdma);

	if (cdma->running) {
		pr_warn("%s: CDMA still running\n", __func__);
		return -EBUSY;
	}

	host1x_pushbuffer_destroy(pb);
	host1x_hw_cdma_timeout_destroy(host1x, cdma);

	return 0;
}

/*
 * Begin a cdma submit
 */
int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
{
	struct host1x *host1x = cdma_to_host1x(cdma);

	mutex_lock(&cdma->lock);
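	/*
	 * The lock is held across the whole submit and released in
	 * host1x_cdma_end().
	 */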

	if (job->timeout) {
		/* init state on first submit with timeout value */
		if (!cdma->timeout.initialized) {
			int err;
			err = host1x_hw_cdma_timeout_init(host1x, cdma,
							  job->syncpt_id);
			if (err) {
				mutex_unlock(&cdma->lock);
				return err;
			}
		}
	}
	if (!cdma->running)
		host1x_hw_cdma_start(host1x, cdma);

	cdma->slots_free = 0;
	cdma->slots_used = 0;
	cdma->first_get = cdma->push_buffer.pos;

	trace_host1x_cdma_begin(dev_name(job->channel->dev));
	return 0;
}

/*
 * Push two words into a push buffer slot
 * Blocks as necessary if the push buffer is full.
 */
void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
{
	struct host1x *host1x = cdma_to_host1x(cdma);
	struct push_buffer *pb = &cdma->push_buffer;
	u32 slots_free = cdma->slots_free;

	if (host1x_debug_trace_cmdbuf)
		trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
				       op1, op2);

	if (slots_free == 0) {
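		/*
		 * Out of room: kick off what has been pushed so far and wait
		 * for completed jobs to free up slots.
		 */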
		host1x_hw_cdma_flush(host1x, cdma);
		slots_free = host1x_cdma_wait_locked(cdma,
						     CDMA_EVENT_PUSH_BUFFER_SPACE);
	}
	cdma->slots_free = slots_free - 1;
	cdma->slots_used++;
	host1x_pushbuffer_push(pb, op1, op2);
}

/*
 * End a cdma submit
 * Kick off DMA, add the job to the sync queue, and record the number of push
 * buffer slots to be freed once the job completes. The handles for a submit
 * must all be pinned at the same time, but they can be unpinned in smaller
 * chunks.
 */
void host1x_cdma_end(struct host1x_cdma *cdma,
		     struct host1x_job *job)
{
	struct host1x *host1x = cdma_to_host1x(cdma);
	bool idle = list_empty(&cdma->sync_queue);

	host1x_hw_cdma_flush(host1x, cdma);

	job->first_get = cdma->first_get;
	job->num_slots = cdma->slots_used;
	host1x_job_get(job);
	list_add_tail(&job->list, &cdma->sync_queue);

	/* start timer on idle -> active transitions */
	if (job->timeout && idle)
		cdma_start_timer_locked(cdma, job);

	trace_host1x_cdma_end(dev_name(job->channel->dev));
	mutex_unlock(&cdma->lock);
}

/*
 * Update cdma state according to current sync point values
 */
void host1x_cdma_update(struct host1x_cdma *cdma)
{
	mutex_lock(&cdma->lock);
	update_cdma_locked(cdma);
	mutex_unlock(&cdma->lock);
}