| /* fuc microcode for nvc0 PGRAPH/GPC |
| * |
| * Copyright 2011 Red Hat Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| * OTHER DEALINGS IN THE SOFTWARE. |
| * |
| * Authors: Ben Skeggs |
| */ |
| |
| /* To build: |
| * m4 nvc0_grgpc.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_grgpc.fuc.h |
| */ |
| |
| /* TODO |
| * - bracket certain functions with scratch writes, useful for debugging |
| * - watchdog timer around ctx operations |
| */ |
| |
| // Data segment for the GPC fuc. The labels below name offsets in this |
| // segment; the code reaches them as D[$r0 + #label] with $r0 held at zero. |
| .section #nvc0_grgpc_data |
| // shared PGRAPH fuc file, pulled in by m4 (it is pulled in a second time at |
| // the top of the code section) |
| include(`nvc0_graph.fuc') |
| // number of this GPC, filled in by init from the MYINDEX register |
| gpc_id: .b32 0 |
| // start/end data offsets of this chipset's GPC mmio list. init copies them |
| // out of the chipsets table with 16-bit stores and ctx_xfer reads them back |
| // as 32-bit words, so this relies on the 16-bit store landing in the low |
| // half of the word and the rest staying at its assembled zero. |
| gpc_mmio_list_head: .b32 0 |
| gpc_mmio_list_tail: .b32 0 |
| |
| // TPC count is the low 5 bits of the UNITS register; the mask is |
| // (1 << count) - 1. Both are computed once by init. |
| tpc_count: .b32 0 |
| tpc_mask: .b32 0 |
| // start/end data offsets of this chipset's per-TPC mmio list, stored and |
| // read back the same way as the GPC pair |
| tpc_mmio_list_head: .b32 0 |
| tpc_mmio_list_tail: .b32 0 |
| |
| // command queue: filled by the interrupt handler through queue_put and |
| // drained by main through queue_get. Its storage comes from a macro that |
| // is not defined in this file. |
| cmd_queue: queue_init |
| |
| // chipset descriptions |
| // |
| // One 12-byte record per supported chipset, scanned in order by init: |
| //   +0   chipset id, written as four bytes with the id in the first one |
| //        and read back by init as a single 32-bit word |
| //   +4   16-bit pointer to the start of the GPC mmio list |
| //   +6   16-bit pointer to the end of the GPC mmio list |
| //   +8   16-bit pointer to the start of the per-TPC mmio list |
| //   +10  16-bit pointer to the end of the per-TPC mmio list |
| // A record whose id word is zero terminates the table. Several chipsets |
| // share a list head and differ only in which tail label they stop at. |
| chipsets: |
| .b8 0xc0 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc0_tpc_mmio_tail |
| .b8 0xc1 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc1_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc1_tpc_mmio_tail |
| .b8 0xc3 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc3_tpc_mmio_tail |
| .b8 0xc4 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc3_tpc_mmio_tail |
| .b8 0xc8 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc0_tpc_mmio_tail |
| .b8 0xce 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvc3_tpc_mmio_tail |
| .b8 0xcf 0 0 0 |
| .b16 #nvc0_gpc_mmio_head |
| .b16 #nvc0_gpc_mmio_tail |
| .b16 #nvc0_tpc_mmio_head |
| .b16 #nvcf_tpc_mmio_tail |
| .b8 0xd9 0 0 0 |
| .b16 #nvd9_gpc_mmio_head |
| .b16 #nvd9_gpc_mmio_tail |
| .b16 #nvd9_tpc_mmio_head |
| .b16 #nvd9_tpc_mmio_tail |
| // terminator: zero chipset id ends the scan in init |
| .b8 0 0 0 0 |
| |
| // GPC mmio lists |
| // |
| // Each entry is emitted by a macro defined outside this file; presumably |
| // its arguments are (register offset within the GPC, number of consecutive |
| // 32-bit registers) - confirm against the macro definition. |
| // |
| // The labels mark where a chipset's list ends. Per the chipsets table, |
| // 0xc1 stops at nvc1_gpc_mmio_tail (so it also gets the 0x000c6c entry), |
| // while 0xc0, 0xc3, 0xc4, 0xc8, 0xce and 0xcf stop at nvc0_gpc_mmio_tail. |
| nvc0_gpc_mmio_head: |
| mmctx_data(0x000380, 1) |
| mmctx_data(0x000400, 6) |
| mmctx_data(0x000450, 9) |
| mmctx_data(0x000600, 1) |
| mmctx_data(0x000684, 1) |
| mmctx_data(0x000700, 5) |
| mmctx_data(0x000800, 1) |
| mmctx_data(0x000808, 3) |
| mmctx_data(0x000828, 1) |
| mmctx_data(0x000830, 1) |
| mmctx_data(0x0008d8, 1) |
| mmctx_data(0x0008e0, 1) |
| mmctx_data(0x0008e8, 6) |
| mmctx_data(0x00091c, 1) |
| mmctx_data(0x000924, 3) |
| mmctx_data(0x000b00, 1) |
| mmctx_data(0x000b08, 6) |
| mmctx_data(0x000bb8, 1) |
| mmctx_data(0x000c08, 1) |
| mmctx_data(0x000c10, 8) |
| mmctx_data(0x000c80, 1) |
| mmctx_data(0x000c8c, 1) |
| mmctx_data(0x001000, 3) |
| mmctx_data(0x001014, 1) |
| nvc0_gpc_mmio_tail: |
| // NOTE(review): this is the only entry in the file followed by a ";" - |
| // presumably harmless to the assembler, but confirm and drop it if so |
| mmctx_data(0x000c6c, 1); |
| nvc1_gpc_mmio_tail: |
| |
| // GPC mmio list used only by the 0xd9 record of the chipsets table. It is a |
| // separate, complete list rather than a different tail on the nvc0 list. |
| nvd9_gpc_mmio_head: |
| mmctx_data(0x000380, 1) |
| mmctx_data(0x000400, 2) |
| mmctx_data(0x00040c, 3) |
| mmctx_data(0x000450, 9) |
| mmctx_data(0x000600, 1) |
| mmctx_data(0x000684, 1) |
| mmctx_data(0x000700, 5) |
| mmctx_data(0x000800, 1) |
| mmctx_data(0x000808, 3) |
| mmctx_data(0x000828, 1) |
| mmctx_data(0x000830, 1) |
| mmctx_data(0x0008d8, 1) |
| mmctx_data(0x0008e0, 1) |
| mmctx_data(0x0008e8, 6) |
| mmctx_data(0x00091c, 1) |
| mmctx_data(0x000924, 3) |
| mmctx_data(0x000b00, 1) |
| mmctx_data(0x000b08, 6) |
| mmctx_data(0x000bb8, 1) |
| mmctx_data(0x000c08, 1) |
| mmctx_data(0x000c10, 8) |
| mmctx_data(0x000c6c, 1) |
| mmctx_data(0x000c80, 1) |
| mmctx_data(0x000c8c, 1) |
| mmctx_data(0x001000, 3) |
| mmctx_data(0x001014, 1) |
| nvd9_gpc_mmio_tail: |
| |
| // TPC mmio lists |
| // |
| // One shared head with four cumulative tails: a chipset that stops at a |
| // later tail label also gets every entry before it. Per the chipsets table: |
| //   nvc0_tpc_mmio_tail  0xc0, 0xc8 |
| //   nvcf_tpc_mmio_tail  0xcf |
| //   nvc3_tpc_mmio_tail  0xc3, 0xc4, 0xce |
| //   nvc1_tpc_mmio_tail  0xc1 |
| // ctx_xfer applies the list once per TPC, using a stride of 0x800 between |
| // TPCs. |
| nvc0_tpc_mmio_head: |
| mmctx_data(0x000018, 1) |
| mmctx_data(0x00003c, 1) |
| mmctx_data(0x000048, 1) |
| mmctx_data(0x000064, 1) |
| mmctx_data(0x000088, 1) |
| mmctx_data(0x000200, 6) |
| mmctx_data(0x00021c, 2) |
| mmctx_data(0x000300, 6) |
| mmctx_data(0x0003d0, 1) |
| mmctx_data(0x0003e0, 2) |
| mmctx_data(0x000400, 3) |
| mmctx_data(0x000420, 1) |
| mmctx_data(0x0004b0, 1) |
| mmctx_data(0x0004e8, 1) |
| mmctx_data(0x0004f4, 1) |
| mmctx_data(0x000520, 2) |
| mmctx_data(0x000604, 4) |
| mmctx_data(0x000644, 20) |
| mmctx_data(0x000698, 1) |
| mmctx_data(0x000750, 2) |
| nvc0_tpc_mmio_tail: |
| mmctx_data(0x000758, 1) |
| mmctx_data(0x0002c4, 1) |
| mmctx_data(0x0006e0, 1) |
| nvcf_tpc_mmio_tail: |
| mmctx_data(0x0004bc, 1) |
| nvc3_tpc_mmio_tail: |
| mmctx_data(0x000544, 1) |
| nvc1_tpc_mmio_tail: |
| |
| // Per-TPC mmio list used only by the 0xd9 record of the chipsets table. It |
| // is a separate, complete list with a single tail label. |
| nvd9_tpc_mmio_head: |
| mmctx_data(0x000018, 1) |
| mmctx_data(0x00003c, 1) |
| mmctx_data(0x000048, 1) |
| mmctx_data(0x000064, 1) |
| mmctx_data(0x000088, 1) |
| mmctx_data(0x000200, 6) |
| mmctx_data(0x00021c, 2) |
| mmctx_data(0x0002c4, 1) |
| mmctx_data(0x000300, 6) |
| mmctx_data(0x0003d0, 1) |
| mmctx_data(0x0003e0, 2) |
| mmctx_data(0x000400, 3) |
| mmctx_data(0x000420, 3) |
| mmctx_data(0x0004b0, 1) |
| mmctx_data(0x0004e8, 1) |
| mmctx_data(0x0004f4, 1) |
| mmctx_data(0x000520, 2) |
| mmctx_data(0x000544, 1) |
| mmctx_data(0x000604, 4) |
| mmctx_data(0x000644, 20) |
| mmctx_data(0x000698, 1) |
| mmctx_data(0x0006e0, 1) |
| mmctx_data(0x000750, 3) |
| nvd9_tpc_mmio_tail: |
| |
| // Code segment. The first instruction branches past the shared code that |
| // is pulled in below and lands in init. |
| // |
| // NOTE: this file goes through m4 before the assembler (see the build line |
| // in the header), so comments must not contain backquotes or bare m4 macro |
| // names. |
| .section #nvc0_grgpc_code |
| bra #init |
| // Defining this m4 symbol (to nothing) before pulling the shared file in a |
| // second time presumably switches it from emitting its data-segment half |
| // to emitting its code - confirm in the shared file. |
| define(`include_code') |
| include(`nvc0_graph.fuc') |
| |
| // reports an exception to the host |
| // |
| // In: $r15 error code (see nvc0_graph.fuc) |
| // |
| // Writes the code to a HUB scratch register, then raises a HUB interrupt. |
| // $r14 is preserved for the caller; $r15 is overwritten with 1. |
| // |
| error: |
| push $r14 |
| // the 16-bit immediate supplies the low half of the address and sethi the |
| // upper half |
| mov $r14 -0x67ec // 0x9814 |
| sethi $r14 0x400000 |
| call #nv_wr32 // HUB_CTXCTL_CC_SCRATCH[5] = error code |
| // step from the scratch register to the interrupt register; this reuses |
| // $r14, so it relies on the shared write routine leaving $r14 intact |
| add b32 $r14 0x41c |
| mov $r15 1 |
| call #nv_wr32 // HUB_CTXCTL_INTR_UP_SET |
| pop $r14 |
| ret |
| |
| // GPC fuc initialisation, executed by triggering ucode start, will |
| // fall through to main loop after completion. |
| // |
| // Input: |
| // CC_SCRATCH[0]: chipset (PMC_BOOT_0 read returns 0x0bad0bad... sigh) |
| // CC_SCRATCH[1]: context base |
| // |
| // Output: |
| // CC_SCRATCH[0]: |
| // 31:31: set to signal completion |
| // CC_SCRATCH[1]: |
| // 31:0: GPC context size |
| // |
| // If the chipset is not found in the chipsets table the fuc halts without |
| // signalling completion. There is nothing to return to: init is entered by |
| // a branch with $sp freshly zeroed, so no return address is on the stack. |
| // |
| // Register use from init_context onwards: |
| // $r1 matching chipsets record |
| // $r2 running context base |
| // $r3 running GPC context size |
| // $r4 I/O address scratch |
| // |
| init: |
| // $r0 stays zero for the rest of the program; the stack starts at 0 |
| clear b32 $r0 |
| mov $sp $r0 |
| |
| // enable fifo access |
| mov $r1 0x1200 |
| mov $r2 2 |
| iowr I[$r1 + 0x000] $r2 // FIFO_ENABLE |
| |
| // setup i0 handler, and route all interrupts to it |
| mov $r1 #ih |
| mov $iv0 $r1 |
| mov $r1 0x400 |
| iowr I[$r1 + 0x300] $r0 // INTR_DISPATCH |
| |
| // enable fifo interrupt |
| mov $r2 4 |
| iowr I[$r1 + 0x000] $r2 // INTR_EN_SET |
| |
| // enable interrupts |
| bset $flags ie0 |
| |
| // figure out which GPC we are, and how many TPCs we have |
| mov $r1 0x608 |
| shl b32 $r1 6 |
| iord $r2 I[$r1 + 0x000] // UNITS |
| mov $r3 1 |
| and $r2 0x1f |
| shl b32 $r3 $r2 |
| sub b32 $r3 1 |
| // tpc_count = UNITS & 0x1f, tpc_mask = (1 << tpc_count) - 1 |
| st b32 D[$r0 + #tpc_count] $r2 |
| st b32 D[$r0 + #tpc_mask] $r3 |
| add b32 $r1 0x400 |
| iord $r2 I[$r1 + 0x000] // MYINDEX |
| st b32 D[$r0 + #gpc_id] $r2 |
| |
| // find context data for this chipset |
| mov $r2 0x800 |
| shl b32 $r2 6 |
| iord $r2 I[$r2 + 0x000] // CC_SCRATCH[0] |
| // start one record (12 bytes) early, the loop advances before it reads |
| mov $r1 #chipsets - 12 |
| init_find_chipset: |
| add b32 $r1 12 |
| ld b32 $r3 D[$r1 + 0x00] |
| cmpu b32 $r3 $r2 |
| bra e #init_context |
| // a zero id word is the table terminator |
| cmpu b32 $r3 0 |
| bra ne #init_find_chipset |
| // unknown chipset: halt. A ret here would pop a return address that |
| // was never pushed and jump to whatever it read. |
| exit |
| |
| // initialise context base, and size tracking |
| init_context: |
| mov $r2 0x800 |
| shl b32 $r2 6 |
| iord $r2 I[$r2 + 0x100] // CC_SCRATCH[1], initial base |
| clear b32 $r3 // track GPC context size here |
| |
| // set mmctx base addresses now so we don't have to do it later, |
| // they don't currently ever change |
| mov $r4 0x700 |
| shl b32 $r4 6 |
| shr b32 $r5 $r2 8 |
| iowr I[$r4 + 0x000] $r5 // MMCTX_SAVE_SWBASE |
| iowr I[$r4 + 0x100] $r5 // MMCTX_LOAD_SWBASE |
| |
| // calculate GPC mmio context size, store the chipset-specific |
| // mmio list pointers somewhere we can get at them later without |
| // re-parsing the chipset list |
| // (the registers are cleared first, presumably because the 16-bit |
| // loads leave the upper halves untouched) |
| clear b32 $r14 |
| clear b32 $r15 |
| ld b16 $r14 D[$r1 + 4] |
| ld b16 $r15 D[$r1 + 6] |
| st b16 D[$r0 + #gpc_mmio_list_head] $r14 |
| st b16 D[$r0 + #gpc_mmio_list_tail] $r15 |
| call #mmctx_size |
| // the size comes back in $r15; advance both the base and the total |
| add b32 $r2 $r15 |
| add b32 $r3 $r15 |
| |
| // calculate per-TPC mmio context size, store the list pointers |
| ld b16 $r14 D[$r1 + 8] |
| ld b16 $r15 D[$r1 + 10] |
| st b16 D[$r0 + #tpc_mmio_list_head] $r14 |
| st b16 D[$r0 + #tpc_mmio_list_tail] $r15 |
| call #mmctx_size |
| // one copy of the list per TPC |
| ld b32 $r14 D[$r0 + #tpc_count] |
| mulu $r14 $r15 |
| add b32 $r2 $r14 |
| add b32 $r3 $r14 |
| |
| // round up base/size to 256 byte boundary (for strand SWBASE) |
| // note: both values always move on to the next boundary, even when |
| // they are already a multiple of 256 |
| add b32 $r4 0x1300 |
| shr b32 $r3 2 |
| iowr I[$r4 + 0x000] $r3 // MMCTX_LOAD_COUNT, wtf for?!? |
| shr b32 $r2 8 |
| // $r3 was already shifted right by 2 above, so 6 more makes 8 |
| shr b32 $r3 6 |
| add b32 $r2 1 |
| add b32 $r3 1 |
| shl b32 $r2 8 |
| shl b32 $r3 8 |
| |
| // calculate size of strand context data |
| mov b32 $r15 $r2 |
| call #strand_ctx_init |
| add b32 $r3 $r15 |
| |
| // save context size, and tell HUB we're done |
| mov $r1 0x800 |
| shl b32 $r1 6 |
| iowr I[$r1 + 0x100] $r3 // CC_SCRATCH[1] = context size |
| add b32 $r1 0x800 |
| clear b32 $r2 |
| bset $r2 31 |
| iowr I[$r1 + 0x000] $r2 // CC_SCRATCH[0] |= 0x80000000 |
| |
| // Main program loop, very simple, sleeps until woken up by the interrupt |
| // handler, pulls a command from the queue and executes its handler |
| // |
| // The queue fetch routine lives outside this file; as used here it leaves |
| // the command in $r14, its data in $r15, and $p1 set when there was |
| // nothing to fetch - confirm against its definition. |
| // |
| // Commands 0-3 are context transfers: bit 0 of the command becomes $p1 and |
| // bit 1 becomes $p2, matching the inputs documented on ctx_xfer. Anything |
| // else is reported to the host through error. |
| // |
| main: |
| // arm $p0 and sleep while it stays set; the interrupt handler clears it |
| bset $flags $p0 |
| sleep $p0 |
| mov $r13 #cmd_queue |
| call #queue_get |
| // nothing queued, go back to sleep |
| bra $p1 #main |
| |
| // 0x0000-0x0003 are all context transfers |
| cmpu b32 $r14 0x04 |
| bra nc #main_not_ctx_xfer |
| // fetch $flags and mask off $p1/$p2 |
| mov $r1 $flags |
| mov $r2 0x0006 |
| not b32 $r2 |
| and $r1 $r2 |
| // set $p1/$p2 according to transfer type |
| shl b32 $r14 1 |
| or $r1 $r14 |
| mov $flags $r1 |
| // transfer context data |
| // ($r15 is not touched since the fetch, so it is passed on as fetched) |
| call #ctx_xfer |
| bra #main |
| |
| main_not_ctx_xfer: |
| // error code = (command << 16) | the bad-command code |
| shl b32 $r15 $r14 16 |
| or $r15 E_BAD_COMMAND |
| call #error |
| bra #main |
| |
| // interrupt handler |
| // |
| // init routes all interrupts here. Saves $flags together with $r8-$r11 |
| // and $r13-$r15, moves one pending FIFO command into cmd_queue if the FIFO |
| // bit is set, acks every interrupt bit that was read as pending, and |
| // finally clears $p0 in the restored $flags so that the sleep in main ends. |
| // |
| // $r12 is not saved. $r8 and $r9 are not used directly below apart from |
| // carrying $flags; presumably they are saved because the queue routine |
| // uses them - confirm against its definition. |
| // |
| ih: |
| push $r8 |
| mov $r8 $flags |
| push $r8 |
| push $r9 |
| push $r10 |
| push $r11 |
| push $r13 |
| push $r14 |
| push $r15 |
| |
| // incoming fifo command? |
| iord $r10 I[$r0 + 0x200] // INTR |
| // bit 2 is the same bit init enabled through INTR_EN_SET |
| and $r11 $r10 0x00000004 |
| bra e #ih_no_fifo |
| // queue incoming fifo command for later processing |
| mov $r11 0x1900 |
| mov $r13 #cmd_queue |
| iord $r14 I[$r11 + 0x100] // FIFO_CMD |
| iord $r15 I[$r11 + 0x000] // FIFO_DATA |
| call #queue_put |
| add b32 $r11 0x400 |
| mov $r14 1 |
| iowr I[$r11 + 0x000] $r14 // FIFO_ACK |
| |
| // ack, and wake up main() |
| ih_no_fifo: |
| // $r10 still holds the INTR value read on entry |
| iowr I[$r0 + 0x100] $r10 // INTR_ACK |
| |
| // restore in reverse order of the pushes above |
| pop $r15 |
| pop $r14 |
| pop $r13 |
| pop $r11 |
| pop $r10 |
| pop $r9 |
| pop $r8 |
| mov $flags $r8 |
| pop $r8 |
| // done after $flags is restored so the cleared $p0 is what main sees |
| bclr $flags $p0 |
| iret |
| |
| // Set this GPC's bit in HUB_BAR, used to signal completion of various |
| // activities to the HUB fuc |
| // |
| // Takes no inputs. Overwrites $r14 and $r15. |
| // |
| hub_barrier_done: |
| // $r15 = 1 << gpc_id |
| mov $r15 1 |
| ld b32 $r14 D[$r0 + #gpc_id] |
| shl b32 $r15 $r14 |
| // the 16-bit immediate supplies the low half of the address and sethi the |
| // upper half |
| mov $r14 -0x6be8 // 0x409418 - HUB_BAR_SET |
| sethi $r14 0x400000 |
| call #nv_wr32 |
| ret |
| |
| // Disables various things, waits a bit, and re-enables them.. |
| // |
| // Not sure how exactly this helps, perhaps "ENABLE" is not such a |
| // good description for the bits we turn off? Anyways, without this, |
| // funny things happen. |
| // |
| // Takes no inputs. Overwrites $r14 and $r15. |
| // |
| ctx_redswitch: |
| mov $r14 0x614 |
| shl b32 $r14 6 |
| mov $r15 0x020 |
| iowr I[$r14] $r15 // GPC_RED_SWITCH = POWER |
| // short busy-wait: count $r15 down from 8 to zero |
| mov $r15 8 |
| ctx_redswitch_delay: |
| sub b32 $r15 1 |
| bra ne #ctx_redswitch_delay |
| mov $r15 0xa20 |
| iowr I[$r14] $r15 // GPC_RED_SWITCH = UNK11, ENABLE, POWER |
| ret |
| |
| // Transfer GPC context data between GPU and storage area |
| // |
| // In: $r15 context base address |
| // $p1 clear on save, set on load |
| // $p2 set if opposite direction done/will be done, so: |
| // on save it means: "a load will follow this save" |
| // on load it means: "a save preceded this load" |
| // |
| // Overwrites $r1, $r2 and $r10-$r15 directly; the routines it calls may |
| // touch more. Ends by setting this GPC's bit in the HUB barrier. |
| // |
| ctx_xfer: |
| // set context base address |
| mov $r1 0xa04 |
| shl b32 $r1 6 |
| iowr I[$r1 + 0x000] $r15// MEM_BASE |
| // the red switch toggle is only done ahead of a load |
| bra not $p1 #ctx_xfer_not_load |
| call #ctx_redswitch |
| ctx_xfer_not_load: |
| |
| // strands |
| mov $r1 0x4afc |
| sethi $r1 0x20000 |
| mov $r2 0xc |
| iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0c |
| call #strand_wait |
| mov $r2 0x47fc |
| sethi $r2 0x20000 |
| iowr I[$r2] $r0 // STRAND_FIRST_GENE(0x3f) = 0x00 |
| // $r2 = 3 + $p1, i.e. 3 on save and 4 on load |
| xbit $r2 $flags $p1 |
| add b32 $r2 3 |
| iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x03/0x04 (SAVE/LOAD) |
| |
| // mmio context |
| xbit $r10 $flags $p1 // direction |
| or $r10 2 // first |
| mov $r11 0x0000 |
| sethi $r11 0x500000 |
| ld b32 $r12 D[$r0 + #gpc_id] |
| shl b32 $r12 15 |
| add b32 $r11 $r12 // base = NV_PGRAPH_GPCn |
| ld b32 $r12 D[$r0 + #gpc_mmio_list_head] |
| ld b32 $r13 D[$r0 + #gpc_mmio_list_tail] |
| mov $r14 0 // not multi |
| call #mmctx_xfer |
| |
| // per-TPC mmio context |
| xbit $r10 $flags $p1 // direction |
| or $r10 4 // last |
| mov $r11 0x4000 |
| sethi $r11 0x500000 // base = NV_PGRAPH_GPC0_TPC0 |
| ld b32 $r12 D[$r0 + #gpc_id] |
| shl b32 $r12 15 |
| add b32 $r11 $r12 // base = NV_PGRAPH_GPCn_TPC0 |
| ld b32 $r12 D[$r0 + #tpc_mmio_list_head] |
| ld b32 $r13 D[$r0 + #tpc_mmio_list_tail] |
| ld b32 $r15 D[$r0 + #tpc_mask] |
| mov $r14 0x800 // stride = 0x800 |
| call #mmctx_xfer |
| |
| // wait for strands to finish |
| call #strand_wait |
| |
| // if load, or a save without a load following, do some |
| // unknown stuff that's done after finishing a block of |
| // strand commands |
| // NOTE(review): as coded, the post step runs on a load, or on a save |
| // with $p2 set (which the header defines as "a load will follow"); a |
| // save with $p2 clear skips it. That is the opposite of the sentence |
| // above - confirm which one is intended. |
| bra $p1 #ctx_xfer_post |
| bra not $p2 #ctx_xfer_done |
| ctx_xfer_post: |
| mov $r1 0x4afc |
| sethi $r1 0x20000 |
| mov $r2 0xd |
| iowr I[$r1] $r2 // STRAND_CMD(0x3f) = 0x0d |
| call #strand_wait |
| |
| // mark completion in HUB's barrier |
| ctx_xfer_done: |
| call #hub_barrier_done |
| ret |
| |
| // pad the end of the code to a 256-byte boundary |
| .align 256 |