first sksl on skvm

Exactly enough implemented to run

    fm --skvm -b cpu -s runtime_shader -w foo

This shader

     0: 0077 load2 0
    10: 00a2 pushimmediate 998277249(0.0039215688593685627)
    22: 0070 dup
    31: 0094 multiplyf2
    40: 0080 loaduniform 2
    50: 00a2 pushimmediate 1065353216(1.0)
    62: 00c1 store4 2
    71: 00b4 return 0

becomes this blitter, including matrix, blending, asserts, etc:

    17 registers, 57 instructions:
    0	r0 = uniform32 arg(0) 4
    1	r0 = to_f32 r0
    2	r1 = splat 3F000000 (0.5)
    3	r0 = add_f32 r0 r1
    4	r2 = uniform32 arg(0) 2C
    5	r3 = uniform32 arg(0) 28
    6	r2 = fma_f32 r0 r3 r2
    7	r3 = uniform32 arg(0) 0
    8	r4 = uniform32 arg(0) 24
    9	r5 = splat 3F800000 (1)
    10	r6 = uniform32 arg(0) 38
    11	r6 = min_f32 r6 r5
    12	r7 = splat 0 (0)
    13	r6 = max_f32 r7 r6
    14	r8 = splat 437F0000 (255)
    15	r9 = mul_f32 r6 r8
    16	r9 = round r9
    17	r10 = splat FF (3.5733111e-43)
    18	r10 = pack r9 r10 8
    19	r9 = splat 3B808081 (0.0039215689)
    20	r11 = uniform32 arg(0) 20
    21	r12 = uniform32 arg(0) 1C
    22	r11 = fma_f32 r0 r12 r11
    23	r12 = uniform32 arg(0) 18
    24	r0 = splat 3F800001 (1.0000001)
    25	r13 = min_f32 r6 r0
    26	r14 = splat B4000000 (-1.1920929e-07)
    27	r13 = max_f32 r14 r13
    28	r13 = eq_f32 r6 r13
    29	assert_true r13 r6
    loop:
    30	    r6 = index
    31	    r6 = sub_i32 r3 r6
    32	    r6 = to_f32 r6
    33	    r6 = add_f32 r6 r1
    34	    r13 = fma_f32 r6 r4 r2
    35	    r13 = mul_f32 r9 r13
    36	    r6 = fma_f32 r6 r12 r11
    37	    r13 = min_f32 r13 r5
    38	    r13 = max_f32 r7 r13
    39	    r15 = mul_f32 r13 r8
    40	    r15 = round r15
    41	    r6 = mul_f32 r9 r6
    42	    r6 = min_f32 r6 r5
    43	    r6 = max_f32 r7 r6
    44	    r16 = mul_f32 r6 r8
    45	    r16 = round r16
    46	    r15 = pack r16 r15 8
    47	    r15 = pack r15 r10 16
    48	    store32 arg(1) r15
    49	    r15 = min_f32 r13 r0
    50	    r15 = max_f32 r14 r15
    51	    r15 = eq_f32 r13 r15
    52	    assert_true r15 r13
    53	    r13 = min_f32 r6 r0
    54	    r13 = max_f32 r14 r13
    55	    r13 = eq_f32 r6 r13
    56	    assert_true r13 r6

And that JITs using 11 ymm registers.

Change-Id: Ib45b5fa6aee427f290b77d8900f10d433ad81133
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/281746
Reviewed-by: Brian Osman <brianosman@google.com>
Commit-Queue: Mike Klein <mtklein@google.com>
diff --git a/src/sksl/SkSLByteCode.cpp b/src/sksl/SkSLByteCode.cpp
index a9c3480..90c6671 100644
--- a/src/sksl/SkSLByteCode.cpp
+++ b/src/sksl/SkSLByteCode.cpp
@@ -55,7 +55,9 @@
     case ByteCodeInstruction::op##N: printf(text "N %d", READ8()); break;
 
 static const uint8_t* DisassembleInstruction(const uint8_t* ip) {
-    switch ((ByteCodeInstruction) (intptr_t) READ_INST()) {
+    auto inst = (ByteCodeInstruction) (intptr_t) READ_INST();
+    printf("%04x ", (int)inst);
+    switch (inst) {
         VECTOR_MATRIX_DISASSEMBLE(kAddF, "addf")
         VECTOR_DISASSEMBLE(kAddI, "addi")
         case ByteCodeInstruction::kAndB: printf("andb"); break;