blob: a70e7712d764179dfb71f83e5c91ddb4aa6c330c [file] [log] [blame]
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
* Ben Widawsky <ben@bwidawsk.net>
*
*/
#include "debug.h"
#include "evict.h"
/*
* Bit masks for the control register, grouped by subregister: CR0_0_* are
* applied to cr0.0 and CR0_1_* to cr0.1 (see their uses in Enter below).
* Only some of these masks are referenced in this file.
*/
#define CR0_0_ME_STATE_CTRL (1 << 31)
#define CR0_0_BP_SUPPRESS (1 << 15)
#define CR0_0_SPF_EN (1 << 2)
#define CR0_0_ACC_DIS (1 << 1)
#define CR0_1_BES_CTRL (1 << 31)
#define CR0_1_HALT_CTRL (1 << 30)
#define CR0_1_SOFT_EXCEPTION_CTRL (1 << 29)
#define CR0_1_ILLGL_OP_STS (1 << 28)
#define CR0_1_STACK_OVRFLW_STS (1 << 27)
/* OR-ed into cr0.0 right after the leading nop of Enter. */
#define CR0_0_ENTRY_UNMASK (CR0_0_SPF_EN | CR0_0_ACC_DIS)
// TODO: Need to fix this for non breakpoint case
/* AND-ed into cr0.1 near the end of Enter: clears BES_CTRL, keeps all other bits. */
#define CR0_1_ENTRY_UNMASK ~(CR0_1_BES_CTRL)
/* AND-ed into cr0.0 near the end of Enter: clears ME_STATE_CTRL, SPF_EN and ACC_DIS. */
#define CR0_0_RETURN_MASK ~(CR0_0_ME_STATE_CTRL | CR0_0_SPF_EN | CR0_0_ACC_DIS)
// TODO: not sure how to make this not hardcoded
/* Scratch space reserved for each thread (1 << 20; presumably bytes - TODO confirm). */
#define PER_THREAD_SCRATCH_SIZE (1 << 20)
/*
* Per-thread scratch size divided by 16 (>> 4); used as the multiplier for the
* thread index in the GPGPU path of Enter.
* NOTE(review): despite the QWORDS name, a shift by 4 yields 16-byte units if
* the size above is in bytes - confirm the intended unit.
*/
#define PER_THREAD_QWORDS (PER_THREAD_SCRATCH_SIZE >> 4)
/* Should get this from brw_defines.h */
/*
* Raw field encodings used to build the send descriptors below:
* the *_OWORD_BLOCK_n_OWORDS codes are shifted to bit 8 (SEND_BLOCK_SIZE*),
* the *_MESSAGE_* codes are shifted to bit 13 (SEND_TYPE_*).
*/
#define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
#define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
#define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 8
#define BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0
/* desc field, ie. dword3 6.3.66.2 and 2.11.2.1.4 */
/*
* Each macro is a field value already shifted to its position in the
* descriptor dword:
*   bit 25 - message length   (SEND_MLEN_n, n is the length value)
*   bit 20 - response length  (SEND_RLEN_n)
*   bit 19 - header present
*   bit 17 - write commit
*   bit 13 - message type     (SEND_TYPE_WRITE / SEND_TYPE_READ)
*   bit  8 - block size code  (SEND_BLOCK_SIZEn; the code, not the oword count)
*   bit  0 - binding table index, fixed to 255 here
*/
#define SEND_MLEN_5 (5<<25)
#define SEND_MLEN_3 (3<<25)
#define SEND_MLEN_2 (2<<25)
#define SEND_MLEN_1 (1<<25)
#define SEND_RLEN_1 (1<<20)
#define SEND_RLEN_0 (0<<20)
#define SEND_HEADER_PRESENT (1<<19)
#define SEND_WRITE_COMMIT (1<<17)
#define SEND_TYPE_WRITE (GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE<<13)
#define SEND_TYPE_READ (BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ<<13)
#define SEND_BLOCK_SIZE1 (BRW_DATAPORT_OWORD_BLOCK_2_OWORDS<<8)
#define SEND_BLOCK_SIZE2 (BRW_DATAPORT_OWORD_BLOCK_4_OWORDS<<8)
#define SEND_BLOCK_SIZE4 (BRW_DATAPORT_OWORD_BLOCK_8_OWORDS<<8)
#define SEND_BINDING_TABLE (255<<0)
/*
* Complete message descriptors, one per scratch access flavour. The fields are
* listed from the highest bit position (message length) down to the binding
* table index. The bodies are intentionally left unparenthesised: they are
* only ever pasted as a stand-alone operand of a send instruction.
*/
// 2-oword write, no write commit
#define WRITE_DESC1_XXX SEND_MLEN_2 | SEND_HEADER_PRESENT | SEND_TYPE_WRITE | SEND_BLOCK_SIZE1 | SEND_BINDING_TABLE
// 2-oword write, write commit requested
#define WRITE_DESC1_WC SEND_MLEN_2 | SEND_HEADER_PRESENT | SEND_WRITE_COMMIT | SEND_TYPE_WRITE | SEND_BLOCK_SIZE1 | SEND_BINDING_TABLE
// 4-oword write
#define WRITE_DESC2 SEND_MLEN_3 | SEND_HEADER_PRESENT | SEND_TYPE_WRITE | SEND_BLOCK_SIZE2 | SEND_BINDING_TABLE
// 8-oword write
#define WRITE_DESC4 SEND_MLEN_5 | SEND_HEADER_PRESENT | SEND_TYPE_WRITE | SEND_BLOCK_SIZE4 | SEND_BINDING_TABLE
// 2-oword read, one response register
#define RECV_DESC1 SEND_MLEN_1 | SEND_RLEN_1 | SEND_HEADER_PRESENT | SEND_TYPE_READ | SEND_BLOCK_SIZE1 | SEND_BINDING_TABLE
// Pre-computed numeric forms: 0x40902FF equals WRITE_DESC1_XXX, 0x40b02FF equals WRITE_DESC1_WC
//#define SEND_DESC1 0x40902FF
#define SEND_DESC1_WC 0x40b02FF
/* ex_desc field 6.3.66.2 */
/* Value 5 in the low bits of the extended descriptor; named for the data port render cache target. */
#define SEND_DP_RENDER_CACHE (5<<0)
/* End-of-thread bit (bit 5); defined but not used by the macros in this file. */
#define SEND_EOT (1<<5)
/* Extended descriptor used by every scratch read/write macro below. */
#define SEND_EX_DESC SEND_DP_RENDER_CACHE
/**
* WRITE_SCRATCH1 - Write 2 owords.
* cdst.2 - offset
* cdst.5 - per thread scratch base, relative to gsba??
* cdst+1 - data to be written.
*
* cdst is the message header register; the payload is taken from the
* register(s) following it. The destination is null (no response is kept).
*/
#define WRITE_SCRATCH1(cdst) \
send (16) null cdst SEND_EX_DESC WRITE_DESC1_XXX FLAGS
/*
* WRITE_SCRATCH1_WC - same 2-oword write, but with the write-commit bit set in
* the descriptor and g1 as the send destination instead of null.
* NOTE(review): WRITE_DESC1_WC does not set a response length even though a
* destination register is given - confirm against the hardware docs before use.
*/
#define WRITE_SCRATCH1_WC(cdst) \
send (16) g1 cdst SEND_EX_DESC WRITE_DESC1_WC FLAGS
/* WRITE_SCRATCH2 - 4-oword variant (block size code for 4 owords, message length 3). */
#define WRITE_SCRATCH2(cdst) \
send (16) null cdst SEND_EX_DESC WRITE_DESC2 FLAGS
/* WRITE_SCRATCH4 - 8-oword variant (block size code for 8 owords, message length 5). */
#define WRITE_SCRATCH4(cdst) \
send (16) null cdst SEND_EX_DESC WRITE_DESC4 FLAGS
/**
* READ_SCRATCH1 - Read 2 owords.
* cdst.2 - offset
* cdst.5 - per thread scratch base, relative to gsba??
* grf - register where read data is populated.
*
* Despite its name, cdst is the message header (the send source) here; the
* read result lands in grf. The descriptor asks for a 1-register message and a
* 1-register response.
*/
#define READ_SCRATCH1(grf, cdst) \
send (16) grf:ud cdst SEND_EX_DESC RECV_DESC1 FLAGS
/**
* SET_OFFSET - setup mrf for the given offset prior to a send instruction.
* mrf - message register to be used as the header.
* offset - offset.
*
* Copies g0.5 into mrf.5 (the per-thread scratch base slot) and writes offset
* into mrf.2. No other lane of mrf is touched, so whatever else the header
* holds is sent as-is.
*
* If a WRITE_SCRATCH follows, mrf+1 -> mrf+1+n should contain the data to be
* written.
*/
#define SET_OFFSET(mrf, offset) \
mov (1) mrf.5:ud g0.5:ud FLAGS; \
mov (1) mrf.2:ud offset:ud FLAGS
/**
* SAVE_CRF - save the control register
*
* Fills m1 with the marker 0xdeadbeef, then overwrites lanes 0..3 with
* cr0.0, cr0.1, cr0.2 and sr0, and writes m1 to scratch at CR_OFFSET using
* m0 as the header. Lanes 4..7 of the stored block therefore keep the marker.
*
* clobbers: m0.2, m0.5, all of m1
*/
/* Scratch offset of the saved control/state register block. */
#define CR_OFFSET 0x40
#define SAVE_CRF \
SET_OFFSET(m0, CR_OFFSET); \
mov (8) m1:ud 0xdeadbeef:ud FLAGS; \
mov (1) m1.0:ud cr0.0 FLAGS; \
mov (1) m1.1:ud cr0.1 FLAGS; \
mov (1) m1.2:ud cr0.2 FLAGS; \
mov (1) m1.3:ud sr0:ud FLAGS; \
WRITE_SCRATCH1(m0)
/*
* STORE_GRF - write one general register to scratch.
* grf - register to save.
* offset - scratch offset to write to.
*
* The register is copied into m1 and sent with m0 as the header, so m1 must
* already have been saved (or be dead) when this is used.
*
* clobbers: m0.2, m0.5, all of m1
*/
#define STORE_GRF(grf, offset) \
SET_OFFSET(m0, offset); \
mov (8) m1:ud grf:ud FLAGS; \
WRITE_SCRATCH1(m0)
/*
* LOAD_GRF - read one general register back from scratch.
* grf - register that receives the data.
* offset - scratch offset to read from.
*
* Uses m0 as the message header. SET_OFFSET reads g0.5, so when grf is g0 the
* header is built from the old g0 before the load overwrites it.
*
* clobbers: m0.2, m0.5
*/
#define LOAD_GRF(grf, offset) \
SET_OFFSET(m0, offset); \
READ_SCRATCH1(grf, m0)
/*
* STORE_MRF - write a message register to scratch, using the PRECEDING
* message register as the header.
* mrf - header register; the payload is the register after it (mrf+1, per
*       the WRITE_SCRATCH1 contract).
* offset - scratch offset to write to.
*
* Quirk: STORE_MRF(mN, off) therefore saves m(N+1), and it modifies lanes 2
* and 5 of mN itself. Callers must save registers in ascending order so that a
* register is stored before it is reused as a header.
*
* clobbers: mrf.2 mrf.5
*/
#define STORE_MRF(mrf, offset) \
SET_OFFSET(mrf, offset); \
WRITE_SCRATCH1(mrf)
/*
* LOAD_MRF - read a message register back from scratch.
* mrf - register that receives the data (the named register itself).
* offset - scratch offset to read from.
*
* non-quirky semantics, unlike STORE_MRF
* The data is read into g1 first and then copied to mrf.
*
* clobbers: g1, m0.2, m0.5 (m0 is the header used by LOAD_GRF)
*/
#define LOAD_MRF(mrf, offset) \
LOAD_GRF(g1, offset); \
mov (8) mrf:ud g1:ud FLAGS
/*
* SAVE_ALL_MRF - save m2..m15 to scratch offsets 0x2..0x1c.
*
* Each STORE_MRF(mN, off) uses mN as the header and stores m(N+1), so the
* entries must stay in ascending order: mN has been written out by the
* previous line before its lanes 2 and 5 are overwritten here. m1 itself is
* expected to have been stored at offset 0 by the caller beforehand.
* Offsets advance by 2 per register, matching the 2-oword write size.
*/
#define SAVE_ALL_MRF \
/* m1 is saved already */ \
STORE_MRF(m1, 0x2); \
STORE_MRF(m2, 0x4); \
STORE_MRF(m3, 0x6); \
STORE_MRF(m4, 0x8); \
STORE_MRF(m5, 0xa); \
STORE_MRF(m6, 0xc); \
STORE_MRF(m7, 0xe); \
STORE_MRF(m8, 0x10); \
STORE_MRF(m9, 0x12); \
STORE_MRF(m10, 0x14); \
STORE_MRF(m11, 0x16); \
STORE_MRF(m12, 0x18); \
STORE_MRF(m13, 0x1a); \
STORE_MRF(m14, 0x1c)
/*
* RESTORE_ALL_MRF - reload m15..m1 from scratch offsets 0x1c..0x0.
*
* LOAD_MRF names the register that is actually restored, so the register
* numbers here are one higher than on the matching STORE_MRF line (which names
* the header): offset 0x1c holds m15, ..., offset 0x0 holds m1.
* m0 is not restored; it is the header for every load.
*
* clobbers: g1, m0.2, m0.5
*/
#define RESTORE_ALL_MRF \
LOAD_MRF(m15, 0x1c); \
LOAD_MRF(m14, 0x1a); \
LOAD_MRF(m13, 0x18); \
LOAD_MRF(m12, 0x16); \
LOAD_MRF(m11, 0x14); \
LOAD_MRF(m10, 0x12); \
LOAD_MRF(m9, 0x10); \
LOAD_MRF(m8, 0xe); \
LOAD_MRF(m7, 0xc); \
LOAD_MRF(m6, 0xa); \
LOAD_MRF(m5, 0x8); \
LOAD_MRF(m4, 0x6); \
LOAD_MRF(m3, 0x4); \
LOAD_MRF(m2, 0x2); \
LOAD_MRF(m1, 0x0)
/* Refuse to build unless SANDYBRIDGE is defined by the build. */
#ifndef SANDYBRIDGE
#error Only SandyBridge is supported
#endif
/*
* Default flags for an instruction.
* The macros above reference FLAGS before this point; that is fine because the
* preprocessor only expands it where those macros are used, further down.
*/
#define FLAGS { ALIGN1, SWITCH, MASK_DISABLE, ACCWRCTRL}
/*
* Entry point of the system routine.
* We can clobber m0, and g0.4, everything else must be saved.
*/
Enter:
nop;
/* Set the SPF_EN and ACC_DIS bits in cr0.0; they are cleared again by CR0_0_RETURN_MASK on the way out. */
or (1) cr0.0 cr0.0 CR0_0_ENTRY_UNMASK:ud FLAGS;
/*
* g0.5 has the per thread scratch space when running in FS or VS.
* If we don't have a valid g0.5, we can calculate a per thread scratch offset
* using the system registers. The problem is we do not have a good way to know
* the offset from GSBA. The system routine will have to be hardcoded or
* dynamically patched with the correct offset.
* TID is in sr0.0[2:0]
* EUID is in sr0.0[11:8]
*
* The GPGPU path below builds (sr0.0 >> 5) + (sr0.0 & 7) in the low word of
* g0.4 and multiplies it by PER_THREAD_QWORDS. g0.8:uw and g0.9:uw are
* presumably the low and high words of g0.4:ud, and g0.16:ub its lowest byte
* (subregister numbers counted in units of the operand type - TODO confirm).
*
* NOTE(review): the shift is applied to the whole word without masking, so
* sr0.0 bits [7:5] and [15:12] also end up in the sum - confirm they read as
* zero.
* NOTE(review): the result is left in g0.4, while SET_OFFSET takes the scratch
* base from g0.5 - confirm how the computed value is meant to be consumed.
*/
#ifdef GPGPU
/* Start from zero so the word-sized operations below define the whole dword. */
mov (1) g0.4:ud 0:ud FLAGS;
#if 0
/* This should work according to the docs, the add blows up */
shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
add (1) g0.16:ub g0.16:ub sr0.0:ub FLAGS;
#else
/* low word = sr0.0 >> 5 */
shr (1) g0.8:uw sr0.0:uw 5 FLAGS;
/* high word is used as a temporary for sr0.0 & 0x7 (the TID bits) */
mov (1) g0.9:uw sr0.0:uw FLAGS;
and (1) g0.9:uw g0.9:uw 0x7:uw FLAGS;
add (1) g0.8:uw g0.8:uw g0.9:uw FLAGS;
/* clear the temporary so only the sum remains in g0.4 */
mov (1) g0.9:uw 0:uw FLAGS;
/* scale the thread index by the per-thread scratch size */
mul (1) g0.4:ud g0.4:ud PER_THREAD_QWORDS FLAGS;
#endif
#endif
/* Zero m0 before it is first used as a message header. */
mov (8) m0:ud 0:ud FLAGS;
/* Saves must occur in order so as not to clobber the next register */
/* m0 is the header here, so this stores m1 at offset 0 (see STORE_MRF). */
STORE_MRF(m0, 0);
/* g0 and g1 are staged through m1, which is safe now that m1 has been stored. */
STORE_GRF(g0, 0x20);
STORE_GRF(g1, 0x22);
/* Stores m2..m15; m1 is reused as the first header. */
SAVE_ALL_MRF;
/*
* g1 is free to use as a temporary from here on (its value is in scratch).
* Publish a state word and a protocol version at their scratch slots.
* STATE_EU_MSG, STATE_QWORD, DEBUG_PROTOCOL_VERSION and COMMUNICATION_QWORD
* are not defined in this file - presumably they come from debug.h.
*/
mov (8) g1:ud STATE_EU_MSG:ud FLAGS;
STORE_GRF(g1, STATE_QWORD);
mov (8) g1:ud DEBUG_PROTOCOL_VERSION:ud FLAGS;
STORE_GRF(g1, COMMUNICATION_QWORD);
/* Store cr0.0, cr0.1, cr0.2 and sr0 at CR_OFFSET. */
SAVE_CRF;
/* EVICT_CACHE is not defined in this file - presumably provided by evict.h. */
EVICT_CACHE;
/* Wait on notification register n1; presumably this parks the thread until the host side signals it - TODO confirm. */
wait n1:ud;
EVICT_CACHE;
/* Using this to try to keep coherency */
/* The three values read into g1 here are discarded; g1 is overwritten again below. */
LOAD_GRF(g1, CR_OFFSET);
LOAD_GRF(g1, COMMUNICATION_QWORD);
LOAD_GRF(g1, STATE_QWORD);
/* Reload m15..m1; this uses g1 as a temporary, so g1 and g0 are restored afterwards. */
RESTORE_ALL_MRF;
LOAD_GRF(g1, 0x22);
/* g0 goes last: every scratch access above takes its base from g0.5. */
LOAD_GRF(g0, 0x20);
/* Clear breakpoint status */
and (1) cr0.1 cr0.1 CR0_1_ENTRY_UNMASK:ud FLAGS;
/* set breakpoint suppress this should be conditional on bes */
or (1) cr0.0 cr0.0 CR0_0_BP_SUPPRESS:ud FLAGS;
/* Clear ME_STATE_CTRL, SPF_EN and ACC_DIS in cr0.0. */
and (1) cr0.0 cr0.0 CR0_0_RETURN_MASK:ud FLAGS;
nop;