diff options
Diffstat (limited to 'drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml')
-rw-r--r-- | drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml | 2268 |
1 files changed, 2268 insertions, 0 deletions
diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml new file mode 100644 index 000000000000..cab01af55d22 --- /dev/null +++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml @@ -0,0 +1,2268 @@ +<?xml version="1.0" encoding="UTF-8"?> +<database xmlns="http://nouveau.freedesktop.org/" +xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" +xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd"> +<import file="freedreno_copyright.xml"/> +<import file="adreno/adreno_common.xml"/> + +<enum name="vgt_event_type" varset="chip"> + <value name="VS_DEALLOC" value="0"/> + <value name="PS_DEALLOC" value="1" variants="A2XX-A6XX"/> + <value name="VS_DONE_TS" value="2"/> + <value name="PS_DONE_TS" value="3"/> + <doc> + Flushes dirty data from UCHE, and also writes a GPU timestamp to + the address if one is provided. + </doc> + <value name="CACHE_FLUSH_TS" value="4"/> + <value name="CONTEXT_DONE" value="5"/> + <value name="CACHE_FLUSH" value="6" variants="A2XX-A4XX"/> + <value name="VIZQUERY_START" value="7" variants="A2XX"/> + <value name="HLSQ_FLUSH" value="7" variants="A3XX-A4XX"/> + <value name="VIZQUERY_END" value="8" variants="A2XX"/> + <value name="SC_WAIT_WC" value="9" variants="A2XX"/> + <value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/> + <value name="START_PRIMITIVE_CTRS" value="11" variants="A6XX"/> + <value name="STOP_PRIMITIVE_CTRS" value="12" variants="A6XX"/> + <!-- Not sure that these 4 events don't have the same meaning as on A5XX+ --> + <value name="RST_PIX_CNT" value="13" variants="A2XX-A4XX"/> + <value name="RST_VTX_CNT" value="14" variants="A2XX-A4XX"/> + <value name="TILE_FLUSH" value="15" variants="A2XX-A4XX"/> + <value name="STAT_EVENT" value="16" variants="A2XX-A4XX"/> + <value name="CACHE_FLUSH_AND_INV_TS_EVENT" value="20" variants="A2XX-A4XX"/> + <doc> + If A6XX_RB_SAMPLE_COUNT_CONTROL.copy is true, writes OQ Z passed + sample counts to 
RB_SAMPLE_COUNT_ADDR. This writes to main + memory, skipping UCHE. + </doc> + <value name="ZPASS_DONE" value="21"/> + <value name="CACHE_FLUSH_AND_INV_EVENT" value="22" variants="A2XX"/> + + <doc> + Writes the GPU timestamp to the address that follows, once RB + access and flushes are complete. + </doc> + <value name="RB_DONE_TS" value="22" variants="A3XX-"/> + + <value name="PERFCOUNTER_START" value="23" variants="A2XX-A4XX"/> + <value name="PERFCOUNTER_STOP" value="24" variants="A2XX-A4XX"/> + <value name="VS_FETCH_DONE" value="27"/> + <value name="FACENESS_FLUSH" value="28" variants="A2XX-A4XX"/> + + <!-- a5xx events --> + <value name="WT_DONE_TS" value="8" variants="A5XX-"/> + <value name="START_FRAGMENT_CTRS" value="13" variants="A5XX-"/> + <value name="STOP_FRAGMENT_CTRS" value="14" variants="A5XX-"/> + <value name="START_COMPUTE_CTRS" value="15" variants="A5XX-"/> + <value name="STOP_COMPUTE_CTRS" value="16" variants="A5XX-"/> + <value name="FLUSH_SO_0" value="17" variants="A5XX-"/> + <value name="FLUSH_SO_1" value="18" variants="A5XX-"/> + <value name="FLUSH_SO_2" value="19" variants="A5XX-"/> + <value name="FLUSH_SO_3" value="20" variants="A5XX-"/> + + <doc> + Invalidates depth attachment data from the CCU. We assume this + happens in the last stage. + </doc> + <value name="PC_CCU_INVALIDATE_DEPTH" value="24" variants="A5XX-"/> + + <doc> + Invalidates color attachment data from the CCU. We assume this + happens in the last stage. + </doc> + <value name="PC_CCU_INVALIDATE_COLOR" value="25" variants="A5XX-"/> + + <doc> + Flushes the small cache used by CP_EVENT_WRITE::BLIT (which, + along with its registers, would be better named RESOLVE). + </doc> + <value name="PC_CCU_RESOLVE_TS" value="26" variants="A6XX"/> + + <doc> + Flushes depth attachment data from the CCU. We assume this + happens in the last stage. + </doc> + <value name="PC_CCU_FLUSH_DEPTH_TS" value="28" variants="A5XX-"/> + + <doc> + Flushes color attachment data from the CCU. 
We assume this + happens in the last stage. + </doc> + <value name="PC_CCU_FLUSH_COLOR_TS" value="29" variants="A5XX-"/> + + <doc> + 2D blit to resolve GMEM to system memory (skipping CCU) at the + end of a render pass. Compare to CP_BLIT's BLIT_OP_SCALE for + more general blitting. + </doc> + <value name="BLIT" value="30" variants="A5XX-"/> + + <doc> + Clears based on GRAS_LRZ_CNTL configuration, could clear + fast-clear buffer or LRZ direction. + LRZ direction is stored at lrz_fc_offset + 0x200, has 1 byte which + could be expressed by enum: + CUR_DIR_DISABLED = 0x0 + CUR_DIR_GE = 0x1 + CUR_DIR_LE = 0x2 + CUR_DIR_UNSET = 0x3 + Clear of direction means setting the direction to CUR_DIR_UNSET. + </doc> + <value name="LRZ_CLEAR" value="37" variants="A5XX-"/> + + <value name="LRZ_FLUSH" value="38" variants="A5XX-"/> + <value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/> + <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/> + <value name="UNK_40" value="40" variants="A7XX"/> + <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/> + <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/> + <value name="UNK_2C" value="44" variants="A5XX-"/> + <value name="UNK_2D" value="45" variants="A5XX-"/> + + <!-- a6xx events --> + <doc> + Invalidates UCHE. 
+ </doc> + <value name="CACHE_INVALIDATE" value="49" variants="A6XX"/> + + <value name="LABEL" value="63" variants="A6XX-"/> + + <!-- note, some of these are the same as a6xx, just named differently --> + + <doc> Doesn't seem to do anything </doc> + <value name="DUMMY_EVENT" value="1" variants="A7XX"/> + <value name="CCU_INVALIDATE_DEPTH" value="24" variants="A7XX"/> + <value name="CCU_INVALIDATE_COLOR" value="25" variants="A7XX"/> + <value name="CCU_RESOLVE_CLEAN" value="26" variants="A7XX"/> + <value name="CCU_FLUSH_DEPTH" value="28" variants="A7XX"/> + <value name="CCU_FLUSH_COLOR" value="29" variants="A7XX"/> + <value name="CCU_RESOLVE" value="30" variants="A7XX"/> + <value name="CCU_END_RESOLVE_GROUP" value="31" variants="A7XX"/> + <value name="CCU_CLEAN_DEPTH" value="32" variants="A7XX"/> + <value name="CCU_CLEAN_COLOR" value="33" variants="A7XX"/> + <value name="CACHE_RESET" value="48" variants="A7XX"/> + <value name="CACHE_CLEAN" value="49" variants="A7XX"/> + <!-- TODO: deal with name conflicts with other gens --> + <value name="CACHE_FLUSH7" value="50" variants="A7XX"/> + <value name="CACHE_INVALIDATE7" value="51" variants="A7XX"/> +</enum> + +<enum name="pc_di_primtype"> + <value name="DI_PT_NONE" value="0"/> + <!-- POINTLIST_PSIZE is used on a3xx/a4xx when gl_PointSize is written: --> + <value name="DI_PT_POINTLIST_PSIZE" value="1"/> + <value name="DI_PT_LINELIST" value="2"/> + <value name="DI_PT_LINESTRIP" value="3"/> + <value name="DI_PT_TRILIST" value="4"/> + <value name="DI_PT_TRIFAN" value="5"/> + <value name="DI_PT_TRISTRIP" value="6"/> + <value name="DI_PT_LINELOOP" value="7"/> <!-- a22x, a3xx --> + <value name="DI_PT_RECTLIST" value="8"/> + <value name="DI_PT_POINTLIST" value="9"/> + <value name="DI_PT_LINE_ADJ" value="0xa"/> + <value name="DI_PT_LINESTRIP_ADJ" value="0xb"/> + <value name="DI_PT_TRI_ADJ" value="0xc"/> + <value name="DI_PT_TRISTRIP_ADJ" value="0xd"/> + + <value name="DI_PT_PATCHES0" value="0x1f"/> + <value name="DI_PT_PATCHES1" 
value="0x20"/> + <value name="DI_PT_PATCHES2" value="0x21"/> + <value name="DI_PT_PATCHES3" value="0x22"/> + <value name="DI_PT_PATCHES4" value="0x23"/> + <value name="DI_PT_PATCHES5" value="0x24"/> + <value name="DI_PT_PATCHES6" value="0x25"/> + <value name="DI_PT_PATCHES7" value="0x26"/> + <value name="DI_PT_PATCHES8" value="0x27"/> + <value name="DI_PT_PATCHES9" value="0x28"/> + <value name="DI_PT_PATCHES10" value="0x29"/> + <value name="DI_PT_PATCHES11" value="0x2a"/> + <value name="DI_PT_PATCHES12" value="0x2b"/> + <value name="DI_PT_PATCHES13" value="0x2c"/> + <value name="DI_PT_PATCHES14" value="0x2d"/> + <value name="DI_PT_PATCHES15" value="0x2e"/> + <value name="DI_PT_PATCHES16" value="0x2f"/> + <value name="DI_PT_PATCHES17" value="0x30"/> + <value name="DI_PT_PATCHES18" value="0x31"/> + <value name="DI_PT_PATCHES19" value="0x32"/> + <value name="DI_PT_PATCHES20" value="0x33"/> + <value name="DI_PT_PATCHES21" value="0x34"/> + <value name="DI_PT_PATCHES22" value="0x35"/> + <value name="DI_PT_PATCHES23" value="0x36"/> + <value name="DI_PT_PATCHES24" value="0x37"/> + <value name="DI_PT_PATCHES25" value="0x38"/> + <value name="DI_PT_PATCHES26" value="0x39"/> + <value name="DI_PT_PATCHES27" value="0x3a"/> + <value name="DI_PT_PATCHES28" value="0x3b"/> + <value name="DI_PT_PATCHES29" value="0x3c"/> + <value name="DI_PT_PATCHES30" value="0x3d"/> + <value name="DI_PT_PATCHES31" value="0x3e"/> +</enum> + +<enum name="pc_di_src_sel"> + <value name="DI_SRC_SEL_DMA" value="0"/> + <value name="DI_SRC_SEL_IMMEDIATE" value="1"/> + <value name="DI_SRC_SEL_AUTO_INDEX" value="2"/> + <value name="DI_SRC_SEL_AUTO_XFB" value="3"/> +</enum> + +<enum name="pc_di_face_cull_sel"> + <value name="DI_FACE_CULL_NONE" value="0"/> + <value name="DI_FACE_CULL_FETCH" value="1"/> + <value name="DI_FACE_BACKFACE_CULL" value="2"/> + <value name="DI_FACE_FRONTFACE_CULL" value="3"/> +</enum> + +<enum name="pc_di_index_size"> + <value name="INDEX_SIZE_IGN" value="0"/> + <value 
name="INDEX_SIZE_16_BIT" value="0"/> + <value name="INDEX_SIZE_32_BIT" value="1"/> + <value name="INDEX_SIZE_8_BIT" value="2"/> + <value name="INDEX_SIZE_INVALID"/> +</enum> + +<enum name="pc_di_vis_cull_mode"> + <value name="IGNORE_VISIBILITY" value="0"/> + <value name="USE_VISIBILITY" value="1"/> +</enum> + +<enum name="adreno_pm4_packet_type"> + <value name="CP_TYPE0_PKT" value="0x00000000"/> + <value name="CP_TYPE1_PKT" value="0x40000000"/> + <value name="CP_TYPE2_PKT" value="0x80000000"/> + <value name="CP_TYPE3_PKT" value="0xc0000000"/> + <value name="CP_TYPE4_PKT" value="0x40000000"/> + <value name="CP_TYPE7_PKT" value="0x70000000"/> +</enum> + +<!-- + Note that in some cases, the same packet id is recycled on a later + generation, so variants attribute is used to distinguish. They + may not be completely accurate, we would probably have to analyze + the pfp and me/pm4 firmware to verify the packet is actually + handled on a particular generation. But it is at least enough to + disambiguate the packet-id's that were re-used for different + packets starting with a5xx. + --> +<enum name="adreno_pm4_type3_packets" varset="chip"> + <doc>initialize CP's micro-engine</doc> + <value name="CP_ME_INIT" value="0x48"/> + <doc>skip N 32-bit words to get to the next packet</doc> + <value name="CP_NOP" value="0x10"/> + <doc> + indirect buffer dispatch. prefetch parser uses this packet + type to determine whether to pre-fetch the IB + </doc> + <value name="CP_PREEMPT_ENABLE" value="0x1c" variants="A5XX"/> + <value name="CP_PREEMPT_TOKEN" value="0x1e" variants="A5XX"/> + <value name="CP_INDIRECT_BUFFER" value="0x3f"/> + <doc> + Takes the same arguments as CP_INDIRECT_BUFFER, but jumps to + another buffer at the same level. Must be at the end of IB, and + doesn't work with draw state IB's. + </doc> + <value name="CP_INDIRECT_BUFFER_CHAIN" value="0x57" variants="A5XX-"/> + <doc>indirect buffer dispatch. 
same as IB, but init is pipelined</doc> + <value name="CP_INDIRECT_BUFFER_PFD" value="0x37"/> + <doc> + Waits for the IDLE state of the engine before further drawing. + This is pipelined, so the CP may continue. + </doc> + <value name="CP_WAIT_FOR_IDLE" value="0x26"/> + <doc>wait until a register or memory location is a specific value</doc> + <value name="CP_WAIT_REG_MEM" value="0x3c"/> + <doc>wait until a register location is equal to a specific value</doc> + <value name="CP_WAIT_REG_EQ" value="0x52"/> + <doc>wait until a register location is >= a specific value</doc> + <value name="CP_WAIT_REG_GTE" value="0x53" variants="A2XX-A4XX"/> + <doc>wait until a read completes</doc> + <value name="CP_WAIT_UNTIL_READ" value="0x5c" variants="A2XX-A4XX"/> + <doc>wait until all base/size writes from an IB_PFD packet have completed</doc> + <!-- + NOTE: CP_WAIT_IB_PFD_COMPLETE unimplemented at least since a5xx fw, and + recycled for something new on a7xx + --> + <value name="CP_WAIT_IB_PFD_COMPLETE" value="0x5d" varset="chip" variants="A2XX-A4XX"/> + <doc>register read/modify/write</doc> + <value name="CP_REG_RMW" value="0x21"/> + <doc>Set binning configuration registers</doc> + <value name="CP_SET_BIN_DATA" value="0x2f" variants="A2XX-A4XX"/> + <value name="CP_SET_BIN_DATA5" value="0x2f" variants="A5XX-"/> + <doc>reads register in chip and writes to memory</doc> + <value name="CP_REG_TO_MEM" value="0x3e"/> + <doc>write N 32-bit words to memory</doc> + <value name="CP_MEM_WRITE" value="0x3d"/> + <doc>write CP_PROG_COUNTER value to memory</doc> + <value name="CP_MEM_WRITE_CNTR" value="0x4f"/> + <doc>conditional execution of a sequence of packets</doc> + <value name="CP_COND_EXEC" value="0x44"/> + <doc>conditional write to memory or register</doc> + <value name="CP_COND_WRITE" value="0x45" variants="A2XX-A4XX"/> + <value name="CP_COND_WRITE5" value="0x45" variants="A5XX-"/> + <doc>generate an event that creates a write to memory when completed</doc> + <value name="CP_EVENT_WRITE" 
value="0x46" variants="A2XX-A6XX"/> + <value name="CP_EVENT_WRITE7" value="0x46" variants="A7XX-"/> + <doc>generate a VS|PS_done event</doc> + <value name="CP_EVENT_WRITE_SHD" value="0x58"/> + <doc>generate a cache flush done event</doc> + <value name="CP_EVENT_WRITE_CFL" value="0x59"/> + <doc>generate a z_pass done event</doc> + <value name="CP_EVENT_WRITE_ZPD" value="0x5b"/> + <doc> + not sure the real name, but this seems to be what is used for + opencl, instead of CP_DRAW_INDX.. + </doc> + <value name="CP_RUN_OPENCL" value="0x31"/> + <doc>initiate fetch of index buffer and draw</doc> + <value name="CP_DRAW_INDX" value="0x22"/> + <doc>draw using supplied indices in packet</doc> + <value name="CP_DRAW_INDX_2" value="0x36" variants="A2XX-A4XX"/> <!-- this is something different on a6xx and unused on a5xx --> + <doc>initiate fetch of index buffer and binIDs and draw</doc> + <value name="CP_DRAW_INDX_BIN" value="0x34" variants="A2XX-A4XX"/> + <doc>initiate fetch of bin IDs and draw using supplied indices</doc> + <value name="CP_DRAW_INDX_2_BIN" value="0x35" variants="A2XX-A4XX"/> + <doc>begin/end initiator for viz query extent processing</doc> + <value name="CP_VIZ_QUERY" value="0x23" variants="A2XX-A4XX"/> + <doc>fetch state sub-blocks and initiate shader code DMAs</doc> + <value name="CP_SET_STATE" value="0x25"/> + <doc>load constant into chip and to memory</doc> + <value name="CP_SET_CONSTANT" value="0x2d" variants="A2XX"/> + <doc>load sequencer instruction memory (pointer-based)</doc> + <value name="CP_IM_LOAD" value="0x27"/> + <doc>load sequencer instruction memory (code embedded in packet)</doc> + <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/> + <doc>load constants from a location in memory</doc> + <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/> + <doc>selective invalidation of state pointers</doc> + <value name="CP_INVALIDATE_STATE" value="0x3b"/> + <doc>dynamically changes shader instruction memory partition</doc> + <value 
name="CP_SET_SHADER_BASES" value="0x4a" variants="A2XX-A4XX"/> + <doc>sets the 64-bit BIN_MASK register in the PFP</doc> + <value name="CP_SET_BIN_MASK" value="0x50" variants="A2XX-A4XX"/> + <doc>sets the 64-bit BIN_SELECT register in the PFP</doc> + <value name="CP_SET_BIN_SELECT" value="0x51" variants="A2XX-A4XX"/> + <doc>updates the current context, if needed</doc> + <value name="CP_CONTEXT_UPDATE" value="0x5e"/> + <doc>generate interrupt from the command stream</doc> + <value name="CP_INTERRUPT" value="0x40"/> + <doc>copy sequencer instruction memory to system memory</doc> + <value name="CP_IM_STORE" value="0x2c" variants="A2XX"/> + + <!-- For a20x --> +<!-- TODO handle variants.. + <doc> + Program an offset that will be added to the BIN_BASE value of + the 3D_DRAW_INDX_BIN packet + </doc> + <value name="CP_SET_BIN_BASE_OFFSET" value="0x4b"/> + --> + + <!-- for a22x --> + <doc> + sets draw initiator flags register in PFP, gets bitwise-ORed into + every draw initiator + </doc> + <value name="CP_SET_DRAW_INIT_FLAGS" value="0x4b"/> + <doc>sets the register protection mode</doc> + <value name="CP_SET_PROTECTED_MODE" value="0x5f"/> + + <value name="CP_BOOTSTRAP_UCODE" value="0x6f"/> + + <!-- for a3xx --> + <doc>load high level sequencer command</doc> + <value name="CP_LOAD_STATE" value="0x30" variants="A3XX"/> + <value name="CP_LOAD_STATE4" value="0x30" variants="A4XX-A5XX"/> + <doc>Conditionally load a IB based on a flag, prefetch enabled</doc> + <value name="CP_COND_INDIRECT_BUFFER_PFE" value="0x3a"/> + <doc>Conditionally load a IB based on a flag, prefetch disabled</doc> + <value name="CP_COND_INDIRECT_BUFFER_PFD" value="0x32" variants="A3XX"/> + <doc>Load a buffer with pre-fetch enabled</doc> + <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/> + <doc>Set bin (?)</doc> + <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/> + + <doc>test 2 memory locations to dword values specified</doc> + <value name="CP_TEST_TWO_MEMS" value="0x71"/> + + 
<doc>Write register, ignoring context state for context sensitive registers</doc> + <value name="CP_REG_WR_NO_CTXT" value="0x78"/> + + <doc>Record the real-time when this packet is processed by PFP</doc> + <value name="CP_RECORD_PFP_TIMESTAMP" value="0x11"/> + + <!-- Used to switch GPU between secure and non-secure modes --> + <value name="CP_SET_SECURE_MODE" value="0x66"/> + + <doc>PFP waits until the FIFO between the PFP and the ME is empty</doc> + <value name="CP_WAIT_FOR_ME" value="0x13"/> + + <!-- for a4xx --> + <doc> + Used a bit like CP_SET_CONSTANT on a2xx, but can write multiple + groups of registers. Looks like it can be used to create state + objects in GPU memory, and on state change only emit pointer + (via CP_SET_DRAW_STATE), which should be nice for reducing CPU + overhead: + + (A4x) save PM4 stream pointers to execute upon a visible draw + </doc> + <value name="CP_SET_DRAW_STATE" value="0x43" variants="A4XX-"/> + <value name="CP_DRAW_INDX_OFFSET" value="0x38"/> + <value name="CP_DRAW_INDIRECT" value="0x28" variants="A4XX-"/> + <value name="CP_DRAW_INDX_INDIRECT" value="0x29" variants="A4XX-"/> + <value name="CP_DRAW_INDIRECT_MULTI" value="0x2a" variants="A6XX-"/> + <value name="CP_DRAW_AUTO" value="0x24"/> + + <doc> + Enable or disable predication globally. Also resets the + predicate to "passing" and the local bit to enabled when + enabling global predication. + </doc> + <value name="CP_DRAW_PRED_ENABLE_GLOBAL" value="0x19"/> + + <doc> + Enable or disable predication locally. Unlike globally enabling + predication, this packet doesn't touch any other state. + Predication only happens when enabled globally and locally and a + predicate has been set. This should be used for internal draws + which aren't supposed to use the predication state: + + CP_DRAW_PRED_ENABLE_LOCAL(0) + ... do draw... + CP_DRAW_PRED_ENABLE_LOCAL(1) + </doc> + <value name="CP_DRAW_PRED_ENABLE_LOCAL" value="0x1a"/> + + <doc> + Latch a draw predicate into the internal register. 
+ </doc> + <value name="CP_DRAW_PRED_SET" value="0x4e"/> + + <doc> + for A4xx + Write to register with address that does not fit into type-0 pkt + </doc> + <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/> + + <doc>copy from ME scratch RAM to a register</doc> + <value name="CP_SCRATCH_TO_REG" value="0x4d"/> + + <doc>Copy from REG to ME scratch RAM</doc> + <value name="CP_REG_TO_SCRATCH" value="0x4a"/> + + <doc>Wait for memory writes to complete</doc> + <value name="CP_WAIT_MEM_WRITES" value="0x12"/> + + <doc>Conditional execution based on register comparison</doc> + <value name="CP_COND_REG_EXEC" value="0x47"/> + + <doc>Memory to REG copy</doc> + <value name="CP_MEM_TO_REG" value="0x42"/> + + <value name="CP_EXEC_CS_INDIRECT" value="0x41" variants="A4XX-"/> + <value name="CP_EXEC_CS" value="0x33"/> + + <doc> + for a5xx + </doc> + <value name="CP_PERFCOUNTER_ACTION" value="0x50" variants="A5XX"/> + <!-- switches SMMU pagetable, used on a5xx+ only --> + <value name="CP_SMMU_TABLE_UPDATE" value="0x53" variants="A5XX-"/> + <!-- for a6xx --> + <doc>Tells CP the current mode of GPU operation</doc> + <value name="CP_SET_MARKER" value="0x65" variants="A6XX-"/> + <doc>Instruct CP to set a few internal CP registers</doc> + <value name="CP_SET_PSEUDO_REG" value="0x56" variants="A6XX-"/> + <!-- + pairs of regid and value.. seems to be used to program some TF + related regs: + --> + <value name="CP_CONTEXT_REG_BUNCH" value="0x5c" variants="A5XX-"/> + <!-- A5XX Enable yield in RB only --> + <value name="CP_YIELD_ENABLE" value="0x1c" variants="A5XX"/> + <doc> + Enables IB2 skipping. If both GLOBAL and LOCAL are 1 and + nothing is left in the visibility stream, then + CP_INDIRECT_BUFFER will be skipped, and draws will early return + from their IB. 
+ </doc> + <value name="CP_SKIP_IB2_ENABLE_GLOBAL" value="0x1d" variants="A5XX-"/> + <value name="CP_SKIP_IB2_ENABLE_LOCAL" value="0x23" variants="A5XX-"/> + <value name="CP_SET_SUBDRAW_SIZE" value="0x35" variants="A5XX-"/> + <value name="CP_WHERE_AM_I" value="0x62" variants="A5XX-"/> + <value name="CP_SET_VISIBILITY_OVERRIDE" value="0x64" variants="A5XX-"/> + <!-- Enable/Disable/Defer A5x global preemption model --> + <value name="CP_PREEMPT_ENABLE_GLOBAL" value="0x69" variants="A5XX"/> + <!-- Enable/Disable A5x local preemption model --> + <value name="CP_PREEMPT_ENABLE_LOCAL" value="0x6a" variants="A5XX"/> + <!-- Yield token on a5xx similar to CP_PREEMPT on a4xx --> + <value name="CP_CONTEXT_SWITCH_YIELD" value="0x6b" variants="A5XX-"/> + <!-- Inform CP about current render mode (needed for a5xx preemption) --> + <value name="CP_SET_RENDER_MODE" value="0x6c" variants="A5XX"/> + <value name="CP_COMPUTE_CHECKPOINT" value="0x6e" variants="A5XX"/> + <!-- check if this works on earlier.. --> + <value name="CP_MEM_TO_MEM" value="0x73" variants="A5XX-"/> + + <doc> + General purpose 2D blit engine for image transfers and mipmap + generation. Reads through UCHE, writes through the CCU cache in + the PS stage. + </doc> + <value name="CP_BLIT" value="0x2c" variants="A5XX-"/> + + <!-- Test specified bit in specified register and set predicate --> + <value name="CP_REG_TEST" value="0x39" variants="A5XX-"/> + + <!-- + Seems to set the mode flags which control which CP_SET_DRAW_STATE + packets are executed, based on their ENABLE_MASK values + + CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE + packets w/ ENABLE_MASK & 0x6 to execute immediately + --> + <value name="CP_SET_MODE" value="0x63" variants="A6XX-"/> + + <!-- + Seems like there are now separate blocks of state for VS vs FS/CS + (probably these amounts to geometry vs fragments so that geometry + stage of the pipeline for next draw can start while fragment stage + of current draw is still running. 
The format of the payload of the + packets is the same, the only difference is the offsets of the regs + the firmware code that handles the packet writes. + + Note that for CL, starting with a6xx, the preferred # of local + threads is no longer the same as the max, implying that the shader + core can now run warps from unrelated shaders (ie. + CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE vs + CL_KERNEL_WORK_GROUP_SIZE) + --> + <value name="CP_LOAD_STATE6_GEOM" value="0x32" variants="A6XX-"/> + <value name="CP_LOAD_STATE6_FRAG" value="0x34" variants="A6XX-"/> + <!-- + Note: For IBO state (Image/SSBOs) which have shared state across + shader stages, for 3d pipeline CP_LOAD_STATE6 is used. But for + compute shaders, CP_LOAD_STATE6_FRAG is used. Possibly they are + interchangeable. + --> + <value name="CP_LOAD_STATE6" value="0x36" variants="A6XX-"/> + + <!-- internal packets: --> + <value name="IN_IB_PREFETCH_END" value="0x17" variants="A2XX"/> + <value name="IN_SUBBLK_PREFETCH" value="0x1f" variants="A2XX"/> + <value name="IN_INSTR_PREFETCH" value="0x20" variants="A2XX"/> + <value name="IN_INSTR_MATCH" value="0x47" variants="A2XX"/> + <value name="IN_CONST_PREFETCH" value="0x49" variants="A2XX"/> + <value name="IN_INCR_UPDT_STATE" value="0x55" variants="A2XX"/> + <value name="IN_INCR_UPDT_CONST" value="0x56" variants="A2XX"/> + <value name="IN_INCR_UPDT_INSTR" value="0x57" variants="A2XX"/> + + <!-- internal jumptable entries on a6xx+, possibly a5xx: --> + + <!-- jmptable entry used to handle type4 packet on a5xx+: --> + <value name="PKT4" value="0x04" variants="A5XX-"/> + <!-- called when ROQ is empty, "returns" from an IB or merged sequence of IBs --> + <value name="IN_IB_END" value="0x0a" variants="A6XX-"/> + <!-- handles IFPC save/restore --> + <value name="IN_GMU_INTERRUPT" value="0x0b" variants="A6XX-"/> + <!-- preemption/context-switch routine --> + <value name="IN_PREEMPT" value="0x0f" variants="A6XX-"/> + + <!-- TODO do these exist on A5xx? 
--> + <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/> + <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX-"/> + <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX-"/> + <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/> + <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/> + <value name="CP_MEMCPY" value="0x75" variants="A6XX-"/> + <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX-"/> + <!-- A750+, set in place of CP_SET_BIN_DATA5_OFFSET but has different values --> + <value name="CP_SET_UNK_BIN_DATA" value="0x2d" variants="A7XX-"/> + <doc> + Write CP_CONTEXT_SWITCH_*_INFO from CP to the following dwords, + and forcibly switch to the indicated context. + </doc> + <value name="CP_CONTEXT_SWITCH" value="0x54" variants="A6XX"/> + <!-- Note, kgsl calls this CP_SET_AMBLE: --> + <value name="CP_SET_CTXSWITCH_IB" value="0x55" variants="A6XX-"/> + + <!-- + Seems to always have the payload: + 00000002 00008801 00004010 + or: + 00000002 00008801 00004090 + or: + 00000002 00008801 00000010 + 00000002 00008801 00010010 + 00000002 00008801 00d64010 + ... + Note set for compute shaders.. + Is 0x8801 a register offset? + This appears to be a special sort of register write packet + more or less, but the firmware has some special handling.. + Seems like it intercepts/modifies certain register offsets, + but others are treated like a normal PKT4 reg write. I + guess there are some registers that the fw controls certain + bits. + --> + <value name="CP_REG_WRITE" value="0x6d" variants="A6XX"/> + + <doc> + These first appear in a650_sqe.bin. They can in theory be used + to loop any sequence of IB1 commands, but in practice they are + used to loop over bins. 
There is a fixed-size per-iteration + prefix, used to set per-bin state, and then the following IB1 + commands are executed until CP_END_BIN which are always the same + for each iteration and usually contain a list of + CP_INDIRECT_BUFFER calls to IB2 commands which setup state and + execute restore/draw/save commands. This replaces the previous + technique of just repeating the CP_INDIRECT_BUFFER calls and + "unrolling" the loop. + </doc> + <value name="CP_START_BIN" value="0x50" variants="A6XX-"/> + <value name="CP_END_BIN" value="0x51" variants="A6XX-"/> + + <doc> Make next dword 1 to disable preemption, 0 to re-enable it. </doc> + <value name="CP_PREEMPT_DISABLE" value="0x6c" variants="A6XX"/> + + <value name="CP_WAIT_TIMESTAMP" value="0x14" variants="A7XX-"/> + <value name="CP_GLOBAL_TIMESTAMP" value="0x15" variants="A7XX-"/> <!-- payload 1 dword --> + <value name="CP_LOCAL_TIMESTAMP" value="0x16" variants="A7XX-"/> <!-- payload 1 dword, follows 0x15 --> + <value name="CP_THREAD_CONTROL" value="0x17" variants="A7XX-"/> + <!-- payload 4 dwords, last two could be render target addr (one pkt per MRT), possibly used for GMEM save/restore?--> + <value name="CP_RESOURCE_LIST" value="0x18" variants="A7XX-"/> + <doc> Can clear BV/BR counters, or wait until one catches up to another </doc> + <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/> + <doc> Clears, adds to local, or adds to global timestamp </doc> + <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/> + <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? --> + <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/> + <doc> + Write to a scratch memory that is read by CP_REG_TEST with + SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers. + However it uses the same memory space. 
+ </doc> + <value name="CP_MEM_TO_SCRATCH_MEM" value="0x49" variants="A7XX-"/> + + <doc> + Executes an array of fixed-size command buffers where each + buffer is assumed to have one draw call, skipping buffers with + non-visible draw calls. + </doc> + <value name="CP_FIXED_STRIDE_DRAW_TABLE" value="0x7f" variants="A7XX-"/> + + <doc>Reset various on-chip state used for synchronization</doc> + <value name="CP_RESET_CONTEXT_STATE" value="0x1f" variants="A7XX-"/> +</enum> + + +<domain name="CP_LOAD_STATE" width="32"> + <doc>Load state, a3xx (and later?)</doc> + <enum name="adreno_state_block"> + <value name="SB_VERT_TEX" value="0"/> + <value name="SB_VERT_MIPADDR" value="1"/> + <value name="SB_FRAG_TEX" value="2"/> + <value name="SB_FRAG_MIPADDR" value="3"/> + <value name="SB_VERT_SHADER" value="4"/> + <value name="SB_GEOM_SHADER" value="5"/> + <value name="SB_FRAG_SHADER" value="6"/> + <value name="SB_COMPUTE_SHADER" value="7"/> + </enum> + <enum name="adreno_state_type"> + <value name="ST_SHADER" value="0"/> + <value name="ST_CONSTANTS" value="1"/> + </enum> + <enum name="adreno_state_src"> + <value name="SS_DIRECT" value="0"> + <doc>inline with the CP_LOAD_STATE packet</doc> + </value> + <value name="SS_INVALID_ALL_IC" value="2"/> + <value name="SS_INVALID_PART_IC" value="3"/> + <value name="SS_INDIRECT" value="4"> + <doc>in buffer pointed to by EXT_SRC_ADDR</doc> + </value> + <value name="SS_INDIRECT_TCM" value="5"/> + <value name="SS_INDIRECT_STM" value="6"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="DST_OFF" low="0" high="15" type="uint"/> + <bitfield name="STATE_SRC" low="16" high="18" type="adreno_state_src"/> + <bitfield name="STATE_BLOCK" low="19" high="21" type="adreno_state_block"/> + <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="STATE_TYPE" low="0" high="1" type="adreno_state_type"/> + <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> + </reg32> +</domain> + 
+<domain name="CP_LOAD_STATE4" width="32" varset="chip"> + <doc>Load state, a4xx+</doc> + <enum name="a4xx_state_block"> + <!-- + unknown: 0x7 and 0xf <- seen in compute shader + + STATE_BLOCK = 0x6, STATE_TYPE = 0x2 possibly used for preemption? + Seen in some GL shaders. Payload is NUM_UNIT dwords, and it contains + the gpuaddr of the following shader constants block. DST_OFF seems + to specify which shader stage: + + 16 -> vert + 36 -> tcs + 56 -> tes + 76 -> geom + 96 -> frag + + Example: + +opcode: CP_LOAD_STATE4 (30) (12 dwords) + { DST_OFF = 16 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = 0x6 | NUM_UNIT = 4 } + { STATE_TYPE = 0x2 | EXT_SRC_ADDR = 0 } + { EXT_SRC_ADDR_HI = 0 } + 0000: c0264100 00000000 00000000 00000000 + 0000: 70b0000b 01180010 00000002 00000000 c0264100 00000000 00000000 00000000 + +opcode: CP_LOAD_STATE4 (30) (4 dwords) + { DST_OFF = 16 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_VS_SHADER | NUM_UNIT = 4 } + { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0xc0264100 } + { EXT_SRC_ADDR_HI = 0 } + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 + 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 + 0000: 00000040 0000000c 00000000 00000000 00000000 00000000 00000000 00000000 + + STATE_BLOCK = 0x6, STATE_TYPE = 0x1, seen in compute shader. NUM_UNITS * 2 dwords. + + --> + <value name="SB4_VS_TEX" value="0x0"/> + <value name="SB4_HS_TEX" value="0x1"/> <!-- aka. TCS --> + <value name="SB4_DS_TEX" value="0x2"/> <!-- aka. 
TES --> + <value name="SB4_GS_TEX" value="0x3"/> + <value name="SB4_FS_TEX" value="0x4"/> + <value name="SB4_CS_TEX" value="0x5"/> + <value name="SB4_VS_SHADER" value="0x8"/> + <value name="SB4_HS_SHADER" value="0x9"/> + <value name="SB4_DS_SHADER" value="0xa"/> + <value name="SB4_GS_SHADER" value="0xb"/> + <value name="SB4_FS_SHADER" value="0xc"/> + <value name="SB4_CS_SHADER" value="0xd"/> + <!-- + for SSBO, STATE_TYPE=0 appears to be addresses (four dwords each), + STATE_TYPE=1 sizes, STATE_TYPE=2 addresses again (two dwords each) + + Compute has its own dedicated SSBO state, it seems, but the rest + of the stages share state + --> + <value name="SB4_SSBO" value="0xe"/> + <value name="SB4_CS_SSBO" value="0xf"/> + </enum> + <enum name="a4xx_state_type"> + <value name="ST4_SHADER" value="0"/> + <value name="ST4_CONSTANTS" value="1"/> + <value name="ST4_UBO" value="2"/> + </enum> + <enum name="a4xx_state_src"> + <value name="SS4_DIRECT" value="0"/> + <value name="SS4_INDIRECT" value="2"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="DST_OFF" low="0" high="13" type="uint"/> + <bitfield name="STATE_SRC" low="16" high="17" type="a4xx_state_src"/> + <bitfield name="STATE_BLOCK" low="18" high="21" type="a4xx_state_block"/> + <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="STATE_TYPE" low="0" high="1" type="a4xx_state_type"/> + <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> + </reg32> +</domain> + +<!-- looks basically the same as CP_LOAD_STATE4 --> +<domain name="CP_LOAD_STATE6" width="32" varset="chip"> + <doc>Load state, a6xx+</doc> + <enum name="a6xx_state_block"> + <value name="SB6_VS_TEX" value="0x0"/> + <value name="SB6_HS_TEX" value="0x1"/> <!-- aka. TCS --> + <value name="SB6_DS_TEX" value="0x2"/> <!-- aka. 
TES --> + <value name="SB6_GS_TEX" value="0x3"/> + <value name="SB6_FS_TEX" value="0x4"/> + <value name="SB6_CS_TEX" value="0x5"/> + <value name="SB6_VS_SHADER" value="0x8"/> + <value name="SB6_HS_SHADER" value="0x9"/> + <value name="SB6_DS_SHADER" value="0xa"/> + <value name="SB6_GS_SHADER" value="0xb"/> + <value name="SB6_FS_SHADER" value="0xc"/> + <value name="SB6_CS_SHADER" value="0xd"/> + <value name="SB6_IBO" value="0xe"/> + <value name="SB6_CS_IBO" value="0xf"/> + </enum> + <enum name="a6xx_state_type"> + <value name="ST6_SHADER" value="0"/> + <value name="ST6_CONSTANTS" value="1"/> + <value name="ST6_UBO" value="2"/> + <value name="ST6_IBO" value="3"/> + </enum> + <enum name="a6xx_state_src"> + <value name="SS6_DIRECT" value="0"/> + <value name="SS6_BINDLESS" value="1"/> <!-- TODO does this exist on a4xx/a5xx? --> + <value name="SS6_INDIRECT" value="2"/> + <doc> + SS6_UBO used by the a6xx vulkan blob with tessellation constants + in this case, EXT_SRC_ADDR is (ubo_id shl 16 | offset) + to load constants from a UBO loaded with DST_OFF = 14 and offset 0, + EXT_SRC_ADDR = 0xe0000 + (offset is a guess, should be in bytes given that maxUniformBufferRange=64k) + </doc> + <value name="SS6_UBO" value="3"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="DST_OFF" low="0" high="13" type="uint"/> + <bitfield name="STATE_TYPE" low="14" high="15" type="a6xx_state_type"/> + <bitfield name="STATE_SRC" low="16" high="17" type="a6xx_state_src"/> + <bitfield name="STATE_BLOCK" low="18" high="21" type="a6xx_state_block"/> + <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/> + </reg32> + <reg64 offset="1" name="EXT_SRC_ADDR" type="address"/> +</domain> + +<bitset name="vgt_draw_initiator" inline="yes"> + <bitfield name="PRIM_TYPE" low="0" high="5" 
type="pc_di_primtype"/> + <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> + <bitfield name="VIS_CULL" low="9" high="10" type="pc_di_vis_cull_mode"/> + <bitfield name="INDEX_SIZE" pos="11" type="pc_di_index_size"/> + <bitfield name="NOT_EOP" pos="12" type="boolean"/> + <bitfield name="SMALL_INDEX" pos="13" type="boolean"/> + <bitfield name="PRE_DRAW_INITIATOR_ENABLE" pos="14" type="boolean"/> + <bitfield name="NUM_INSTANCES" low="24" high="31" type="uint"/> +</bitset> + +<!-- changed on a4xx: --> +<enum name="a4xx_index_size"> + <value name="INDEX4_SIZE_8_BIT" value="0"/> + <value name="INDEX4_SIZE_16_BIT" value="1"/> + <value name="INDEX4_SIZE_32_BIT" value="2"/> +</enum> + +<enum name="a6xx_patch_type"> + <value name="TESS_QUADS" value="0"/> + <value name="TESS_TRIANGLES" value="1"/> + <value name="TESS_ISOLINES" value="2"/> +</enum> + +<bitset name="vgt_draw_initiator_a4xx" inline="yes"> + <!-- When the 0x20 bit is set, it's the number of patch vertices - 1 --> + <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/> + <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/> + <bitfield name="VIS_CULL" low="8" high="9" type="pc_di_vis_cull_mode"/> + <bitfield name="INDEX_SIZE" low="10" high="11" type="a4xx_index_size"/> + <bitfield name="PATCH_TYPE" low="12" high="13" type="a6xx_patch_type"/> + <bitfield name="GS_ENABLE" pos="16" type="boolean"/> + <bitfield name="TESS_ENABLE" pos="17" type="boolean"/> +</bitset> + +<domain name="CP_DRAW_INDX" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="VIZ_QUERY" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1" type="vgt_draw_initiator"/> + <reg32 offset="2" name="2"> + <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="INDX_BASE" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="INDX_SIZE" low="0" high="31"/> + </reg32> +</domain> + +<domain 
name="CP_DRAW_INDX_2" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="VIZ_QUERY" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1" type="vgt_draw_initiator"/> + <reg32 offset="2" name="2"> + <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> + </reg32> + <!-- followed by NUM_INDICES indices.. --> +</domain> + +<domain name="CP_DRAW_INDX_OFFSET" width="32"> + <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> + <reg32 offset="1" name="1"> + <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="FIRST_INDX" low="0" high="31"/> + </reg32> + + <stripe varset="chip" variants="A5XX-"> + <reg32 offset="4" name="4"> + <bitfield name="INDX_BASE_LO" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="INDX_BASE_HI" low="0" high="31"/> + </reg32> + <reg64 offset="4" name="INDX_BASE" type="address"/> + <reg32 offset="6" name="6"> + <!-- max # of elements in index buffer --> + <bitfield name="MAX_INDICES" low="0" high="31"/> + </reg32> + </stripe> + + <reg32 offset="4" name="4"> + <bitfield name="INDX_BASE" low="0" high="31" type="address"/> + </reg32> + + <reg32 offset="5" name="5"> + <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> + <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> + <stripe varset="chip" variants="A4XX"> + <reg32 offset="1" name="1"> + <bitfield name="INDIRECT" low="0" high="31"/> + </reg32> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg32 offset="1" name="1"> + <bitfield name="INDIRECT_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="INDIRECT_HI" low="0" high="31"/> + </reg32> + <reg64 offset="1" name="INDIRECT" type="address"/> + </stripe> 
+</domain> + +<domain name="CP_DRAW_INDX_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> + <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> + <stripe varset="chip" variants="A4XX"> + <reg32 offset="1" name="1"> + <bitfield name="INDX_BASE" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <!-- max # of bytes in index buffer --> + <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="INDIRECT" low="0" high="31"/> + </reg32> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg32 offset="1" name="1"> + <bitfield name="INDX_BASE_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="INDX_BASE_HI" low="0" high="31"/> + </reg32> + <reg64 offset="1" name="INDX_BASE" type="address"/> + <reg32 offset="3" name="3"> + <!-- max # of elements in index buffer --> + <bitfield name="MAX_INDICES" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="INDIRECT_LO" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="INDIRECT_HI" low="0" high="31"/> + </reg32> + <reg64 offset="4" name="INDIRECT" type="address"/> + </stripe> +</domain> + +<domain name="CP_DRAW_INDIRECT_MULTI" width="32" varset="chip" prefix="chip" variants="A6XX-"> + <enum name="a6xx_draw_indirect_opcode"> + <value name="INDIRECT_OP_NORMAL" value="0x2"/> + <value name="INDIRECT_OP_INDEXED" value="0x4"/> + <value name="INDIRECT_OP_INDIRECT_COUNT" value="0x6"/> + <value name="INDIRECT_OP_INDIRECT_COUNT_INDEXED" value="0x7"/> + </enum> + <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> + <reg32 offset="1" name="1"> + <bitfield name="OPCODE" low="0" high="3" type="a6xx_draw_indirect_opcode" addvariant="yes"/> + <doc> + DST_OFF same as in CP_LOAD_STATE6 - vec4 VS const at this offset will + be updated for each draw to {draw_id, first_vertex, first_instance, 0} + value of 0 disables it + </doc> + <bitfield 
name="DST_OFF" low="8" high="21" type="hex"/> + </reg32> + <reg32 offset="2" name="DRAW_COUNT" type="uint"/> + <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_NORMAL"> + <reg64 offset="3" name="INDIRECT" type="address"/> + <reg32 offset="5" name="STRIDE" type="uint"/> + </stripe> + <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDEXED" prefix="INDEXED"> + <reg64 offset="3" name="INDEX" type="address"/> + <reg32 offset="5" name="MAX_INDICES" type="uint"/> + <reg64 offset="6" name="INDIRECT" type="address"/> + <reg32 offset="8" name="STRIDE" type="uint"/> + </stripe> + <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT" prefix="INDIRECT"> + <reg64 offset="3" name="INDIRECT" type="address"/> + <reg64 offset="5" name="INDIRECT_COUNT" type="address"/> + <reg32 offset="7" name="STRIDE" type="uint"/> + </stripe> + <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT_INDEXED" prefix="INDIRECT_INDEXED"> + <reg64 offset="3" name="INDEX" type="address"/> + <reg32 offset="5" name="MAX_INDICES" type="uint"/> + <reg64 offset="6" name="INDIRECT" type="address"/> + <reg64 offset="8" name="INDIRECT_COUNT" type="address"/> + <reg32 offset="10" name="STRIDE" type="uint"/> + </stripe> +</domain> + +<domain name="CP_DRAW_AUTO" width="32"> + <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/> + <reg32 offset="1" name="1"> + <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/> + </reg32> + <reg64 offset="2" name="NUM_VERTICES_BASE" type="address"/> + <reg32 offset="4" name="4"> + <bitfield name="NUM_VERTICES_OFFSET" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="STRIDE" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_DRAW_PRED_ENABLE_GLOBAL" width="32" varset="chip"> + <reg32 offset="0" name="0"> + <bitfield name="ENABLE" pos="0" type="boolean"/> + </reg32> +</domain> + +<domain name="CP_DRAW_PRED_ENABLE_LOCAL" 
width="32" varset="chip"> + <reg32 offset="0" name="0"> + <bitfield name="ENABLE" pos="0" type="boolean"/> + </reg32> +</domain> + +<domain name="CP_DRAW_PRED_SET" width="32" varset="chip"> + <enum name="cp_draw_pred_src"> + <!-- + Sources 1-4 seem to be about combining reading + SO/primitive queries and setting the predicate, which is + a DX11-specific optimization (since in DX11 you can only + predicate on the result of queries). + --> + <value name="PRED_SRC_MEM" value="5"> + <doc> + Read a 64-bit value at the given address and + test if it equals/doesn't equal 0. + </doc> + </value> + </enum> + <enum name="cp_draw_pred_test"> + <value name="NE_0_PASS" value="0"/> + <value name="EQ_0_PASS" value="1"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="SRC" low="4" high="7" type="cp_draw_pred_src"/> + <bitfield name="TEST" pos="8" type="cp_draw_pred_test"/> + </reg32> + <reg64 offset="1" name="MEM_ADDR" type="address"/> +</domain> + +<domain name="CP_SET_DRAW_STATE" width="32" varset="chip" variants="A4XX-"> + <array offset="0" stride="3" length="100"> + <reg32 offset="0" name="0"> + <bitfield name="COUNT" low="0" high="15" type="uint"/> + <bitfield name="DIRTY" pos="16" type="boolean"/> + <bitfield name="DISABLE" pos="17" type="boolean"/> + <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/> + <bitfield name="LOAD_IMMED" pos="19" type="boolean"/> + <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/> + <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/> + <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/> + <bitfield name="GROUP_ID" low="24" high="28" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + </array> +</domain> + +<domain name="CP_SET_BIN" 
width="32"> + <doc>value at offset 0 always seems to be 0x00000000..</doc> + <reg32 offset="0" name="0"/> + <reg32 offset="1" name="1"> + <bitfield name="X1" low="0" high="15" type="uint"/> + <bitfield name="Y1" low="16" high="31" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="X2" low="0" high="15" type="uint"/> + <bitfield name="Y2" low="16" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SET_BIN_DATA" width="32"> + <reg32 offset="0" name="0"> + <!-- corresponds to VSC_PIPE[n].DATA_ADDR --> + <bitfield name="BIN_DATA_ADDR" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="1" name="1"> + <!-- seems to correspond to VSC_SIZE_ADDRESS --> + <bitfield name="BIN_SIZE_ADDRESS" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_SET_BIN_DATA5" width="32"> + <reg32 offset="0" name="0"> + <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> + <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> + <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> + <bitfield name="VSC_N" low="22" high="26" type="uint"/> + </reg32> + <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> + <reg32 offset="1" name="1"> + <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> + <reg32 offset="3" name="3"> + <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/> + </reg32> + <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: --> + <reg32 offset="5" name="5"> + <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/> + </reg32> + <reg32 offset="6" name="6"> + <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/> + </reg32> + <!-- + a7xx adds a few more addresses to the end of the pkt + --> + <reg64 offset="7" name="7"/> + <reg64 offset="9" 
name="9"/> +</domain> + +<domain name="CP_SET_BIN_DATA5_OFFSET" width="32"> + <doc> + Like CP_SET_BIN_DATA5, but set the pointers as offsets from the + pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful + for Vulkan where these values aren't known when the command + stream is recorded. + </doc> + <reg32 offset="0" name="0"> + <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: --> + <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/> + <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: --> + <bitfield name="VSC_N" low="22" high="26" type="uint"/> + </reg32> + <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS --> + <reg32 offset="1" name="1"> + <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/> + </reg32> + <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)--> + <reg32 offset="2" name="2"> + <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/> + </reg32> + <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS --> + <reg32 offset="3" name="3"> + <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_REG_RMW" width="32"> + <doc> + Modifies DST_REG using two sources that can either be registers + or immediates. If SRC1_ADD is set, then do the following: + + $dst = (($dst &amp; $src0) rot $rotate) + $src1 + + Otherwise: + + $dst = (($dst &amp; $src0) rot $rotate) | $src1 + + Here "rot" means rotate left. 
+ </doc> + <reg32 offset="0" name="0"> + <bitfield name="DST_REG" low="0" high="17" type="hex"/> + <bitfield name="ROTATE" low="24" high="28" type="uint"/> + <bitfield name="SRC1_ADD" pos="29" type="boolean"/> + <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/> + <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC0" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="SRC1" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_REG_TO_MEM" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). --> + <bitfield name="CNT" low="18" high="29" type="uint"/> + <bitfield name="64B" pos="30" type="boolean"/> + <bitfield name="ACCUMULATE" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="DEST" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="DEST_HI" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32"> + <doc> + Like CP_REG_TO_MEM, but the memory address to write to can be + offset using either one or two registers or scratch + registers. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). 
--> + <bitfield name="CNT" low="18" high="29" type="uint"/> + <bitfield name="64B" pos="30" type="boolean"/> + <bitfield name="ACCUMULATE" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="DEST" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="DEST_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="OFFSET0" low="0" high="17" type="hex"/> + <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/> + </reg32> + <!-- followed by an optional identical OFFSET1 dword --> +</domain> + +<domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32"> + <doc> + Like CP_REG_TO_MEM, but the memory address to write to can be + offset using a DWORD in memory. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). --> + <bitfield name="CNT" low="18" high="29" type="uint"/> + <bitfield name="64B" pos="30" type="boolean"/> + <bitfield name="ACCUMULATE" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="DEST" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="DEST_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/> + </reg32> +</domain> + +<domain name="CP_MEM_TO_REG" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- number of registers/dwords copied is max(CNT, 1). 
--> + <bitfield name="CNT" low="19" high="29" type="uint"/> + <!-- shift each DWORD left by 2 while copying --> + <bitfield name="SHIFT_BY_2" pos="30" type="boolean"/> + <!-- does the same thing as CP_MEM_TO_MEM::UNK31 --> + <bitfield name="UNK31" pos="31" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> + <bitfield name="SRC_HI" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_MEM_TO_MEM" width="32"> + <reg32 offset="0" name="0"> + <!-- + not sure how many src operands we have, but the low + bits negate the n'th src argument. + --> + <bitfield name="NEG_A" pos="0" type="boolean"/> + <bitfield name="NEG_B" pos="1" type="boolean"/> + <bitfield name="NEG_C" pos="2" type="boolean"/> + + <!-- if set treat src/dst as 64bit values --> + <bitfield name="DOUBLE" pos="29" type="boolean"/> + <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand --> + <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/> + <!-- some other kind of wait --> + <bitfield name="UNK31" pos="31" type="boolean"/> + </reg32> + <!-- + followed by sequence of addresses.. 
the first is the + destination and the rest are N src addresses which are + summed (after being negated if NEG_x bit set) allowing + to do things like 'result += end - start' (which turns + out to be useful for queries and accumulating results + across multiple tiles) + --> +</domain> + +<domain name="CP_MEMCPY" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="DWORDS" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="SRC_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="DST_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="DST_HI" low="0" high="31" type="hex"/> + </reg32> +</domain> + +<domain name="CP_REG_TO_SCRATCH" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + <!-- number of registers/dwords copied is CNT + 1. --> + <bitfield name="CNT" low="24" high="26" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SCRATCH_TO_REG" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="REG" low="0" high="17" type="hex"/> + <!-- note: CP_MEM_TO_REG always sets this when writing to the register --> + <bitfield name="UNK18" pos="18" type="boolean"/> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + <!-- number of registers/dwords copied is CNT + 1. 
--> + <bitfield name="CNT" low="24" high="26" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SCRATCH_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="SCRATCH" low="20" high="22" type="uint"/> + </reg32> + <!-- followed by one or more DWORDs to write to scratch registers --> +</domain> + +<domain name="CP_MEM_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="ADDR_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_HI" low="0" high="31"/> + </reg32> + <!-- followed by the DWORDs to write --> +</domain> + +<enum name="cp_cond_function"> + <value value="0" name="WRITE_ALWAYS"/> + <value value="1" name="WRITE_LT"/> + <value value="2" name="WRITE_LE"/> + <value value="3" name="WRITE_EQ"/> + <value value="4" name="WRITE_NE"/> + <value value="5" name="WRITE_GE"/> + <value value="6" name="WRITE_GT"/> +</enum> + +<domain name="CP_COND_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> + <bitfield name="POLL_MEMORY" pos="4" type="boolean"/> + <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="MASK" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="WRITE_ADDR" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="WRITE_DATA" low="0" high="31"/> + </reg32> +</domain> + +<enum name="poll_memory_type"> + <value value="0" name="POLL_REGISTER"/> + <value value="1" name="POLL_MEMORY"/> + <value value="2" name="POLL_SCRATCH"/> + <value value="3" name="POLL_ON_CHIP" varset="chip" variants="A7XX-"/> +</enum> + +<domain name="CP_COND_WRITE5" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="FUNCTION" low="0" high="2" 
type="cp_cond_function"/> + <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> + <!-- POLL_REGISTER polls a register at POLL_ADDR_LO. --> + <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> + <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="MASK" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="6" name="6"> + <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="7" name="7"> + <bitfield name="WRITE_DATA" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_WAIT_MEM_GTE" width="32"> + <doc> + Wait until a memory value is greater than or equal to the + reference, using signed comparison. + </doc> + <reg32 offset="0" name="0"> + <!-- Reserved for flags, presumably? Unused in FW --> + <bitfield name="RESERVED" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="REF" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_WAIT_REG_MEM" width="32"> + <doc> + This uses the same internal comparison as CP_COND_WRITE, + but waits until the comparison is true instead. It busy-loops in + the CP for the given number of cycles before trying again. 
+ </doc> + <reg32 offset="0" name="0"> + <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/> + <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/> + <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/> + <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="MASK" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_WAIT_TWO_REGS" width="32"> + <doc> + Waits for REG0 to not be 0 or REG1 to not equal REF + </doc> + <reg32 offset="0" name="0"> + <bitfield name="REG0" low="0" high="17" type="hex"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="REG1" low="0" high="17" type="hex"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="REF" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_DISPATCH_COMPUTE" width="32"> + <reg32 offset="0" name="0"/> + <reg32 offset="1" name="1"> + <bitfield name="X" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="Y" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="Z" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_SET_RENDER_MODE" width="32"> + <enum name="render_mode_cmd"> + <value value="1" name="BYPASS"/> + <value value="2" name="BINNING"/> + <value value="3" name="GMEM"/> + <value value="5" name="BLIT2D"/> + <!-- placeholder name.. used when CP_BLIT packets with BLIT_OP_SCALE?? 
--> + <value value="7" name="BLIT2DSCALE"/> + <!-- 8 set before going back to BYPASS exiting 2D --> + <value value="8" name="END2D"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="MODE" low="0" high="8" type="render_mode_cmd"/> + <!-- + normally 0x1/0x3, sometimes see 0x5/0x8 with unknown registers in + 0x21xx range.. possibly (at least some) a5xx variants have a + 2d core? + --> + </reg32> + <!-- I think first buffer is for GPU to save context in case of ctx switch? --> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR_0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <!-- + set when in GMEM.. maybe indicates GMEM contents need to be + preserved on ctx switch? + --> + <bitfield name="VSC_ENABLE" pos="3" type="boolean"/> + <bitfield name="GMEM_ENABLE" pos="4" type="boolean"/> + </reg32> + <reg32 offset="4" name="4"/> + <!-- second buffer looks like some cmdstream.. length in dwords: --> + <reg32 offset="5" name="5"> + <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="6" name="6"> + <bitfield name="ADDR_1_LO" low="0" high="31"/> + </reg32> + <reg32 offset="7" name="7"> + <bitfield name="ADDR_1_HI" low="0" high="31"/> + </reg32> +</domain> + +<!-- this looks fairly similar to CP_SET_RENDER_MODE minus first dword --> +<domain name="CP_COMPUTE_CHECKPOINT" width="32"> + <!-- I think first buffer is for GPU to save context in case of ctx switch? --> + <reg32 offset="0" name="0"> + <bitfield name="ADDR_0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + </reg32> + <reg32 offset="3" name="3"/> + <!-- second buffer looks like some cmdstream.. 
length in dwords: --> + <reg32 offset="4" name="4"> + <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="ADDR_1_LO" low="0" high="31"/> + </reg32> + <reg32 offset="6" name="6"> + <bitfield name="ADDR_1_HI" low="0" high="31"/> + </reg32> + <reg32 offset="7" name="7"/> +</domain> + +<domain name="CP_PERFCOUNTER_ACTION" width="32"> + <reg32 offset="0" name="0"> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR_0_HI" low="0" high="31"/> + </reg32> +</domain> + +<domain varset="chip" name="CP_EVENT_WRITE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/> + <!-- when set, write back timestamp instead of value from packet: --> + <bitfield name="TIMESTAMP" pos="30" type="boolean"/> + <bitfield name="IRQ" pos="31" type="boolean"/> + </reg32> + <!-- + TODO what is gpuaddr for, seems to be all 0's.. maybe needed for + context switch? + --> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR_0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <!-- ??? 
--> + </reg32> +</domain> + +<domain varset="chip" name="CP_EVENT_WRITE7" width="32"> + <enum name="event_write_src"> + <!-- Write payload[0] --> + <value value="0" name="EV_WRITE_USER_32B"/> + <!-- Write payload[0] payload[1] --> + <value value="1" name="EV_WRITE_USER_64B"/> + <!-- Write (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL) --> + <value value="2" name="EV_WRITE_TIMESTAMP_SUM"/> + <value value="3" name="EV_WRITE_ALWAYSON"/> + <!-- Write payload[1] regs starting at payload[0] offset --> + <value value="4" name="EV_WRITE_REGS_CONTENT"/> + </enum> + + <enum name="event_write_dst"> + <value value="0" name="EV_DST_RAM"/> + <value value="1" name="EV_DST_ONCHIP"/> + </enum> + + <reg32 offset="0" name="0"> + <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/> + <bitfield name="WRITE_SAMPLE_COUNT" pos="12" type="boolean"/> + <!-- Write sample count at (iova + 16) --> + <bitfield name="SAMPLE_COUNT_END_OFFSET" pos="13" type="boolean"/> + <!-- *(iova + 8) = *(iova + 16) - *iova --> + <bitfield name="WRITE_SAMPLE_COUNT_DIFF" pos="14" type="boolean"/> + + <!-- Next 4 flags are valid to set only when concurrent binning is enabled --> + <!-- Increment 16b BV counter. Valid only in BV pipe --> + <bitfield name="INC_BV_COUNT" pos="16" type="boolean"/> + <!-- Increment 16b BR counter. Valid only in BR pipe --> + <bitfield name="INC_BR_COUNT" pos="17" type="boolean"/> + <bitfield name="CLEAR_RENDER_RESOURCE" pos="18" type="boolean"/> + <bitfield name="CLEAR_LRZ_RESOURCE" pos="19" type="boolean"/> + + <bitfield name="WRITE_SRC" low="20" high="22" type="event_write_src"/> + <bitfield name="WRITE_DST" pos="24" type="event_write_dst" addvariant="yes"/> + <!-- Writes into WRITE_DST from WRITE_SRC. RB_DONE_TS requires WRITE_ENABLED. 
--> + <bitfield name="WRITE_ENABLED" pos="27" type="boolean"/> + </reg32> + + <stripe varset="event_write_dst" variants="EV_DST_RAM"> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR_0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="PAYLOAD_0" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="PAYLOAD_1" low="0" high="31"/> + </reg32> + </stripe> + + <stripe varset="event_write_dst" variants="EV_DST_ONCHIP"> + <reg32 offset="1" name="1"> + <bitfield name="ONCHIP_ADDR_0" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="PAYLOAD_0" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="PAYLOAD_1" low="0" high="31"/> + </reg32> + </stripe> +</domain> + +<domain name="CP_BLIT" width="32"> + <enum name="cp_blit_cmd"> + <value value="0" name="BLIT_OP_FILL"/> + <value value="1" name="BLIT_OP_COPY"/> + <value value="3" name="BLIT_OP_SCALE"/> <!-- used for mipmap generation --> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="OP" low="0" high="3" type="cp_blit_cmd"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="SRC_X1" low="0" high="13" type="uint"/> + <bitfield name="SRC_Y1" low="16" high="29" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="SRC_X2" low="0" high="13" type="uint"/> + <bitfield name="SRC_Y2" low="16" high="29" type="uint"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="DST_X1" low="0" high="13" type="uint"/> + <bitfield name="DST_Y1" low="16" high="29" type="uint"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="DST_X2" low="0" high="13" type="uint"/> + <bitfield name="DST_Y2" low="16" high="29" type="uint"/> + </reg32> +</domain> + +<domain name="CP_EXEC_CS" width="32"> + <reg32 offset="0" name="0"> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="NGROUPS_X" low="0" 
high="31" type="uint"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="NGROUPS_Y" low="0" high="31" type="uint"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="NGROUPS_Z" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_EXEC_CS_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> + <reg32 offset="0" name="0"> + </reg32> + <stripe varset="chip" variants="A4XX"> + <reg32 offset="1" name="1"> + <bitfield name="ADDR" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <!-- localsize is value minus one: --> + <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> + <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> + <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> + </reg32> + </stripe> + <stripe varset="chip" variants="A5XX-"> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR_HI" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <!-- localsize is value minus one: --> + <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/> + <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/> + <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/> + </reg32> + </stripe> +</domain> + +<domain name="CP_SET_MARKER" width="32" varset="chip" prefix="chip" variants="A6XX-"> + <doc>Tell CP the current operation mode, indicates save and restore procedure</doc> + <enum name="a6xx_marker"> + <value value="1" name="RM6_BYPASS"/> + <value value="2" name="RM6_BINNING"/> + <value value="4" name="RM6_GMEM"/> + <value value="5" name="RM6_ENDVIS"/> + <value value="6" name="RM6_RESOLVE"/> + <value value="7" name="RM6_YIELD"/> + <value value="8" name="RM6_COMPUTE"/> + <value value="0xc" name="RM6_BLIT2DSCALE"/> <!-- no-op (at least on current sqe fw) --> + + <!-- + These values come from a6xx_set_marker() in the + downstream kernel, and they can only be set by the kernel + --> + 
<value value="0xd" name="RM6_IB1LIST_START"/> + <value value="0xe" name="RM6_IB1LIST_END"/> + <!-- IFPC - inter-frame power collapse --> + <value value="0x100" name="RM6_IFPC_ENABLE"/> + <value value="0x101" name="RM6_IFPC_DISABLE"/> + </enum> + <reg32 offset="0" name="0"> + <!-- + NOTE: blob driver and some versions of freedreno/turnip set + b4, which is unused (at least by current sqe fw), but interferes + with parsing if we extend the size of the bitfield to include + b8 (only sent by kernel mode driver). Really, the way the + parsing works in the firmware, only b0-b3 are considered, but + if b8 is set, the low bits are interpreted differently. To + model this, without getting confused by spurious b4, this is + described as two overlapping bitfields: + --> + <bitfield name="MODE" low="0" high="8" type="a6xx_marker"/> + <bitfield name="MARKER" low="0" high="3" type="a6xx_marker"/> + </reg32> +</domain> + +<domain name="CP_SET_PSEUDO_REG" width="32" varset="chip" prefix="chip" variants="A6XX-"> + <doc>Set internal CP registers, used to indicate context save data addresses</doc> + <enum name="pseudo_reg"> + <value value="0" name="SMMU_INFO"/> + <value value="1" name="NON_SECURE_SAVE_ADDR"/> + <value value="2" name="SECURE_SAVE_ADDR"/> + <value value="3" name="NON_PRIV_SAVE_ADDR"/> + <value value="4" name="COUNTER"/> + + <!-- + On a6xx the registers are set directly and CP_SET_BIN_DATA5_OFFSET reads them, + but that doesn't work with concurrent binning because BR will be reading from + a different set of streams than BV is writing, so on a7xx we have these + pseudo-regs instead, which do the right thing. + + The corresponding VSC registers exist, and they're written by BV when it + encounters CP_SET_PSEUDO_REG. When BR later encounters the same CP_SET_PSEUDO_REG + it will only write some private scratch registers which are read by + CP_SET_BIN_DATA5_OFFSET. 
+ + If concurrent binning is disabled then BR also does binning so it will also + write the "real" registers in BR. + --> + <value value="8" name="DRAW_STRM_ADDRESS"/> + <value value="9" name="DRAW_STRM_SIZE_ADDRESS"/> + <value value="10" name="PRIM_STRM_ADDRESS"/> + <value value="11" name="UNK_STRM_ADDRESS"/> + <value value="12" name="UNK_STRM_SIZE_ADDRESS"/> + + <value value="16" name="BINDLESS_BASE_0_ADDR"/> + <value value="17" name="BINDLESS_BASE_1_ADDR"/> + <value value="18" name="BINDLESS_BASE_2_ADDR"/> + <value value="19" name="BINDLESS_BASE_3_ADDR"/> + <value value="20" name="BINDLESS_BASE_4_ADDR"/> + <value value="21" name="BINDLESS_BASE_5_ADDR"/> + <value value="22" name="BINDLESS_BASE_6_ADDR"/> + </enum> + <array offset="0" stride="3" length="100"> + <reg32 offset="0" name="0"> + <bitfield name="PSEUDO_REG" low="0" high="10" type="pseudo_reg"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="LO" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="HI" low="0" high="31"/> + </reg32> + </array> +</domain> + +<domain name="CP_REG_TEST" width="32" varset="chip" prefix="chip" variants="A6XX-"> + <doc> + Tests bit in specified register and sets predicate for CP_COND_REG_EXEC. + So: + + opcode: CP_REG_TEST (39) (2 dwords) + { REG = 0xc10 | BIT = 0 } + 0000: 70b90001 00000c10 + opcode: CP_COND_REG_EXEC (47) (3 dwords) + 0000: 70c70002 10000000 00000004 + opcode: CP_INDIRECT_BUFFER (3f) (4 dwords) + + Will execute the CP_INDIRECT_BUFFER only if b0 in the register at + offset 0x0c10 is 1 + </doc> + <enum name="source_type"> + <value value="0" name="SOURCE_REG"/> + <!-- Don't confuse with scratch registers, this is a separate memory + written into by CP_MEM_TO_SCRATCH_MEM. 
--> + <value value="1" name="SOURCE_SCRATCH_MEM" varset="chip" variants="A7XX-"/> + </enum> + <reg32 offset="0" name="0"> + <!-- the register to test --> + <bitfield name="REG" low="0" high="17" varset="source_type" variants="SOURCE_REG"/> + <bitfield name="SCRATCH_MEM_OFFSET" low="0" high="17" varset="source_type" variants="SOURCE_SCRATCH_MEM"/> + <bitfield name="SOURCE" pos="18" type="source_type" addvariant="yes"/> + <!-- the bit to test --> + <bitfield name="BIT" low="20" high="24" type="uint"/> + <!-- skip implied CP_WAIT_FOR_ME --> + <bitfield name="SKIP_WAIT_FOR_ME" pos="25" type="boolean"/> + <!-- the predicate bit to set (new in gen3+) --> + <bitfield name="PRED_BIT" low="26" high="30" type="uint"/> + <!-- update the predicate reg directly (new in gen3+) --> + <bitfield name="PRED_UPDATE" pos="31" type="boolean"/> + </reg32> + + <!-- + In PRED_UPDATE mode, the predicate reg is updated directly using two + more dwords, ignoring other bits: + + PRED_REG = (PRED_REG & ~PRED_MASK) | (PRED_VAL & PRED_MASK); + --> + <reg32 offset="1" name="PRED_MASK" type="hex"/> + <reg32 offset="2" name="PRED_VAL" type="hex"/> +</domain> + +<!-- I *think* this existed at least as far back as a4xx --> +<domain name="CP_COND_REG_EXEC" width="32"> + <enum name="compare_mode"> + <!-- use the predicate bit set by CP_REG_TEST --> + <value value="1" name="PRED_TEST"/> + <!-- compare two registers directly for equality --> + <value value="2" name="REG_COMPARE"/> + <!-- test if certain render modes are set via CP_SET_MARKER --> + <value value="3" name="RENDER_MODE" varset="chip" variants="A6XX-"/> + <!-- compare REG0 for equality with immediate --> + <value value="4" name="REG_COMPARE_IMM" varset="chip" variants="A7XX-"/> + <!-- test which of BR/BV are enabled --> + <value value="5" name="THREAD_MODE" varset="chip" variants="A7XX-"/> + </enum> + <reg32 offset="0" name="0" varset="compare_mode"> + <bitfield name="REG0" low="0" high="17" variants="REG_COMPARE" type="hex"/> + + <!-- the 
predicate bit to test (new in gen3+) --> + <bitfield name="PRED_BIT" low="18" high="22" variants="PRED_TEST" type="uint"/> + <bitfield name="SKIP_WAIT_FOR_ME" pos="23" varset="chip" variants="A7XX-" type="boolean"/> + <!-- With REG_COMPARE: read the operand from ONCHIP memory instead of from a register (presumably applies to REG0; confirm) --> + <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/> + + <!-- + Note: these bits have the same meaning, and use the same + internal mechanism as the bits in CP_SET_DRAW_STATE. + When RENDER_MODE is selected, they're used as + a bitmask of which modes pass the test. + --> + + <!-- RM6_BINNING --> + <bitfield name="BINNING" pos="25" variants="RENDER_MODE" type="boolean"/> + <!-- all others --> + <bitfield name="GMEM" pos="26" variants="RENDER_MODE" type="boolean"/> + <!-- RM6_BYPASS --> + <bitfield name="SYSMEM" pos="27" variants="RENDER_MODE" type="boolean"/> + + <bitfield name="BV" pos="25" variants="THREAD_MODE" type="boolean"/> + <bitfield name="BR" pos="26" variants="THREAD_MODE" type="boolean"/> + <bitfield name="LPAC" pos="27" variants="THREAD_MODE" type="boolean"/> + + <bitfield name="MODE" low="28" high="31" type="compare_mode" addvariant="yes"/> + </reg32> + + <stripe varset="compare_mode" variants="PRED_TEST"> + <reg32 offset="1" name="1"> + <bitfield name="DWORDS" low="0" high="23" type="uint"/> + </reg32> + </stripe> + + <stripe varset="compare_mode" variants="REG_COMPARE"> + <reg32 offset="1" name="1"> + <bitfield name="REG1" low="0" high="17" type="hex"/> + <!-- Read the operand from ONCHIP memory instead of from a register (presumably applies to REG1; confirm) --> + <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/> + </reg32> + </stripe> + + <stripe varset="compare_mode" variants="RENDER_MODE"> + <reg32 offset="1" name="1"> + <bitfield name="DWORDS" low="0" high="23" type="uint"/> + </reg32> + </stripe> + + <stripe varset="compare_mode" variants="REG_COMPARE_IMM"> + <reg32 offset="1" name="1"> + <bitfield name="IMM" low="0" high="31"/> + </reg32> + 
</stripe> + + <stripe varset="compare_mode" variants="THREAD_MODE"> + <reg32 offset="1" name="1"> + <bitfield name="DWORDS" low="0" high="23" type="uint"/> + </reg32> + </stripe> + + <reg32 offset="2" name="2"> + <bitfield name="DWORDS" low="0" high="23" type="uint"/> + </reg32> +</domain> + +<domain name="CP_COND_EXEC" width="32"> + <doc> + Executes the following DWORDs of commands if the dword at ADDR0 + is not equal to 0 and the dword at ADDR1 is less than REF + (signed comparison). + </doc> + <reg32 offset="0" name="0"> + <bitfield name="ADDR0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR0_HI" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="ADDR1_LO" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="ADDR1_HI" low="0" high="31"/> + </reg32> + <reg32 offset="4" name="4"> + <bitfield name="REF" low="0" high="31"/> + </reg32> + <reg32 offset="5" name="5"> + <bitfield name="DWORDS" low="0" high="31" type="uint"/> + </reg32> +</domain> + +<domain name="CP_SET_CTXSWITCH_IB" width="32"> + <doc> + Used by the userspace driver to set various IB's which are + executed during context save/restore for handling + state that isn't restored by the + context switch routine itself. + </doc> + <enum name="ctxswitch_ib"> + <value name="RESTORE_IB" value="0"> + <doc>Executed unconditionally when switching back to the context.</doc> + </value> + <value name="YIELD_RESTORE_IB" value="1"> + <doc> + Executed when switching back after switching + away during execution of + a CP_SET_MARKER packet with RM6_YIELD as the + payload *and* the normal save routine was + bypassed for a shorter one. I think this is + connected to the "skipsaverestore" bit set by + the kernel when preempting. + </doc> + </value> + <value name="SAVE_IB" value="2"> + <doc> + Executed when switching away from the context, + except for context switches initiated via + CP_YIELD. 
+ </doc> + </value> + <value name="RB_SAVE_IB" value="3"> + <doc> + This can only be set by the RB (i.e. the kernel) + and executes with protected mode off, but + is otherwise similar to SAVE_IB. + + Note, kgsl calls this CP_KMD_AMBLE_TYPE + </doc> + </value> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="ADDR_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="ADDR_HI" low="0" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <bitfield name="DWORDS" low="0" high="19" type="uint"/> + <bitfield name="TYPE" low="20" high="21" type="ctxswitch_ib"/> + </reg32> +</domain> + +<domain name="CP_REG_WRITE" width="32"> + <enum name="reg_tracker"> + <doc> + Keep shadow copies of these registers and only set them + when drawing, avoiding redundant writes: + - VPC_CNTL_0 + - HLSQ_CONTROL_1_REG + - HLSQ_UNKNOWN_B980 + </doc> + <value name="TRACK_CNTL_REG" value="0x1"/> + <doc> + Track RB_RENDER_CNTL, and insert a WFI in the following + situation: + - There is a write that disables binning + - There was a draw with binning left enabled, but in + BYPASS mode + Presumably this is a hang workaround? + </doc> + <value name="TRACK_RENDER_CNTL" value="0x2"/> + <doc> + Do a mysterious CP_EVENT_WRITE 0x3f when the low bit of + the data to write is 0. Used by the Vulkan blob with + PC_MULTIVIEW_CNTL, but this isn't predicated on particular + register(s) like the others. + </doc> + <value name="UNK_EVENT_WRITE" value="0x4"/> + <doc> + Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and + GRAS_LRZ_DEPTH_VIEW with previous values, and if one of + the following is true: + - GRAS_LRZ_CNTL::GREATER has changed + - GRAS_LRZ_CNTL::DIR has changed, the old value is not + CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED + - GRAS_LRZ_DEPTH_VIEW has changed + then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE + forced to 1. + Only exists in a650_sqe.fw. 
+ </doc> + <value name="TRACK_LRZ" value="0x8"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="TRACKER" low="0" high="3" type="reg_tracker"/> + </reg32> + <reg32 offset="1" name="1"/> + <reg32 offset="2" name="2"/> +</domain> + +<domain name="CP_SMMU_TABLE_UPDATE" width="32"> + <doc> + Note that the SMMU's definition of TTBRn can take different forms + depending on the pgtable format. But a5xx+ only uses aarch64 + format. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="TTBR0_LO" low="0" high="31"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="TTBR0_HI" low="0" high="15"/> + <bitfield name="ASID" low="16" high="31"/> + </reg32> + <reg32 offset="2" name="2"> + <doc>Unused, does not apply to aarch64 pgtable format</doc> + <bitfield name="CONTEXTIDR" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="CONTEXTBANK" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_START_BIN" width="32"> + <reg32 offset="0" name="BIN_COUNT" type="uint"/> + <reg64 offset="1" name="PREFIX_ADDR" type="address"/> + <reg32 offset="3" name="PREFIX_DWORDS"> + <doc> + Size of prefix for each bin. For each bin index i, the + prefix commands at PREFIX_ADDR + i * PREFIX_DWORDS are + executed in an IB2 before the IB1 commands following + this packet. 
+ </doc> + </reg32> + <reg32 offset="4" name="BODY_DWORDS"> + <doc>Number of dwords after this packet until CP_END_BIN</doc> + </reg32> +</domain> + +<domain name="CP_WAIT_TIMESTAMP" width="32"> + <enum name="ts_wait_value_src"> + <!-- Wait for value at memory address to be >= SRC_0 (signed comparison) --> + <value value="0" name="TS_WAIT_GE_32B"/> + <!-- Wait for value at memory address to be >= SRC_0 (unsigned) --> + <value value="1" name="TS_WAIT_GE_64B"/> + <!-- NOTE(review): presumably waits for the value to be >= (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL); the earlier "Write (...)" wording looked copied from event_write_src, confirm against firmware --> + <value value="2" name="TS_WAIT_GE_TIMESTAMP_SUM"/> + </enum> + + <enum name="ts_wait_type"> + <value value="0" name="TS_WAIT_RAM"/> + <value value="1" name="TS_WAIT_ONCHIP"/> + </enum> + + <reg32 offset="0" name="0"> + <bitfield name="WAIT_VALUE_SRC" low="0" high="1" type="ts_wait_value_src"/> + <bitfield name="WAIT_DST" pos="4" type="ts_wait_type" addvariant="yes"/> + </reg32> + + <stripe varset="ts_wait_type" variants="TS_WAIT_RAM"> + <reg64 offset="1" name="ADDR" type="address"/> + </stripe> + + <stripe varset="ts_wait_type" variants="TS_WAIT_ONCHIP"> + <reg32 offset="1" name="ONCHIP_ADDR_0" low="0" high="31"/> + </stripe> + + <reg32 offset="3" name="SRC_0"/> + <reg32 offset="4" name="SRC_1"/> +</domain> + +<domain name="CP_BV_BR_COUNT_OPS" width="32"> + <enum name="pipe_count_op"> + <value name="PIPE_CLEAR_BV_BR" value="0x1"/> + <value name="PIPE_SET_BR_OFFSET" value="0x2"/> + <!-- Wait until BV_counter > BR_counter --> + <value name="PIPE_BR_WAIT_FOR_BV" value="0x3"/> + <!-- Wait until (BR_counter + BR_OFFSET) > BV_counter --> + <value name="PIPE_BV_WAIT_FOR_BR" value="0x4"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="OP" low="0" high="3" type="pipe_count_op"/> + </reg32> + <reg32 offset="1" name="1"> + <bitfield name="BR_OFFSET" low="0" high="15" type="uint"/> + </reg32> +</domain> + +<domain name="CP_MODIFY_TIMESTAMP" width="32"> + <enum name="timestamp_op"> + <value name="MODIFY_TIMESTAMP_CLEAR" value="0"/> + <value 
name="MODIFY_TIMESTAMP_ADD_GLOBAL" value="1"/> + <value name="MODIFY_TIMESTAMP_ADD_LOCAL" value="2"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield name="ADD" low="0" high="7" type="uint"/> + <bitfield name="OP" low="28" high="31" type="timestamp_op"/> + </reg32> +</domain> + +<domain name="CP_MEM_TO_SCRATCH_MEM" width="32"> + <doc> + Best guess is that it is a faster way to fetch all the VSC_STATE registers + and keep them in a local scratch memory instead of fetching every time + when skipping IBs. + </doc> + <reg32 offset="0" name="0"> + <bitfield name="CNT" low="0" high="5" type="uint"/> + </reg32> + <reg32 offset="1" name="1"> + <doc>Scratch memory size is 48 dwords</doc> + <bitfield name="OFFSET" low="0" high="5" type="uint"/> + </reg32> + <!-- NOTE(review): SRC / SRC_HI presumably hold the low and high halves of the source memory address; confirm --> + <reg32 offset="2" name="2"> + <bitfield name="SRC" low="0" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="SRC_HI" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_THREAD_CONTROL" width="32"> + <enum name="cp_thread"> + <value name="CP_SET_THREAD_BR" value="1"/> <!-- Render --> + <value name="CP_SET_THREAD_BV" value="2"/> <!-- Visibility --> + <value name="CP_SET_THREAD_BOTH" value="3"/> + </enum> + <reg32 offset="0" name="0"> + <bitfield low="0" high="1" name="THREAD" type="cp_thread"/> + <bitfield pos="27" name="CONCURRENT_BIN_DISABLE" type="boolean"/> + <bitfield pos="31" name="SYNC_THREADS" type="boolean"/> + </reg32> +</domain> + +<domain name="CP_FIXED_STRIDE_DRAW_TABLE" width="32"> + <reg64 offset="0" name="IB_BASE"/> + <reg32 offset="2" name="2"> + <!-- STRIDE * COUNT --> + <bitfield name="IB_SIZE" low="0" high="11"/> + <bitfield name="STRIDE" low="20" high="31"/> + </reg32> + <reg32 offset="3" name="3"> + <bitfield name="COUNT" low="0" high="31"/> + </reg32> +</domain> + +<domain name="CP_RESET_CONTEXT_STATE" width="32"> + <reg32 offset="0" name="0"> + <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/> + <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" 
type="boolean"/> + <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/> + </reg32> +</domain> + +</database> + |