summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml')
-rw-r--r--drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml2268
1 files changed, 2268 insertions, 0 deletions
diff --git a/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
new file mode 100644
index 000000000000..cab01af55d22
--- /dev/null
+++ b/drivers/gpu/drm/msm/registers/adreno/adreno_pm4.xml
@@ -0,0 +1,2268 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<database xmlns="http://nouveau.freedesktop.org/"
+xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+xsi:schemaLocation="https://gitlab.freedesktop.org/freedreno/ rules-fd.xsd">
+<import file="freedreno_copyright.xml"/>
+<import file="adreno/adreno_common.xml"/>
+
+<enum name="vgt_event_type" varset="chip"> <!-- event type ids; a number may be reused across generations, with the variants attribute telling the entries apart (e.g. 7, 8 and 9) -->
+ <value name="VS_DEALLOC" value="0"/>
+ <value name="PS_DEALLOC" value="1" variants="A2XX-A6XX"/>
+ <value name="VS_DONE_TS" value="2"/>
+ <value name="PS_DONE_TS" value="3"/>
+ <doc>
+ Flushes dirty data from UCHE, and also writes a GPU timestamp to
+ the address if one is provided.
+ </doc>
+ <value name="CACHE_FLUSH_TS" value="4"/>
+ <value name="CONTEXT_DONE" value="5"/>
+ <value name="CACHE_FLUSH" value="6" variants="A2XX-A4XX"/>
+ <value name="VIZQUERY_START" value="7" variants="A2XX"/>
+ <value name="HLSQ_FLUSH" value="7" variants="A3XX-A4XX"/>
+ <value name="VIZQUERY_END" value="8" variants="A2XX"/>
+ <value name="SC_WAIT_WC" value="9" variants="A2XX"/>
+ <value name="WRITE_PRIMITIVE_COUNTS" value="9" variants="A6XX"/>
+ <value name="START_PRIMITIVE_CTRS" value="11" variants="A6XX"/>
+ <value name="STOP_PRIMITIVE_CTRS" value="12" variants="A6XX"/>
+ <!-- Not sure that these 4 events don't have the same meaning as on A5XX+ -->
+ <value name="RST_PIX_CNT" value="13" variants="A2XX-A4XX"/>
+ <value name="RST_VTX_CNT" value="14" variants="A2XX-A4XX"/>
+ <value name="TILE_FLUSH" value="15" variants="A2XX-A4XX"/>
+ <value name="STAT_EVENT" value="16" variants="A2XX-A4XX"/>
+ <value name="CACHE_FLUSH_AND_INV_TS_EVENT" value="20" variants="A2XX-A4XX"/>
+ <doc>
+ If A6XX_RB_SAMPLE_COUNT_CONTROL.copy is true, writes OQ Z passed
+ sample counts to RB_SAMPLE_COUNT_ADDR. This writes to main
+ memory, skipping UCHE.
+ </doc>
+ <value name="ZPASS_DONE" value="21"/>
+ <value name="CACHE_FLUSH_AND_INV_EVENT" value="22" variants="A2XX"/>
+
+ <doc>
+ Writes the GPU timestamp to the address that follows, once RB
+ access and flushes are complete.
+ </doc>
+ <value name="RB_DONE_TS" value="22" variants="A3XX-"/>
+
+ <value name="PERFCOUNTER_START" value="23" variants="A2XX-A4XX"/>
+ <value name="PERFCOUNTER_STOP" value="24" variants="A2XX-A4XX"/>
+ <value name="VS_FETCH_DONE" value="27"/>
+ <value name="FACENESS_FLUSH" value="28" variants="A2XX-A4XX"/>
+
+ <!-- a5xx events -->
+ <value name="WT_DONE_TS" value="8" variants="A5XX-"/>
+ <value name="START_FRAGMENT_CTRS" value="13" variants="A5XX-"/>
+ <value name="STOP_FRAGMENT_CTRS" value="14" variants="A5XX-"/>
+ <value name="START_COMPUTE_CTRS" value="15" variants="A5XX-"/>
+ <value name="STOP_COMPUTE_CTRS" value="16" variants="A5XX-"/>
+ <value name="FLUSH_SO_0" value="17" variants="A5XX-"/>
+ <value name="FLUSH_SO_1" value="18" variants="A5XX-"/>
+ <value name="FLUSH_SO_2" value="19" variants="A5XX-"/>
+ <value name="FLUSH_SO_3" value="20" variants="A5XX-"/>
+
+ <doc>
+ Invalidates depth attachment data from the CCU. We assume this
+ happens in the last stage.
+ </doc>
+ <value name="PC_CCU_INVALIDATE_DEPTH" value="24" variants="A5XX-"/>
+
+ <doc>
+ Invalidates color attachment data from the CCU. We assume this
+ happens in the last stage.
+ </doc>
+ <value name="PC_CCU_INVALIDATE_COLOR" value="25" variants="A5XX-"/>
+
+ <doc>
+ Flushes the small cache used by CP_EVENT_WRITE::BLIT (which,
+ along with its registers, would be better named RESOLVE).
+ </doc>
+ <value name="PC_CCU_RESOLVE_TS" value="26" variants="A6XX"/>
+
+ <doc>
+ Flushes depth attachment data from the CCU. We assume this
+ happens in the last stage.
+ </doc>
+ <value name="PC_CCU_FLUSH_DEPTH_TS" value="28" variants="A5XX-"/>
+
+ <doc>
+ Flushes color attachment data from the CCU. We assume this
+ happens in the last stage.
+ </doc>
+ <value name="PC_CCU_FLUSH_COLOR_TS" value="29" variants="A5XX-"/>
+
+ <doc>
+ 2D blit to resolve GMEM to system memory (skipping CCU) at the
+ end of a render pass. Compare to CP_BLIT's BLIT_OP_SCALE for
+ more general blitting.
+ </doc>
+ <value name="BLIT" value="30" variants="A5XX-"/>
+
+ <doc>
+ Clears based on GRAS_LRZ_CNTL configuration, could clear
+ fast-clear buffer or LRZ direction.
+ LRZ direction is stored at lrz_fc_offset + 0x200, has 1 byte which
+ could be expressed by enum:
+ CUR_DIR_DISABLED = 0x0
+ CUR_DIR_GE = 0x1
+ CUR_DIR_LE = 0x2
+ CUR_DIR_UNSET = 0x3
+ Clear of direction means setting the direction to CUR_DIR_UNSET.
+ </doc>
+ <value name="LRZ_CLEAR" value="37" variants="A5XX-"/>
+
+ <value name="LRZ_FLUSH" value="38" variants="A5XX-"/>
+ <value name="BLIT_OP_FILL_2D" value="39" variants="A5XX-"/>
+ <value name="BLIT_OP_COPY_2D" value="40" variants="A5XX-A6XX"/>
+ <value name="UNK_40" value="40" variants="A7XX"/> <!-- same number as BLIT_OP_COPY_2D, which is limited to A5XX-A6XX -->
+ <value name="BLIT_OP_SCALE_2D" value="42" variants="A5XX-"/>
+ <value name="CONTEXT_DONE_2D" value="43" variants="A5XX-"/>
+ <value name="UNK_2C" value="44" variants="A5XX-"/>
+ <value name="UNK_2D" value="45" variants="A5XX-"/>
+
+ <!-- a6xx events -->
+ <doc>
+ Invalidates UCHE.
+ </doc>
+ <value name="CACHE_INVALIDATE" value="49" variants="A6XX"/>
+
+ <value name="LABEL" value="63" variants="A6XX-"/>
+
+ <!-- a7xx events; note, some of these are the same as a6xx, just named differently -->
+
+ <doc> Doesn't seem to do anything </doc>
+ <value name="DUMMY_EVENT" value="1" variants="A7XX"/>
+ <value name="CCU_INVALIDATE_DEPTH" value="24" variants="A7XX"/>
+ <value name="CCU_INVALIDATE_COLOR" value="25" variants="A7XX"/>
+ <value name="CCU_RESOLVE_CLEAN" value="26" variants="A7XX"/>
+ <value name="CCU_FLUSH_DEPTH" value="28" variants="A7XX"/>
+ <value name="CCU_FLUSH_COLOR" value="29" variants="A7XX"/>
+ <value name="CCU_RESOLVE" value="30" variants="A7XX"/>
+ <value name="CCU_END_RESOLVE_GROUP" value="31" variants="A7XX"/>
+ <value name="CCU_CLEAN_DEPTH" value="32" variants="A7XX"/>
+ <value name="CCU_CLEAN_COLOR" value="33" variants="A7XX"/>
+ <value name="CACHE_RESET" value="48" variants="A7XX"/>
+ <value name="CACHE_CLEAN" value="49" variants="A7XX"/>
+ <!-- TODO: deal with name conflicts with other gens -->
+ <value name="CACHE_FLUSH7" value="50" variants="A7XX"/>
+ <value name="CACHE_INVALIDATE7" value="51" variants="A7XX"/>
+</enum>
+
+<enum name="pc_di_primtype"> <!-- primitive type ids; DI_PT_PATCHES0..31 occupy the consecutive range 0x1f..0x3e, and 0xe..0x1e are not listed -->
+ <value name="DI_PT_NONE" value="0"/>
+ <!-- POINTLIST_PSIZE is used on a3xx/a4xx when gl_PointSize is written: -->
+ <value name="DI_PT_POINTLIST_PSIZE" value="1"/>
+ <value name="DI_PT_LINELIST" value="2"/>
+ <value name="DI_PT_LINESTRIP" value="3"/>
+ <value name="DI_PT_TRILIST" value="4"/>
+ <value name="DI_PT_TRIFAN" value="5"/>
+ <value name="DI_PT_TRISTRIP" value="6"/>
+ <value name="DI_PT_LINELOOP" value="7"/> <!-- a22x, a3xx -->
+ <value name="DI_PT_RECTLIST" value="8"/>
+ <value name="DI_PT_POINTLIST" value="9"/>
+ <value name="DI_PT_LINE_ADJ" value="0xa"/>
+ <value name="DI_PT_LINESTRIP_ADJ" value="0xb"/>
+ <value name="DI_PT_TRI_ADJ" value="0xc"/>
+ <value name="DI_PT_TRISTRIP_ADJ" value="0xd"/>
+
+ <value name="DI_PT_PATCHES0" value="0x1f"/>
+ <value name="DI_PT_PATCHES1" value="0x20"/>
+ <value name="DI_PT_PATCHES2" value="0x21"/>
+ <value name="DI_PT_PATCHES3" value="0x22"/>
+ <value name="DI_PT_PATCHES4" value="0x23"/>
+ <value name="DI_PT_PATCHES5" value="0x24"/>
+ <value name="DI_PT_PATCHES6" value="0x25"/>
+ <value name="DI_PT_PATCHES7" value="0x26"/>
+ <value name="DI_PT_PATCHES8" value="0x27"/>
+ <value name="DI_PT_PATCHES9" value="0x28"/>
+ <value name="DI_PT_PATCHES10" value="0x29"/>
+ <value name="DI_PT_PATCHES11" value="0x2a"/>
+ <value name="DI_PT_PATCHES12" value="0x2b"/>
+ <value name="DI_PT_PATCHES13" value="0x2c"/>
+ <value name="DI_PT_PATCHES14" value="0x2d"/>
+ <value name="DI_PT_PATCHES15" value="0x2e"/>
+ <value name="DI_PT_PATCHES16" value="0x2f"/>
+ <value name="DI_PT_PATCHES17" value="0x30"/>
+ <value name="DI_PT_PATCHES18" value="0x31"/>
+ <value name="DI_PT_PATCHES19" value="0x32"/>
+ <value name="DI_PT_PATCHES20" value="0x33"/>
+ <value name="DI_PT_PATCHES21" value="0x34"/>
+ <value name="DI_PT_PATCHES22" value="0x35"/>
+ <value name="DI_PT_PATCHES23" value="0x36"/>
+ <value name="DI_PT_PATCHES24" value="0x37"/>
+ <value name="DI_PT_PATCHES25" value="0x38"/>
+ <value name="DI_PT_PATCHES26" value="0x39"/>
+ <value name="DI_PT_PATCHES27" value="0x3a"/>
+ <value name="DI_PT_PATCHES28" value="0x3b"/>
+ <value name="DI_PT_PATCHES29" value="0x3c"/>
+ <value name="DI_PT_PATCHES30" value="0x3d"/>
+ <value name="DI_PT_PATCHES31" value="0x3e"/>
+</enum>
+
+<enum name="pc_di_src_sel"> <!-- NOTE(review): presumably selects where a draw's indices come from; confirm against the field that uses this enum -->
+ <value name="DI_SRC_SEL_DMA" value="0"/>
+ <value name="DI_SRC_SEL_IMMEDIATE" value="1"/>
+ <value name="DI_SRC_SEL_AUTO_INDEX" value="2"/>
+ <value name="DI_SRC_SEL_AUTO_XFB" value="3"/>
+</enum>
+
+<enum name="pc_di_face_cull_sel"> <!-- four selector values 0..3; note the last two names place CULL last, unlike DI_FACE_CULL_NONE and DI_FACE_CULL_FETCH -->
+ <value name="DI_FACE_CULL_NONE" value="0"/>
+ <value name="DI_FACE_CULL_FETCH" value="1"/>
+ <value name="DI_FACE_BACKFACE_CULL" value="2"/>
+ <value name="DI_FACE_FRONTFACE_CULL" value="3"/>
+</enum>
+
+<enum name="pc_di_index_size"> <!-- index size selector; entry order matters here because the last entry carries no explicit number -->
+ <value name="INDEX_SIZE_IGN" value="0"/>
+ <value name="INDEX_SIZE_16_BIT" value="0"/> <!-- same number as INDEX_SIZE_IGN -->
+ <value name="INDEX_SIZE_32_BIT" value="1"/>
+ <value name="INDEX_SIZE_8_BIT" value="2"/>
+ <value name="INDEX_SIZE_INVALID"/> <!-- no value attribute; the number it gets (if any) is decided by the header generator, TODO confirm -->
+</enum>
+
+<enum name="pc_di_vis_cull_mode"> <!-- NOTE(review): names suggest whether a draw honours visibility data; confirm against the field that uses this enum -->
+ <value name="IGNORE_VISIBILITY" value="0"/>
+ <value name="USE_VISIBILITY" value="1"/>
+</enum>
+
+<enum name="adreno_pm4_packet_type"> <!-- packet type constants; every value has only bits 28-31 set -->
+ <value name="CP_TYPE0_PKT" value="0x00000000"/>
+ <value name="CP_TYPE1_PKT" value="0x40000000"/>
+ <value name="CP_TYPE2_PKT" value="0x80000000"/>
+ <value name="CP_TYPE3_PKT" value="0xc0000000"/>
+ <value name="CP_TYPE4_PKT" value="0x40000000"/> <!-- same number as CP_TYPE1_PKT -->
+ <value name="CP_TYPE7_PKT" value="0x70000000"/>
+</enum>
+
+<!--
+ Note that in some cases, the same packet id is recycled on a later
+ generation, so variants attribute is used to distinguish. They
+ may not be completely accurate, we would probably have to analyze
+ the pfp and me/pm4 firmware to verify the packet is actually
+ handled on a particular generation. But it is at least enough to
+ disambiguate the packet-id's that were re-used for different
+ packets starting with a5xx.
+ -->
+<enum name="adreno_pm4_type3_packets" varset="chip"> <!-- type-3 packet opcodes; a doc element is placed immediately before the value it describes -->
+ <doc>initialize CP's micro-engine</doc>
+ <value name="CP_ME_INIT" value="0x48"/>
+ <doc>skip N 32-bit words to get to the next packet</doc>
+ <value name="CP_NOP" value="0x10"/>
+ <value name="CP_PREEMPT_ENABLE" value="0x1c" variants="A5XX"/>
+ <value name="CP_PREEMPT_TOKEN" value="0x1e" variants="A5XX"/>
+ <doc>
+ indirect buffer dispatch. prefetch parser uses this packet
+ type to determine whether to pre-fetch the IB
+ </doc>
+ <value name="CP_INDIRECT_BUFFER" value="0x3f"/>
+ <doc>
+ Takes the same arguments as CP_INDIRECT_BUFFER, but jumps to
+ another buffer at the same level. Must be at the end of IB, and
+ doesn't work with draw state IB's.
+ </doc>
+ <value name="CP_INDIRECT_BUFFER_CHAIN" value="0x57" variants="A5XX-"/>
+ <doc>indirect buffer dispatch. same as IB, but init is pipelined</doc>
+ <value name="CP_INDIRECT_BUFFER_PFD" value="0x37"/>
+ <doc>
+ Waits for the IDLE state of the engine before further drawing.
+ This is pipelined, so the CP may continue.
+ </doc>
+ <value name="CP_WAIT_FOR_IDLE" value="0x26"/>
+ <doc>wait until a register or memory location is a specific value</doc>
+ <value name="CP_WAIT_REG_MEM" value="0x3c"/>
+ <doc>wait until a register location is equal to a specific value</doc>
+ <value name="CP_WAIT_REG_EQ" value="0x52"/>
+ <doc>wait until a register location is >= a specific value</doc>
+ <value name="CP_WAIT_REG_GTE" value="0x53" variants="A2XX-A4XX"/>
+ <doc>wait until a read completes</doc>
+ <value name="CP_WAIT_UNTIL_READ" value="0x5c" variants="A2XX-A4XX"/>
+ <doc>wait until all base/size writes from an IB_PFD packet have completed</doc>
+ <!--
+ NOTE: CP_WAIT_IB_PFD_COMPLETE unimplemented at least since a5xx fw, and
+ recycled for something new on a7xx
+ -->
+ <value name="CP_WAIT_IB_PFD_COMPLETE" value="0x5d" varset="chip" variants="A2XX-A4XX"/>
+ <doc>register read/modify/write</doc>
+ <value name="CP_REG_RMW" value="0x21"/>
+ <doc>Set binning configuration registers</doc>
+ <value name="CP_SET_BIN_DATA" value="0x2f" variants="A2XX-A4XX"/>
+ <value name="CP_SET_BIN_DATA5" value="0x2f" variants="A5XX-"/>
+ <doc>reads register in chip and writes to memory</doc>
+ <value name="CP_REG_TO_MEM" value="0x3e"/>
+ <doc>write N 32-bit words to memory</doc>
+ <value name="CP_MEM_WRITE" value="0x3d"/>
+ <doc>write CP_PROG_COUNTER value to memory</doc>
+ <value name="CP_MEM_WRITE_CNTR" value="0x4f"/>
+ <doc>conditional execution of a sequence of packets</doc>
+ <value name="CP_COND_EXEC" value="0x44"/>
+ <doc>conditional write to memory or register</doc>
+ <value name="CP_COND_WRITE" value="0x45" variants="A2XX-A4XX"/>
+ <value name="CP_COND_WRITE5" value="0x45" variants="A5XX-"/>
+ <doc>generate an event that creates a write to memory when completed</doc>
+ <value name="CP_EVENT_WRITE" value="0x46" variants="A2XX-A6XX"/>
+ <value name="CP_EVENT_WRITE7" value="0x46" variants="A7XX-"/>
+ <doc>generate a VS|PS_done event</doc>
+ <value name="CP_EVENT_WRITE_SHD" value="0x58"/>
+ <doc>generate a cache flush done event</doc>
+ <value name="CP_EVENT_WRITE_CFL" value="0x59"/>
+ <doc>generate a z_pass done event</doc>
+ <value name="CP_EVENT_WRITE_ZPD" value="0x5b"/>
+ <doc>
+ not sure the real name, but this seems to be what is used for
+ opencl, instead of CP_DRAW_INDX..
+ </doc>
+ <value name="CP_RUN_OPENCL" value="0x31"/>
+ <doc>initiate fetch of index buffer and draw</doc>
+ <value name="CP_DRAW_INDX" value="0x22"/>
+ <doc>draw using supplied indices in packet</doc>
+ <value name="CP_DRAW_INDX_2" value="0x36" variants="A2XX-A4XX"/> <!-- this is something different on a6xx and unused on a5xx -->
+ <doc>initiate fetch of index buffer and binIDs and draw</doc>
+ <value name="CP_DRAW_INDX_BIN" value="0x34" variants="A2XX-A4XX"/>
+ <doc>initiate fetch of bin IDs and draw using supplied indices</doc>
+ <value name="CP_DRAW_INDX_2_BIN" value="0x35" variants="A2XX-A4XX"/>
+ <doc>begin/end initiator for viz query extent processing</doc>
+ <value name="CP_VIZ_QUERY" value="0x23" variants="A2XX-A4XX"/>
+ <doc>fetch state sub-blocks and initiate shader code DMAs</doc>
+ <value name="CP_SET_STATE" value="0x25"/>
+ <doc>load constant into chip and to memory</doc>
+ <value name="CP_SET_CONSTANT" value="0x2d" variants="A2XX"/>
+ <doc>load sequencer instruction memory (pointer-based)</doc>
+ <value name="CP_IM_LOAD" value="0x27"/>
+ <doc>load sequencer instruction memory (code embedded in packet)</doc>
+ <value name="CP_IM_LOAD_IMMEDIATE" value="0x2b"/>
+ <doc>load constants from a location in memory</doc>
+ <value name="CP_LOAD_CONSTANT_CONTEXT" value="0x2e" variants="A2XX"/>
+ <doc>selective invalidation of state pointers</doc>
+ <value name="CP_INVALIDATE_STATE" value="0x3b"/>
+ <doc>dynamically changes shader instruction memory partition</doc>
+ <value name="CP_SET_SHADER_BASES" value="0x4a" variants="A2XX-A4XX"/>
+ <doc>sets the 64-bit BIN_MASK register in the PFP</doc>
+ <value name="CP_SET_BIN_MASK" value="0x50" variants="A2XX-A4XX"/>
+ <doc>sets the 64-bit BIN_SELECT register in the PFP</doc>
+ <value name="CP_SET_BIN_SELECT" value="0x51" variants="A2XX-A4XX"/>
+ <doc>updates the current context, if needed</doc>
+ <value name="CP_CONTEXT_UPDATE" value="0x5e"/>
+ <doc>generate interrupt from the command stream</doc>
+ <value name="CP_INTERRUPT" value="0x40"/>
+ <doc>copy sequencer instruction memory to system memory</doc>
+ <value name="CP_IM_STORE" value="0x2c" variants="A2XX"/>
+
+ <!-- For a20x -->
+<!-- TODO handle variants..
+ <doc>
+ Program an offset that will be added to the BIN_BASE value of
+ the 3D_DRAW_INDX_BIN packet
+ </doc>
+ <value name="CP_SET_BIN_BASE_OFFSET" value="0x4b"/>
+ -->
+
+ <!-- for a22x -->
+ <doc>
+ sets draw initiator flags register in PFP, gets bitwise-ORed into
+ every draw initiator
+ </doc>
+ <value name="CP_SET_DRAW_INIT_FLAGS" value="0x4b"/>
+ <doc>sets the register protection mode</doc>
+ <value name="CP_SET_PROTECTED_MODE" value="0x5f"/>
+
+ <value name="CP_BOOTSTRAP_UCODE" value="0x6f"/>
+
+ <!-- for a3xx -->
+ <doc>load high level sequencer command</doc>
+ <value name="CP_LOAD_STATE" value="0x30" variants="A3XX"/>
+ <value name="CP_LOAD_STATE4" value="0x30" variants="A4XX-A5XX"/>
+ <doc>Conditionally load a IB based on a flag, prefetch enabled</doc>
+ <value name="CP_COND_INDIRECT_BUFFER_PFE" value="0x3a"/>
+ <doc>Conditionally load a IB based on a flag, prefetch disabled</doc>
+ <value name="CP_COND_INDIRECT_BUFFER_PFD" value="0x32" variants="A3XX"/>
+ <doc>Load a buffer with pre-fetch enabled</doc>
+ <value name="CP_INDIRECT_BUFFER_PFE" value="0x3f" variants="A5XX"/> <!-- same id as CP_INDIRECT_BUFFER -->
+ <doc>Set bin (?)</doc>
+ <value name="CP_SET_BIN" value="0x4c" variants="A2XX"/>
+
+ <doc>test 2 memory locations to dword values specified</doc>
+ <value name="CP_TEST_TWO_MEMS" value="0x71"/>
+
+ <doc>Write register, ignoring context state for context sensitive registers</doc>
+ <value name="CP_REG_WR_NO_CTXT" value="0x78"/>
+
+ <doc>Record the real-time when this packet is processed by PFP</doc>
+ <value name="CP_RECORD_PFP_TIMESTAMP" value="0x11"/>
+
+ <!-- Used to switch GPU between secure and non-secure modes -->
+ <value name="CP_SET_SECURE_MODE" value="0x66"/>
+
+ <doc>PFP waits until the FIFO between the PFP and the ME is empty</doc>
+ <value name="CP_WAIT_FOR_ME" value="0x13"/>
+
+ <!-- for a4xx -->
+ <doc>
+ Used a bit like CP_SET_CONSTANT on a2xx, but can write multiple
+ groups of registers. Looks like it can be used to create state
+ objects in GPU memory, and on state change only emit pointer
+ (via CP_SET_DRAW_STATE), which should be nice for reducing CPU
+ overhead:
+
+ (A4x) save PM4 stream pointers to execute upon a visible draw
+ </doc>
+ <value name="CP_SET_DRAW_STATE" value="0x43" variants="A4XX-"/>
+ <value name="CP_DRAW_INDX_OFFSET" value="0x38"/>
+ <value name="CP_DRAW_INDIRECT" value="0x28" variants="A4XX-"/>
+ <value name="CP_DRAW_INDX_INDIRECT" value="0x29" variants="A4XX-"/>
+ <value name="CP_DRAW_INDIRECT_MULTI" value="0x2a" variants="A6XX-"/>
+ <value name="CP_DRAW_AUTO" value="0x24"/>
+
+ <doc>
+ Enable or disable predication globally. Also resets the
+ predicate to "passing" and the local bit to enabled when
+ enabling global predication.
+ </doc>
+ <value name="CP_DRAW_PRED_ENABLE_GLOBAL" value="0x19"/>
+
+ <doc>
+ Enable or disable predication locally. Unlike globally enabling
+ predication, this packet doesn't touch any other state.
+ Predication only happens when enabled globally and locally and a
+ predicate has been set. This should be used for internal draws
+ which aren't supposed to use the predication state:
+
+ CP_DRAW_PRED_ENABLE_LOCAL(0)
+ ... do draw...
+ CP_DRAW_PRED_ENABLE_LOCAL(1)
+ </doc>
+ <value name="CP_DRAW_PRED_ENABLE_LOCAL" value="0x1a"/>
+
+ <doc>
+ Latch a draw predicate into the internal register.
+ </doc>
+ <value name="CP_DRAW_PRED_SET" value="0x4e"/>
+
+ <doc>
+ for A4xx
+ Write to register with address that does not fit into type-0 pkt
+ </doc>
+ <value name="CP_WIDE_REG_WRITE" value="0x74" variants="A4XX"/>
+
+ <doc>copy from ME scratch RAM to a register</doc>
+ <value name="CP_SCRATCH_TO_REG" value="0x4d"/>
+
+ <doc>Copy from REG to ME scratch RAM</doc>
+ <value name="CP_REG_TO_SCRATCH" value="0x4a"/>
+
+ <doc>Wait for memory writes to complete</doc>
+ <value name="CP_WAIT_MEM_WRITES" value="0x12"/>
+
+ <doc>Conditional execution based on register comparison</doc>
+ <value name="CP_COND_REG_EXEC" value="0x47"/>
+
+ <doc>Memory to REG copy</doc>
+ <value name="CP_MEM_TO_REG" value="0x42"/>
+
+ <value name="CP_EXEC_CS_INDIRECT" value="0x41" variants="A4XX-"/>
+ <value name="CP_EXEC_CS" value="0x33"/>
+
+ <doc>
+ for a5xx
+ </doc>
+ <value name="CP_PERFCOUNTER_ACTION" value="0x50" variants="A5XX"/>
+ <!-- switches SMMU pagetable, used on a5xx+ only -->
+ <value name="CP_SMMU_TABLE_UPDATE" value="0x53" variants="A5XX-"/>
+ <!-- for a6xx -->
+ <doc>Tells CP the current mode of GPU operation</doc>
+ <value name="CP_SET_MARKER" value="0x65" variants="A6XX-"/>
+ <doc>Instruct CP to set a few internal CP registers</doc>
+ <value name="CP_SET_PSEUDO_REG" value="0x56" variants="A6XX-"/>
+ <!--
+ pairs of regid and value.. seems to be used to program some TF
+ related regs:
+ -->
+ <value name="CP_CONTEXT_REG_BUNCH" value="0x5c" variants="A5XX-"/>
+ <!-- A5XX Enable yield in RB only -->
+ <value name="CP_YIELD_ENABLE" value="0x1c" variants="A5XX"/> <!-- same id and variant as CP_PREEMPT_ENABLE -->
+ <doc>
+ Enables IB2 skipping. If both GLOBAL and LOCAL are 1 and
+ nothing is left in the visibility stream, then
+ CP_INDIRECT_BUFFER will be skipped, and draws will early return
+ from their IB.
+ </doc>
+ <value name="CP_SKIP_IB2_ENABLE_GLOBAL" value="0x1d" variants="A5XX-"/>
+ <value name="CP_SKIP_IB2_ENABLE_LOCAL" value="0x23" variants="A5XX-"/>
+ <value name="CP_SET_SUBDRAW_SIZE" value="0x35" variants="A5XX-"/>
+ <value name="CP_WHERE_AM_I" value="0x62" variants="A5XX-"/>
+ <value name="CP_SET_VISIBILITY_OVERRIDE" value="0x64" variants="A5XX-"/>
+ <!-- Enable/Disable/Defer A5x global preemption model -->
+ <value name="CP_PREEMPT_ENABLE_GLOBAL" value="0x69" variants="A5XX"/>
+ <!-- Enable/Disable A5x local preemption model -->
+ <value name="CP_PREEMPT_ENABLE_LOCAL" value="0x6a" variants="A5XX"/>
+ <!-- Yield token on a5xx similar to CP_PREEMPT on a4xx -->
+ <value name="CP_CONTEXT_SWITCH_YIELD" value="0x6b" variants="A5XX-"/>
+ <!-- Inform CP about current render mode (needed for a5xx preemption) -->
+ <value name="CP_SET_RENDER_MODE" value="0x6c" variants="A5XX"/>
+ <value name="CP_COMPUTE_CHECKPOINT" value="0x6e" variants="A5XX"/>
+ <!-- check if this works on earlier.. -->
+ <value name="CP_MEM_TO_MEM" value="0x73" variants="A5XX-"/>
+
+ <doc>
+ General purpose 2D blit engine for image transfers and mipmap
+ generation. Reads through UCHE, writes through the CCU cache in
+ the PS stage.
+ </doc>
+ <value name="CP_BLIT" value="0x2c" variants="A5XX-"/>
+
+ <!-- Test specified bit in specified register and set predicate -->
+ <value name="CP_REG_TEST" value="0x39" variants="A5XX-"/>
+
+ <!--
+ Seems to set the mode flags which control which CP_SET_DRAW_STATE
+ packets are executed, based on their ENABLE_MASK values
+
+ CP_SET_MODE w/ payload of 0x1 seems to cause CP_SET_DRAW_STATE
+ packets w/ ENABLE_MASK & 0x6 to execute immediately
+ -->
+ <value name="CP_SET_MODE" value="0x63" variants="A6XX-"/>
+
+ <!--
+ Seems like there are now separate blocks of state for VS vs FS/CS
+ (probably these amounts to geometry vs fragments so that geometry
+ stage of the pipeline for next draw can start while fragment stage
+ of current draw is still running. The format of the payload of the
+ packets is the same, the only difference is the offsets of the regs
+ the firmware code that handles the packet writes.
+
+ Note that for CL, starting with a6xx, the preferred # of local
+ threads is no longer the same as the max, implying that the shader
+ core can now run warps from unrelated shaders (ie.
+ CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE vs
+ CL_KERNEL_WORK_GROUP_SIZE)
+ -->
+ <value name="CP_LOAD_STATE6_GEOM" value="0x32" variants="A6XX-"/>
+ <value name="CP_LOAD_STATE6_FRAG" value="0x34" variants="A6XX-"/>
+ <!--
+ Note: For IBO state (Image/SSBOs) which have shared state across
+ shader stages, for 3d pipeline CP_LOAD_STATE6 is used. But for
+ compute shaders, CP_LOAD_STATE6_FRAG is used. Possibly they are
+ interchangeable.
+ -->
+ <value name="CP_LOAD_STATE6" value="0x36" variants="A6XX-"/>
+
+ <!-- internal packets: -->
+ <value name="IN_IB_PREFETCH_END" value="0x17" variants="A2XX"/>
+ <value name="IN_SUBBLK_PREFETCH" value="0x1f" variants="A2XX"/>
+ <value name="IN_INSTR_PREFETCH" value="0x20" variants="A2XX"/>
+ <value name="IN_INSTR_MATCH" value="0x47" variants="A2XX"/>
+ <value name="IN_CONST_PREFETCH" value="0x49" variants="A2XX"/>
+ <value name="IN_INCR_UPDT_STATE" value="0x55" variants="A2XX"/>
+ <value name="IN_INCR_UPDT_CONST" value="0x56" variants="A2XX"/>
+ <value name="IN_INCR_UPDT_INSTR" value="0x57" variants="A2XX"/>
+
+ <!-- internal jumptable entries on a6xx+, possibly a5xx: -->
+
+ <!-- jmptable entry used to handle type4 packet on a5xx+: -->
+ <value name="PKT4" value="0x04" variants="A5XX-"/>
+ <!-- called when ROQ is empty, "returns" from an IB or merged sequence of IBs -->
+ <value name="IN_IB_END" value="0x0a" variants="A6XX-"/>
+ <!-- handles IFPC save/restore -->
+ <value name="IN_GMU_INTERRUPT" value="0x0b" variants="A6XX-"/>
+ <!-- preemption/context-switch routine -->
+ <value name="IN_PREEMPT" value="0x0f" variants="A6XX-"/>
+
+ <!-- TODO do these exist on A5xx? -->
+ <value name="CP_SCRATCH_WRITE" value="0x4c" variants="A6XX"/>
+ <value name="CP_REG_TO_MEM_OFFSET_MEM" value="0x74" variants="A6XX-"/>
+ <value name="CP_REG_TO_MEM_OFFSET_REG" value="0x72" variants="A6XX-"/>
+ <value name="CP_WAIT_MEM_GTE" value="0x14" variants="A6XX"/>
+ <value name="CP_WAIT_TWO_REGS" value="0x70" variants="A6XX"/>
+ <value name="CP_MEMCPY" value="0x75" variants="A6XX-"/>
+ <value name="CP_SET_BIN_DATA5_OFFSET" value="0x2e" variants="A6XX-"/>
+ <!-- A750+, set in place of CP_SET_BIN_DATA5_OFFSET but has different values -->
+ <value name="CP_SET_UNK_BIN_DATA" value="0x2d" variants="A7XX-"/>
+ <doc>
+ Write CP_CONTEXT_SWITCH_*_INFO from CP to the following dwords,
+ and forcibly switch to the indicated context.
+ </doc>
+ <value name="CP_CONTEXT_SWITCH" value="0x54" variants="A6XX"/>
+ <!-- Note, kgsl calls this CP_SET_AMBLE: -->
+ <value name="CP_SET_CTXSWITCH_IB" value="0x55" variants="A6XX-"/>
+
+ <!--
+ Seems to always have the payload:
+ 00000002 00008801 00004010
+ or:
+ 00000002 00008801 00004090
+ or:
+ 00000002 00008801 00000010
+ 00000002 00008801 00010010
+ 00000002 00008801 00d64010
+ ...
+ Note set for compute shaders..
+ Is 0x8801 a register offset?
+ This appears to be a special sort of register write packet
+ more or less, but the firmware has some special handling..
+ Seems like it intercepts/modifies certain register offsets,
+ but others are treated like a normal PKT4 reg write. I
+ guess there are some registers that the fw controls certain
+ bits.
+ -->
+ <value name="CP_REG_WRITE" value="0x6d" variants="A6XX"/>
+
+ <doc>
+ These first appear in a650_sqe.bin. They can in theory be used
+ to loop any sequence of IB1 commands, but in practice they are
+ used to loop over bins. There is a fixed-size per-iteration
+ prefix, used to set per-bin state, and then the following IB1
+ commands are executed until CP_END_BIN which are always the same
+ for each iteration and usually contain a list of
+ CP_INDIRECT_BUFFER calls to IB2 commands which setup state and
+ execute restore/draw/save commands. This replaces the previous
+ technique of just repeating the CP_INDIRECT_BUFFER calls and
+ "unrolling" the loop.
+ </doc>
+ <value name="CP_START_BIN" value="0x50" variants="A6XX-"/>
+ <value name="CP_END_BIN" value="0x51" variants="A6XX-"/>
+
+ <doc> Make next dword 1 to disable preemption, 0 to re-enable it. </doc>
+ <value name="CP_PREEMPT_DISABLE" value="0x6c" variants="A6XX"/>
+
+ <value name="CP_WAIT_TIMESTAMP" value="0x14" variants="A7XX-"/>
+ <value name="CP_GLOBAL_TIMESTAMP" value="0x15" variants="A7XX-"/> <!-- payload 1 dword -->
+ <value name="CP_LOCAL_TIMESTAMP" value="0x16" variants="A7XX-"/> <!-- payload 1 dword, follows 0x15 -->
+ <value name="CP_THREAD_CONTROL" value="0x17" variants="A7XX-"/>
+ <!-- payload 4 dwords, last two could be render target addr (one pkt per MRT), possibly used for GMEM save/restore?-->
+ <value name="CP_RESOURCE_LIST" value="0x18" variants="A7XX-"/>
+ <doc> Can clear BV/BR counters, or wait until one catches up to another </doc>
+ <value name="CP_BV_BR_COUNT_OPS" value="0x1b" variants="A7XX-"/>
+ <doc> Clears, adds to local, or adds to global timestamp </doc>
+ <value name="CP_MODIFY_TIMESTAMP" value="0x1c" variants="A7XX-"/>
+ <!-- similar to CP_CONTEXT_REG_BUNCH, but discards first two dwords?? -->
+ <value name="CP_CONTEXT_REG_BUNCH2" value="0x5d" variants="A7XX-"/>
+ <doc>
+ Write to a scratch memory that is read by CP_REG_TEST with
+ SOURCE_SCRATCH_MEM set. It's not the same scratch as scratch registers.
+ However it uses the same memory space.
+ </doc>
+ <value name="CP_MEM_TO_SCRATCH_MEM" value="0x49" variants="A7XX-"/>
+
+ <doc>
+ Executes an array of fixed-size command buffers where each
+ buffer is assumed to have one draw call, skipping buffers with
+ non-visible draw calls.
+ </doc>
+ <value name="CP_FIXED_STRIDE_DRAW_TABLE" value="0x7f" variants="A7XX-"/>
+
+ <doc>Reset various on-chip state used for synchronization</doc>
+ <value name="CP_RESET_CONTEXT_STATE" value="0x1f" variants="A7XX-"/>
+</enum>
+
+
+<domain name="CP_LOAD_STATE" width="32"> <!-- packet layout: three local enums followed by two 32-bit words at offsets 0 and 1 -->
+ <doc>Load state, a3xx (and later?)</doc>
+ <enum name="adreno_state_block">
+ <value name="SB_VERT_TEX" value="0"/>
+ <value name="SB_VERT_MIPADDR" value="1"/>
+ <value name="SB_FRAG_TEX" value="2"/>
+ <value name="SB_FRAG_MIPADDR" value="3"/>
+ <value name="SB_VERT_SHADER" value="4"/>
+ <value name="SB_GEOM_SHADER" value="5"/>
+ <value name="SB_FRAG_SHADER" value="6"/>
+ <value name="SB_COMPUTE_SHADER" value="7"/>
+ </enum>
+ <enum name="adreno_state_type">
+ <value name="ST_SHADER" value="0"/>
+ <value name="ST_CONSTANTS" value="1"/>
+ </enum>
+ <enum name="adreno_state_src"> <!-- value 1 is not listed -->
+ <value name="SS_DIRECT" value="0">
+ <doc>inline with the CP_LOAD_STATE packet</doc>
+ </value>
+ <value name="SS_INVALID_ALL_IC" value="2"/>
+ <value name="SS_INVALID_PART_IC" value="3"/>
+ <value name="SS_INDIRECT" value="4">
+ <doc>in buffer pointed to by EXT_SRC_ADDR</doc>
+ </value>
+ <value name="SS_INDIRECT_TCM" value="5"/>
+ <value name="SS_INDIRECT_STM" value="6"/>
+ </enum>
+ <reg32 offset="0" name="0"> <!-- the four bitfields below cover all 32 bits -->
+ <bitfield name="DST_OFF" low="0" high="15" type="uint"/>
+ <bitfield name="STATE_SRC" low="16" high="18" type="adreno_state_src"/>
+ <bitfield name="STATE_BLOCK" low="19" high="21" type="adreno_state_block"/>
+ <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="STATE_TYPE" low="0" high="1" type="adreno_state_type"/>
+ <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/> <!-- NOTE(review): low="2" with shr="2" presumably means the address is stored without its two low bits; confirm against the rnndb schema -->
+ </reg32>
+</domain>
+
+<domain name="CP_LOAD_STATE4" width="32" varset="chip"> <!-- packet layout: three local enums followed by 32-bit words at offsets 0, 1 and (A5XX and later) 2 -->
+ <doc>Load state, a4xx+</doc>
+ <enum name="a4xx_state_block"> <!-- values 0x6 and 0x7 have no name; see the notes below -->
+ <!--
+ unknown: 0x7 and 0xf <- seen in compute shader
+
+ STATE_BLOCK = 0x6, STATE_TYPE = 0x2 possibly used for preemption?
+ Seen in some GL shaders. Payload is NUM_UNIT dwords, and it contains
+ the gpuaddr of the following shader constants block. DST_OFF seems
+ to specify which shader stage:
+
+ 16 -> vert
+ 36 -> tcs
+ 56 -> tes
+ 76 -> geom
+ 96 -> frag
+
+ Example:
+
+opcode: CP_LOAD_STATE4 (30) (12 dwords)
+ { DST_OFF = 16 | STATE_SRC = SS4_DIRECT | STATE_BLOCK = 0x6 | NUM_UNIT = 4 }
+ { STATE_TYPE = 0x2 | EXT_SRC_ADDR = 0 }
+ { EXT_SRC_ADDR_HI = 0 }
+ 0000: c0264100 00000000 00000000 00000000
+ 0000: 70b0000b 01180010 00000002 00000000 c0264100 00000000 00000000 00000000
+
+opcode: CP_LOAD_STATE4 (30) (4 dwords)
+ { DST_OFF = 16 | STATE_SRC = SS4_INDIRECT | STATE_BLOCK = SB4_VS_SHADER | NUM_UNIT = 4 }
+ { STATE_TYPE = ST4_CONSTANTS | EXT_SRC_ADDR = 0xc0264100 }
+ { EXT_SRC_ADDR_HI = 0 }
+ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
+ 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
+ 0000: 00000040 0000000c 00000000 00000000 00000000 00000000 00000000 00000000
+
+ STATE_BLOCK = 0x6, STATE_TYPE = 0x1, seen in compute shader. NUM_UNITS * 2 dwords.
+
+ -->
+ <value name="SB4_VS_TEX" value="0x0"/>
+ <value name="SB4_HS_TEX" value="0x1"/> <!-- aka. TCS -->
+ <value name="SB4_DS_TEX" value="0x2"/> <!-- aka. TES -->
+ <value name="SB4_GS_TEX" value="0x3"/>
+ <value name="SB4_FS_TEX" value="0x4"/>
+ <value name="SB4_CS_TEX" value="0x5"/>
+ <value name="SB4_VS_SHADER" value="0x8"/>
+ <value name="SB4_HS_SHADER" value="0x9"/>
+ <value name="SB4_DS_SHADER" value="0xa"/>
+ <value name="SB4_GS_SHADER" value="0xb"/>
+ <value name="SB4_FS_SHADER" value="0xc"/>
+ <value name="SB4_CS_SHADER" value="0xd"/>
+ <!--
+ for SSBO, STATE_TYPE=0 appears to be addresses (four dwords each),
+ STATE_TYPE=1 sizes, STATE_TYPE=2 addresses again (two dwords each)
+
+ Compute has its own dedicated SSBO state, it seems, but the rest
+ of the stages share state
+ -->
+ <value name="SB4_SSBO" value="0xe"/>
+ <value name="SB4_CS_SSBO" value="0xf"/>
+ </enum>
+ <enum name="a4xx_state_type">
+ <value name="ST4_SHADER" value="0"/>
+ <value name="ST4_CONSTANTS" value="1"/>
+ <value name="ST4_UBO" value="2"/>
+ </enum>
+ <enum name="a4xx_state_src"> <!-- value 1 is not listed -->
+ <value name="SS4_DIRECT" value="0"/>
+ <value name="SS4_INDIRECT" value="2"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="DST_OFF" low="0" high="13" type="uint"/> <!-- bits 14-15 are not assigned to any bitfield in this word -->
+ <bitfield name="STATE_SRC" low="16" high="17" type="a4xx_state_src"/>
+ <bitfield name="STATE_BLOCK" low="18" high="21" type="a4xx_state_block"/>
+ <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="STATE_TYPE" low="0" high="1" type="a4xx_state_type"/>
+ <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-"> <!-- this word is only defined for A5XX and later -->
+ <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/>
+ </reg32>
+</domain>
+
+<!-- looks basically the same as CP_LOAD_STATE4 -->
+<domain name="CP_LOAD_STATE6" width="32" varset="chip">
+ <doc>Load state, a6xx+</doc>
+ <!--
+ Packet layout as described by the entries below:
+ dword 0: DST_OFF (bits 0-13), STATE_TYPE (14-15), STATE_SRC (16-17),
+ STATE_BLOCK (18-21), NUM_UNIT (22-31).
+ dword 1: EXT_SRC_ADDR (bits 2-31, shr="2"); bits 0-1 are not described.
+ dword 2: EXT_SRC_ADDR_HI.
+ The reg64 EXT_SRC_ADDR at offset 1 overlays dwords 1-2 as a single
+ 64-bit address.
+ -->
+ <enum name="a6xx_state_block">
+ <value name="SB6_VS_TEX" value="0x0"/>
+ <value name="SB6_HS_TEX" value="0x1"/> <!-- aka. TCS -->
+ <value name="SB6_DS_TEX" value="0x2"/> <!-- aka. TES -->
+ <value name="SB6_GS_TEX" value="0x3"/>
+ <value name="SB6_FS_TEX" value="0x4"/>
+ <value name="SB6_CS_TEX" value="0x5"/>
+ <value name="SB6_VS_SHADER" value="0x8"/>
+ <value name="SB6_HS_SHADER" value="0x9"/>
+ <value name="SB6_DS_SHADER" value="0xa"/>
+ <value name="SB6_GS_SHADER" value="0xb"/>
+ <value name="SB6_FS_SHADER" value="0xc"/>
+ <value name="SB6_CS_SHADER" value="0xd"/>
+ <value name="SB6_IBO" value="0xe"/>
+ <value name="SB6_CS_IBO" value="0xf"/>
+ </enum>
+ <!-- Values for the 2-bit STATE_TYPE field (dword 0, bits 14-15). -->
+ <enum name="a6xx_state_type">
+ <value name="ST6_SHADER" value="0"/>
+ <value name="ST6_CONSTANTS" value="1"/>
+ <value name="ST6_UBO" value="2"/>
+ <value name="ST6_IBO" value="3"/>
+ </enum>
+ <!-- Values for the 2-bit STATE_SRC field (dword 0, bits 16-17). -->
+ <enum name="a6xx_state_src">
+ <value name="SS6_DIRECT" value="0"/>
+ <value name="SS6_BINDLESS" value="1"/> <!-- TODO does this exist on a4xx/a5xx? -->
+ <value name="SS6_INDIRECT" value="2"/>
+ <doc>
+ SS6_UBO used by the a6xx vulkan blob with tesselation constants
+ in this case, EXT_SRC_ADDR is (ubo_id shl 16 | offset)
+ to load constants from a UBO loaded with DST_OFF = 14 and offset 0,
+ EXT_SRC_ADDR = 0xe0000
+ (offset is a guess, should be in bytes given that maxUniformBufferRange=64k)
+ </doc>
+ <value name="SS6_UBO" value="3"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="DST_OFF" low="0" high="13" type="uint"/>
+ <bitfield name="STATE_TYPE" low="14" high="15" type="a6xx_state_type"/>
+ <bitfield name="STATE_SRC" low="16" high="17" type="a6xx_state_src"/>
+ <bitfield name="STATE_BLOCK" low="18" high="21" type="a6xx_state_block"/>
+ <bitfield name="NUM_UNIT" low="22" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="EXT_SRC_ADDR" low="2" high="31" shr="2"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="EXT_SRC_ADDR_HI" low="0" high="31" shr="0"/>
+ </reg32>
+ <reg64 offset="1" name="EXT_SRC_ADDR" type="address"/>
+</domain>
+
+<bitset name="vgt_draw_initiator" inline="yes">
+ <!--
+ Draw initiator dword, used as dword 1 of CP_DRAW_INDX and
+ CP_DRAW_INDX_2. Bit 8 and bits 15-23 are not described here.
+ -->
+ <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/>
+ <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/>
+ <bitfield name="VIS_CULL" low="9" high="10" type="pc_di_vis_cull_mode"/>
+ <bitfield name="INDEX_SIZE" pos="11" type="pc_di_index_size"/>
+ <bitfield name="NOT_EOP" pos="12" type="boolean"/>
+ <bitfield name="SMALL_INDEX" pos="13" type="boolean"/>
+ <bitfield name="PRE_DRAW_INITIATOR_ENABLE" pos="14" type="boolean"/>
+ <bitfield name="NUM_INSTANCES" low="24" high="31" type="uint"/>
+</bitset>
+
+<!-- changed on a4xx: -->
+<enum name="a4xx_index_size">
+ <!-- Values for the 2-bit INDEX_SIZE field of vgt_draw_initiator_a4xx; value 3 is not named. -->
+ <value name="INDEX4_SIZE_8_BIT" value="0"/>
+ <value name="INDEX4_SIZE_16_BIT" value="1"/>
+ <value name="INDEX4_SIZE_32_BIT" value="2"/>
+</enum>
+
+<enum name="a6xx_patch_type">
+ <!-- Values for the 2-bit PATCH_TYPE field of vgt_draw_initiator_a4xx; value 3 is not named. -->
+ <value name="TESS_QUADS" value="0"/>
+ <value name="TESS_TRIANGLES" value="1"/>
+ <value name="TESS_ISOLINES" value="2"/>
+</enum>
+
+<bitset name="vgt_draw_initiator_a4xx" inline="yes">
+ <!--
+ Draw initiator dword used as dword 0 of CP_DRAW_INDX_OFFSET,
+ CP_DRAW_INDIRECT, CP_DRAW_INDX_INDIRECT, CP_DRAW_INDIRECT_MULTI and
+ CP_DRAW_AUTO. Compared with vgt_draw_initiator, VIS_CULL sits at
+ bits 8-9 and INDEX_SIZE is two bits wide. Bits 14-15 and 18-31 are
+ not described here.
+ -->
+ <!-- When the 0x20 bit is set, it's the number of patch vertices - 1 -->
+ <!-- NOTE(review): the comment above presumably refers to PRIM_TYPE (0x20 = its top bit); confirm -->
+ <bitfield name="PRIM_TYPE" low="0" high="5" type="pc_di_primtype"/>
+ <bitfield name="SOURCE_SELECT" low="6" high="7" type="pc_di_src_sel"/>
+ <bitfield name="VIS_CULL" low="8" high="9" type="pc_di_vis_cull_mode"/>
+ <bitfield name="INDEX_SIZE" low="10" high="11" type="a4xx_index_size"/>
+ <bitfield name="PATCH_TYPE" low="12" high="13" type="a6xx_patch_type"/>
+ <bitfield name="GS_ENABLE" pos="16" type="boolean"/>
+ <bitfield name="TESS_ENABLE" pos="17" type="boolean"/>
+</bitset>
+
+<domain name="CP_DRAW_INDX" width="32">
+ <!--
+ Five-dword layout: VIZ_QUERY, draw initiator (vgt_draw_initiator),
+ NUM_INDICES, INDX_BASE, INDX_SIZE. INDX_BASE and INDX_SIZE are
+ untyped 32-bit fields; the unit of INDX_SIZE is not stated here.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="VIZ_QUERY" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1" type="vgt_draw_initiator"/>
+ <reg32 offset="2" name="2">
+ <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="INDX_BASE" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="INDX_SIZE" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DRAW_INDX_2" width="32">
+ <!--
+ Same first three dwords as CP_DRAW_INDX (VIZ_QUERY, draw initiator,
+ NUM_INDICES); instead of an index buffer base/size, the index data
+ follows inline in the packet.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="VIZ_QUERY" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1" type="vgt_draw_initiator"/>
+ <reg32 offset="2" name="2">
+ <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/>
+ </reg32>
+ <!-- followed by NUM_INDICES indices.. -->
+</domain>
+
+<domain name="CP_DRAW_INDX_OFFSET" width="32">
+ <!--
+ Dwords 0-3 are common: draw initiator (vgt_draw_initiator_a4xx),
+ NUM_INSTANCES, NUM_INDICES, FIRST_INDX.
+ Dwords 4 and up are described twice:
+ - inside the A5XX and later stripe: 64-bit INDX_BASE (dwords 4-5,
+ also split as INDX_BASE_LO/HI) and MAX_INDICES (dword 6);
+ - outside any stripe: 32-bit INDX_BASE (dword 4) and INDX_SIZE
+ (dword 5).
+ NOTE(review): the unconditional dwords 4/5 look like the layout for
+ chips before A5XX but carry no variants attribute; confirm.
+ -->
+ <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
+ <reg32 offset="1" name="1">
+ <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="NUM_INDICES" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="FIRST_INDX" low="0" high="31"/>
+ </reg32>
+
+ <stripe varset="chip" variants="A5XX-">
+ <reg32 offset="4" name="4">
+ <bitfield name="INDX_BASE_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="INDX_BASE_HI" low="0" high="31"/>
+ </reg32>
+ <!-- 64-bit view of dwords 4-5 -->
+ <reg64 offset="4" name="INDX_BASE" type="address"/>
+ <reg32 offset="6" name="6">
+ <!-- max # of elements in index buffer -->
+ <bitfield name="MAX_INDICES" low="0" high="31"/>
+ </reg32>
+ </stripe>
+
+ <reg32 offset="4" name="4">
+ <bitfield name="INDX_BASE" low="0" high="31" type="address"/>
+ </reg32>
+
+ <reg32 offset="5" name="5">
+ <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DRAW_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-">
+ <!--
+ Dword 0 is the draw initiator (vgt_draw_initiator_a4xx). The
+ INDIRECT address that follows is one 32-bit dword on A4XX and a
+ 64-bit value (dwords 1-2, also split as INDIRECT_LO/HI) on A5XX
+ and later.
+ -->
+ <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
+ <stripe varset="chip" variants="A4XX">
+ <reg32 offset="1" name="1">
+ <bitfield name="INDIRECT" low="0" high="31"/>
+ </reg32>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg32 offset="1" name="1">
+ <bitfield name="INDIRECT_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="INDIRECT_HI" low="0" high="31"/>
+ </reg32>
+ <reg64 offset="1" name="INDIRECT" type="address"/>
+ </stripe>
+</domain>
+
+<domain name="CP_DRAW_INDX_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-">
+ <!--
+ Dword 0 is the draw initiator (vgt_draw_initiator_a4xx); the rest
+ depends on the chip generation:
+ A4XX: INDX_BASE (dword 1), INDX_SIZE in bytes (dword 2),
+ INDIRECT (dword 3), all 32-bit.
+ A5XX and later: 64-bit INDX_BASE (dwords 1-2), MAX_INDICES in
+ elements (dword 3), 64-bit INDIRECT (dwords 4-5).
+ -->
+ <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
+ <stripe varset="chip" variants="A4XX">
+ <reg32 offset="1" name="1">
+ <bitfield name="INDX_BASE" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <!-- max # of bytes in index buffer -->
+ <bitfield name="INDX_SIZE" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="INDIRECT" low="0" high="31"/>
+ </reg32>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg32 offset="1" name="1">
+ <bitfield name="INDX_BASE_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="INDX_BASE_HI" low="0" high="31"/>
+ </reg32>
+ <!-- 64-bit view of dwords 1-2 -->
+ <reg64 offset="1" name="INDX_BASE" type="address"/>
+ <reg32 offset="3" name="3">
+ <!-- max # of elements in index buffer -->
+ <bitfield name="MAX_INDICES" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="INDIRECT_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="INDIRECT_HI" low="0" high="31"/>
+ </reg32>
+ <!-- 64-bit view of dwords 4-5 -->
+ <reg64 offset="4" name="INDIRECT" type="address"/>
+ </stripe>
+</domain>
+
+<domain name="CP_DRAW_INDIRECT_MULTI" width="32" varset="chip" prefix="chip" variants="A6XX-">
+ <!--
+ Dwords 0-2 are common: draw initiator, OPCODE + DST_OFF, DRAW_COUNT.
+ The OPCODE field is marked addvariant, and the stripes below are
+ keyed on a6xx_draw_indirect_opcode: each stripe describes the
+ dwords from offset 3 onward for one opcode value.
+ -->
+ <enum name="a6xx_draw_indirect_opcode">
+ <value name="INDIRECT_OP_NORMAL" value="0x2"/>
+ <value name="INDIRECT_OP_INDEXED" value="0x4"/>
+ <value name="INDIRECT_OP_INDIRECT_COUNT" value="0x6"/>
+ <value name="INDIRECT_OP_INDIRECT_COUNT_INDEXED" value="0x7"/>
+ </enum>
+ <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
+ <reg32 offset="1" name="1">
+ <bitfield name="OPCODE" low="0" high="3" type="a6xx_draw_indirect_opcode" addvariant="yes"/>
+ <doc>
+ DST_OFF same as in CP_LOAD_STATE6 - vec4 VS const at this offset will
+ be updated for each draw to {draw_id, first_vertex, first_instance, 0}
+ value of 0 disables it
+ </doc>
+ <bitfield name="DST_OFF" low="8" high="21" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="DRAW_COUNT" type="uint"/>
+ <!-- INDIRECT_OP_NORMAL: 64-bit INDIRECT (dwords 3-4), STRIDE (dword 5) -->
+ <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_NORMAL">
+ <reg64 offset="3" name="INDIRECT" type="address"/>
+ <reg32 offset="5" name="STRIDE" type="uint"/>
+ </stripe>
+ <!-- INDIRECT_OP_INDEXED: 64-bit INDEX (3-4), MAX_INDICES (5), 64-bit INDIRECT (6-7), STRIDE (8) -->
+ <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDEXED" prefix="INDEXED">
+ <reg64 offset="3" name="INDEX" type="address"/>
+ <reg32 offset="5" name="MAX_INDICES" type="uint"/>
+ <reg64 offset="6" name="INDIRECT" type="address"/>
+ <reg32 offset="8" name="STRIDE" type="uint"/>
+ </stripe>
+ <!-- INDIRECT_OP_INDIRECT_COUNT: 64-bit INDIRECT (3-4), 64-bit INDIRECT_COUNT (5-6), STRIDE (7) -->
+ <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT" prefix="INDIRECT">
+ <reg64 offset="3" name="INDIRECT" type="address"/>
+ <reg64 offset="5" name="INDIRECT_COUNT" type="address"/>
+ <reg32 offset="7" name="STRIDE" type="uint"/>
+ </stripe>
+ <!--
+ INDIRECT_OP_INDIRECT_COUNT_INDEXED: 64-bit INDEX (3-4), MAX_INDICES (5),
+ 64-bit INDIRECT (6-7), 64-bit INDIRECT_COUNT (8-9), STRIDE (10)
+ -->
+ <stripe varset="a6xx_draw_indirect_opcode" variants="INDIRECT_OP_INDIRECT_COUNT_INDEXED" prefix="INDIRECT_INDEXED">
+ <reg64 offset="3" name="INDEX" type="address"/>
+ <reg32 offset="5" name="MAX_INDICES" type="uint"/>
+ <reg64 offset="6" name="INDIRECT" type="address"/>
+ <reg64 offset="8" name="INDIRECT_COUNT" type="address"/>
+ <reg32 offset="10" name="STRIDE" type="uint"/>
+ </stripe>
+</domain>
+
+<domain name="CP_DRAW_AUTO" width="32">
+ <!--
+ Six-dword layout: draw initiator (vgt_draw_initiator_a4xx),
+ NUM_INSTANCES, 64-bit NUM_VERTICES_BASE address (dwords 2-3),
+ NUM_VERTICES_OFFSET, STRIDE.
+ NOTE(review): presumably the vertex count is derived from a value
+ read at NUM_VERTICES_BASE; the exact computation is not stated
+ here, confirm against users of this packet.
+ -->
+ <reg32 offset="0" name="0" type="vgt_draw_initiator_a4xx"/>
+ <reg32 offset="1" name="1">
+ <bitfield name="NUM_INSTANCES" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg64 offset="2" name="NUM_VERTICES_BASE" type="address"/>
+ <reg32 offset="4" name="4">
+ <bitfield name="NUM_VERTICES_OFFSET" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="STRIDE" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DRAW_PRED_ENABLE_GLOBAL" width="32" varset="chip">
+ <!-- Single-dword packet: bit 0 is the ENABLE flag; bits 1-31 are not described. -->
+ <reg32 offset="0" name="0">
+ <bitfield name="ENABLE" pos="0" type="boolean"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DRAW_PRED_ENABLE_LOCAL" width="32" varset="chip">
+ <!-- Single-dword packet with the same layout as CP_DRAW_PRED_ENABLE_GLOBAL: bit 0 is ENABLE. -->
+ <reg32 offset="0" name="0">
+ <bitfield name="ENABLE" pos="0" type="boolean"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DRAW_PRED_SET" width="32" varset="chip">
+ <!--
+ Layout: dword 0 holds SRC (bits 4-7) and TEST (bit 8); dwords 1-2
+ hold the 64-bit MEM_ADDR. Only source value 5 (PRED_SRC_MEM) is
+ named in cp_draw_pred_src.
+ -->
+ <enum name="cp_draw_pred_src">
+ <!--
+ Sources 1-4 seem to be about combining reading
+ SO/primitive queries and setting the predicate, which is
+ a DX11-specific optimization (since in DX11 you can only
+ predicate on the result of queries).
+ -->
+ <value name="PRED_SRC_MEM" value="5">
+ <doc>
+ Read a 64-bit value at the given address and
+ test if it equals/doesn't equal 0.
+ </doc>
+ </value>
+ </enum>
+ <!-- Values for the 1-bit TEST field. -->
+ <enum name="cp_draw_pred_test">
+ <value name="NE_0_PASS" value="0"/>
+ <value name="EQ_0_PASS" value="1"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="SRC" low="4" high="7" type="cp_draw_pred_src"/>
+ <bitfield name="TEST" pos="8" type="cp_draw_pred_test"/>
+ </reg32>
+ <reg64 offset="1" name="MEM_ADDR" type="address"/>
+</domain>
+
+<domain name="CP_SET_DRAW_STATE" width="32" varset="chip" variants="A4XX-">
+ <!--
+ Described as an array of up to 100 entries with a stride of 3
+ dwords: a flags/count dword, ADDR_LO, and ADDR_HI. The BINNING,
+ GMEM and SYSMEM bits exist only on A6XX and later; bit 23 and bits
+ 29-31 of dword 0 are not described.
+ NOTE(review): ADDR_HI is marked A5XX and later while the array
+ stride is fixed at 3 and the domain covers A4XX; confirm the
+ entry size on A4XX.
+ -->
+ <array offset="0" stride="3" length="100">
+ <reg32 offset="0" name="0">
+ <bitfield name="COUNT" low="0" high="15" type="uint"/>
+ <bitfield name="DIRTY" pos="16" type="boolean"/>
+ <bitfield name="DISABLE" pos="17" type="boolean"/>
+ <bitfield name="DISABLE_ALL_GROUPS" pos="18" type="boolean"/>
+ <bitfield name="LOAD_IMMED" pos="19" type="boolean"/>
+ <bitfield name="BINNING" pos="20" varset="chip" variants="A6XX-" type="boolean"/>
+ <bitfield name="GMEM" pos="21" varset="chip" variants="A6XX-" type="boolean"/>
+ <bitfield name="SYSMEM" pos="22" varset="chip" variants="A6XX-" type="boolean"/>
+ <bitfield name="GROUP_ID" low="24" high="28" type="uint"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
+ <bitfield name="ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ </array>
+</domain>
+
+<domain name="CP_SET_BIN" width="32">
+ <doc>value at offset 0 always seems to be 0x00000000..</doc>
+ <!--
+ Dword 1 packs X1 (bits 0-15) and Y1 (bits 16-31); dword 2 packs X2
+ and Y2 the same way. Dword 0 has no described fields.
+ -->
+ <reg32 offset="0" name="0"/>
+ <reg32 offset="1" name="1">
+ <bitfield name="X1" low="0" high="15" type="uint"/>
+ <bitfield name="Y1" low="16" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="X2" low="0" high="15" type="uint"/>
+ <bitfield name="Y2" low="16" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SET_BIN_DATA" width="32">
+ <!-- Two-dword packet with 32-bit addresses; CP_SET_BIN_DATA5 is the variant with LO/HI address pairs. -->
+ <reg32 offset="0" name="0">
+ <!-- corresponds to VSC_PIPE[n].DATA_ADDR -->
+ <bitfield name="BIN_DATA_ADDR" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <!-- seems to correspond to VSC_SIZE_ADDRESS -->
+ <bitfield name="BIN_SIZE_ADDRESS" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SET_BIN_DATA5" width="32">
+ <!--
+ Layout: dword 0 holds VSC_SIZE (bits 16-21) and VSC_N (bits 22-26);
+ dwords 1-2, 3-4 and 5-6 are LO/HI pairs for BIN_DATA_ADDR,
+ BIN_SIZE_ADDRESS and BIN_PRIM_STRM; two further unnamed 64-bit
+ values sit at dwords 7-8 and 9-10. Bits 0-15 and 27-31 of dword 0
+ are not described.
+ -->
+ <reg32 offset="0" name="0">
+ <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: -->
+ <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/>
+ <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: -->
+ <bitfield name="VSC_N" low="22" high="26" type="uint"/>
+ </reg32>
+ <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS -->
+ <reg32 offset="1" name="1">
+ <bitfield name="BIN_DATA_ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="BIN_DATA_ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)-->
+ <reg32 offset="3" name="3">
+ <bitfield name="BIN_SIZE_ADDRESS_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="BIN_SIZE_ADDRESS_HI" low="0" high="31"/>
+ </reg32>
+ <!-- new on a6xx, where BIN_DATA_ADDR is the DRAW_STRM: -->
+ <reg32 offset="5" name="5">
+ <bitfield name="BIN_PRIM_STRM_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="6" name="6">
+ <bitfield name="BIN_PRIM_STRM_HI" low="0" high="31"/>
+ </reg32>
+ <!--
+ a7xx adds a few more addresses to the end of the pkt
+ -->
+ <!-- NOTE(review): the two reg64 entries below are unnamed and untyped; their meaning is not stated here -->
+ <reg64 offset="7" name="7"/>
+ <reg64 offset="9" name="9"/>
+</domain>
+
+<domain name="CP_SET_BIN_DATA5_OFFSET" width="32">
+ <doc>
+ Like CP_SET_BIN_DATA5, but set the pointers as offsets from the
+ pointers stored in VSC_PIPE_{DATA,DATA2,SIZE}_ADDRESS. Useful
+ for Vulkan where these values aren't known when the command
+ stream is recorded.
+ </doc>
+ <!--
+ Four-dword layout: dword 0 has the same VSC_SIZE/VSC_N fields as
+ CP_SET_BIN_DATA5; dwords 1-3 are 32-bit offsets (BIN_DATA_OFFSET,
+ BIN_SIZE_OFFSET, BIN_DATA2_OFFSET) rather than LO/HI address pairs.
+ -->
+ <reg32 offset="0" name="0">
+ <!-- equiv to PC_VSTREAM_CONTROL.SIZE on a3xx/a4xx: -->
+ <bitfield name="VSC_SIZE" low="16" high="21" type="uint"/>
+ <!-- equiv to PC_VSTREAM_CONTROL.N on a3xx/a4xx: -->
+ <bitfield name="VSC_N" low="22" high="26" type="uint"/>
+ </reg32>
+ <!-- BIN_DATA_ADDR -> VSC_PIPE[p].DATA_ADDRESS -->
+ <reg32 offset="1" name="1">
+ <bitfield name="BIN_DATA_OFFSET" low="0" high="31" type="uint"/>
+ </reg32>
+ <!-- BIN_SIZE_ADDRESS -> VSC_SIZE_ADDRESS + (p * 4)-->
+ <reg32 offset="2" name="2">
+ <bitfield name="BIN_SIZE_OFFSET" low="0" high="31" type="uint"/>
+ </reg32>
+ <!-- BIN_DATA2_ADDR -> VSC_PIPE[p].DATA2_ADDRESS -->
+ <reg32 offset="3" name="3">
+ <bitfield name="BIN_DATA2_OFFSET" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_REG_RMW" width="32">
+ <doc>
+ Modifies DST_REG using two sources that can either be registers
+ or immediates. If SRC1_ADD is set, then do the following:
+
+ $dst = (($dst &amp; $src0) rot $rotate) + $src1
+
+ Otherwise:
+
+ $dst = (($dst &amp; $src0) rot $rotate) | $src1
+
+ Here "rot" means rotate left.
+ </doc>
+ <!--
+ Three-dword layout: a control dword followed by SRC0 and SRC1.
+ In the control dword, ROTATE is 5 bits wide (bits 24-28) and the
+ SRC0_IS_REG / SRC1_IS_REG flags select register versus immediate
+ for the matching source dword. Bits 18-23 are not described.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="DST_REG" low="0" high="17" type="hex"/>
+ <bitfield name="ROTATE" low="24" high="28" type="uint"/>
+ <bitfield name="SRC1_ADD" pos="29" type="boolean"/>
+ <bitfield name="SRC1_IS_REG" pos="30" type="boolean"/>
+ <bitfield name="SRC0_IS_REG" pos="31" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="SRC0" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="SRC1" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_REG_TO_MEM" width="32">
+ <!--
+ Layout: dword 0 holds REG (bits 0-17), CNT (bits 18-29) and the
+ 64B and ACCUMULATE flags; dword 1 is DEST; dword 2 (DEST_HI) is
+ present only on A5XX and later.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <!-- number of registers/dwords copied is max(CNT, 1). -->
+ <bitfield name="CNT" low="18" high="29" type="uint"/>
+ <bitfield name="64B" pos="30" type="boolean"/>
+ <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="DEST" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
+ <bitfield name="DEST_HI" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_REG_TO_MEM_OFFSET_REG" width="32">
+ <doc>
+ Like CP_REG_TO_MEM, but the memory address to write to can be
+ offsetted using either one or two registers or scratch
+ registers.
+ </doc>
+ <!--
+ Dwords 0-2 use the same layout as CP_REG_TO_MEM. Dword 3 adds
+ OFFSET0 (bits 0-17) and the OFFSET0_SCRATCH flag (bit 19); bit 18
+ and bits 20-31 of dword 3 are not described.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <!-- number of registers/dwords copied is max(CNT, 1). -->
+ <bitfield name="CNT" low="18" high="29" type="uint"/>
+ <bitfield name="64B" pos="30" type="boolean"/>
+ <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="DEST" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
+ <bitfield name="DEST_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="OFFSET0" low="0" high="17" type="hex"/>
+ <bitfield name="OFFSET0_SCRATCH" pos="19" type="boolean"/>
+ </reg32>
+ <!-- followed by an optional identical OFFSET1 dword -->
+</domain>
+
+<domain name="CP_REG_TO_MEM_OFFSET_MEM" width="32">
+ <doc>
+ Like CP_REG_TO_MEM, but the memory address to write to can be
+ offsetted using a DWORD in memory.
+ </doc>
+ <!--
+ Dwords 0-2 use the same layout as CP_REG_TO_MEM. Dwords 3-4 are an
+ OFFSET_LO/OFFSET_HI pair.
+ NOTE(review): presumably the pair is the 64-bit address of the
+ offset DWORD mentioned in the doc text; confirm.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <!-- number of registers/dwords copied is max(CNT, 1). -->
+ <bitfield name="CNT" low="18" high="29" type="uint"/>
+ <bitfield name="64B" pos="30" type="boolean"/>
+ <bitfield name="ACCUMULATE" pos="31" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="DEST" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
+ <bitfield name="DEST_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="OFFSET_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="OFFSET_HI" low="0" high="31" type="hex"/>
+ </reg32>
+</domain>
+
+<domain name="CP_MEM_TO_REG" width="32">
+ <!--
+ Layout: dword 0 holds REG (bits 0-17), CNT (bits 19-29) and two
+ flags; dword 1 is SRC; dword 2 (SRC_HI) is present only on A5XX and
+ later. Note that CNT starts at bit 19 here, whereas CP_REG_TO_MEM
+ starts it at bit 18; bit 18 is not described in this packet.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <!-- number of registers/dwords copied is max(CNT, 1). -->
+ <bitfield name="CNT" low="19" high="29" type="uint"/>
+ <!-- shift each DWORD left by 2 while copying -->
+ <bitfield name="SHIFT_BY_2" pos="30" type="boolean"/>
+ <!-- does the same thing as CP_MEM_TO_MEM::UNK31 -->
+ <bitfield name="UNK31" pos="31" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="SRC" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2" varset="chip" variants="A5XX-">
+ <bitfield name="SRC_HI" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_MEM_TO_MEM" width="32">
+ <!--
+ Only the control dword is described as a register; the address
+ list that follows it is documented in the trailing comment. Bits
+ 3-28 of the control dword are not described.
+ -->
+ <reg32 offset="0" name="0">
+ <!--
+ not sure how many src operands we have, but the low
+ bits negate the n'th src argument.
+ -->
+ <bitfield name="NEG_A" pos="0" type="boolean"/>
+ <bitfield name="NEG_B" pos="1" type="boolean"/>
+ <bitfield name="NEG_C" pos="2" type="boolean"/>
+
+ <!-- if set treat src/dst as 64bit values -->
+ <bitfield name="DOUBLE" pos="29" type="boolean"/>
+ <!-- execute CP_WAIT_FOR_MEM_WRITES beforehand -->
+ <bitfield name="WAIT_FOR_MEM_WRITES" pos="30" type="boolean"/>
+ <!-- some other kind of wait -->
+ <bitfield name="UNK31" pos="31" type="boolean"/>
+ </reg32>
+ <!--
+ followed by sequence of addresses.. the first is the
+ destination and the rest are N src addresses which are
+ summed (after being negated if NEG_x bit set) allowing
+ to do things like 'result += end - start' (which turns
+ out to be useful for queries and accumulating results
+ across multiple tiles)
+ -->
+</domain>
+
+<domain name="CP_MEMCPY" width="32">
+ <!--
+ Five-dword layout: DWORDS, then SRC_LO/SRC_HI and DST_LO/DST_HI
+ pairs.
+ NOTE(review): DWORDS is presumably the copy length in dwords;
+ confirm.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="DWORDS" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="SRC_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="SRC_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="DST_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="DST_HI" low="0" high="31" type="hex"/>
+ </reg32>
+</domain>
+
+<domain name="CP_REG_TO_SCRATCH" width="32">
+ <!--
+ Single-dword packet: REG (bits 0-17), SCRATCH (bits 20-22) and CNT
+ (bits 24-26). Unlike CP_REG_TO_MEM, the count here is CNT + 1.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+ <!-- number of registers/dwords copied is CNT + 1. -->
+ <bitfield name="CNT" low="24" high="26" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SCRATCH_TO_REG" width="32">
+ <!--
+ Single-dword packet with the same REG/SCRATCH/CNT positions as
+ CP_REG_TO_SCRATCH, plus the UNK18 flag at bit 18.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG" low="0" high="17" type="hex"/>
+ <!-- note: CP_MEM_TO_REG always sets this when writing to the register -->
+ <bitfield name="UNK18" pos="18" type="boolean"/>
+ <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+ <!-- number of registers/dwords copied is CNT + 1. -->
+ <bitfield name="CNT" low="24" high="26" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SCRATCH_WRITE" width="32">
+ <!--
+ Dword 0 carries only SCRATCH (bits 20-22, the same position as in
+ CP_REG_TO_SCRATCH); the payload dwords follow it.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="SCRATCH" low="20" high="22" type="uint"/>
+ </reg32>
+ <!-- followed by one or more DWORDs to write to scratch registers -->
+</domain>
+
+<domain name="CP_MEM_WRITE" width="32">
+ <!-- Dwords 0-1 are an ADDR_LO/ADDR_HI pair; the data to write follows. -->
+ <reg32 offset="0" name="0">
+ <bitfield name="ADDR_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_HI" low="0" high="31"/>
+ </reg32>
+ <!-- followed by the DWORDs to write -->
+</domain>
+
+<enum name="cp_cond_function">
+ <!--
+ Comparison selector for the 3-bit FUNCTION field of CP_COND_WRITE,
+ CP_COND_WRITE5 and CP_WAIT_REG_MEM. Value 7 is not named.
+ -->
+ <value value="0" name="WRITE_ALWAYS"/>
+ <value value="1" name="WRITE_LT"/>
+ <value value="2" name="WRITE_LE"/>
+ <value value="3" name="WRITE_EQ"/>
+ <value value="4" name="WRITE_NE"/>
+ <value value="5" name="WRITE_GE"/>
+ <value value="6" name="WRITE_GT"/>
+</enum>
+
+<domain name="CP_COND_WRITE" width="32">
+ <!--
+ Six-dword layout with 32-bit addresses: control dword, POLL_ADDR,
+ REF, MASK, WRITE_ADDR, WRITE_DATA. CP_COND_WRITE5 is the variant
+ with LO/HI address pairs. In the control dword only bits 0-2, 4
+ and 8 are described.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/>
+ <bitfield name="POLL_MEMORY" pos="4" type="boolean"/>
+ <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="POLL_ADDR" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="REF" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="MASK" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="WRITE_ADDR" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="WRITE_DATA" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<enum name="poll_memory_type">
+ <!--
+ Values for the 2-bit POLL field of CP_COND_WRITE5 and
+ CP_WAIT_REG_MEM. POLL_ON_CHIP is restricted to A7XX and later.
+ -->
+ <value value="0" name="POLL_REGISTER"/>
+ <value value="1" name="POLL_MEMORY"/>
+ <value value="2" name="POLL_SCRATCH"/>
+ <value value="3" name="POLL_ON_CHIP" varset="chip" variants="A7XX-"/>
+</enum>
+
+<domain name="CP_COND_WRITE5" width="32">
+ <!--
+ Eight-dword layout: control dword, POLL_ADDR_LO/HI, REF, MASK,
+ WRITE_ADDR_LO/HI, WRITE_DATA. Compared with CP_COND_WRITE, the
+ control dword adds SIGNED_COMPARE (bit 3) and widens the poll
+ selector to the 2-bit POLL field (bits 4-5).
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/>
+ <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/>
+ <!-- POLL_REGISTER polls a register at POLL_ADDR_LO. -->
+ <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/>
+ <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="REF" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="MASK" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="WRITE_ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="6" name="6">
+ <bitfield name="WRITE_ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="7" name="7">
+ <bitfield name="WRITE_DATA" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_WAIT_MEM_GTE" width="32">
+ <doc>
+ Wait until a memory value is greater than or equal to the
+ reference, using signed comparison.
+ </doc>
+ <!-- Four-dword layout: RESERVED, POLL_ADDR_LO/HI, REF. -->
+ <reg32 offset="0" name="0">
+ <!-- Reserved for flags, presumably? Unused in FW -->
+ <bitfield name="RESERVED" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="REF" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_WAIT_REG_MEM" width="32">
+ <doc>
+ This uses the same internal comparison as CP_COND_WRITE,
+ but waits until the comparison is true instead. It busy-loops in
+ the CP for the given number of cycles before trying again.
+ </doc>
+ <!--
+ Dwords 0-4 use the same field layout as CP_COND_WRITE5 (control
+ dword, POLL_ADDR_LO/HI, REF, MASK); dword 5 is DELAY_LOOP_CYCLES
+ instead of a write address.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="FUNCTION" low="0" high="2" type="cp_cond_function"/>
+ <bitfield name="SIGNED_COMPARE" pos="3" type="boolean"/>
+ <bitfield name="POLL" low="4" high="5" type="poll_memory_type"/>
+ <bitfield name="WRITE_MEMORY" pos="8" type="boolean"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="POLL_ADDR_LO" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="POLL_ADDR_HI" low="0" high="31" type="hex"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="REF" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="MASK" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="DELAY_LOOP_CYCLES" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_WAIT_TWO_REGS" width="32">
+ <doc>
+ Waits for REG0 to not be 0 or REG1 to not equal REF
+ </doc>
+ <!--
+ Three-dword layout: REG0 and REG1 are 18-bit fields (bits 0-17 of
+ dwords 0 and 1); REF is the full dword 2.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="REG0" low="0" high="17" type="hex"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="REG1" low="0" high="17" type="hex"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="REF" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_DISPATCH_COMPUTE" width="32">
+ <!--
+ Four-dword layout: dword 0 has no described fields; dwords 1-3 are
+ untyped 32-bit X, Y and Z values.
+ NOTE(review): presumably X/Y/Z are the dispatch dimensions; the
+ unit (workgroups versus threads) is not stated here, confirm.
+ -->
+ <reg32 offset="0" name="0"/>
+ <reg32 offset="1" name="1">
+ <bitfield name="X" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="Y" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="Z" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SET_RENDER_MODE" width="32">
+ <!--
+ Eight-dword layout: MODE, ADDR_0_LO/HI, a flags dword, an
+ undescribed dword 4, then ADDR_1_LEN and ADDR_1_LO/HI.
+ -->
+ <!-- Named values for MODE; values 0, 4 and 6 are not named. -->
+ <enum name="render_mode_cmd">
+ <value value="1" name="BYPASS"/>
+ <value value="2" name="BINNING"/>
+ <value value="3" name="GMEM"/>
+ <value value="5" name="BLIT2D"/>
+ <!-- placeholder name.. used when CP_BLIT packets with BLIT_OP_SCALE?? -->
+ <value value="7" name="BLIT2DSCALE"/>
+ <!-- 8 set before going back to BYPASS exiting 2D -->
+ <value value="8" name="END2D"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <!-- MODE is declared 9 bits wide (bits 0-8), although the largest named value is 8 -->
+ <bitfield name="MODE" low="0" high="8" type="render_mode_cmd"/>
+ <!--
+ normally 0x1/0x3, sometimes see 0x5/0x8 with unknown registers in
+ 0x21xx range.. possibly (at least some) a5xx variants have a
+ 2d core?
+ -->
+ </reg32>
+ <!-- I think first buffer is for GPU to save context in case of ctx switch? -->
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR_0_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <!--
+ set when in GMEM.. maybe indicates GMEM contents need to be
+ preserved on ctx switch?
+ -->
+ <bitfield name="VSC_ENABLE" pos="3" type="boolean"/>
+ <bitfield name="GMEM_ENABLE" pos="4" type="boolean"/>
+ </reg32>
+ <reg32 offset="4" name="4"/>
+ <!-- second buffer looks like some cmdstream.. length in dwords: -->
+ <reg32 offset="5" name="5">
+ <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="6" name="6">
+ <bitfield name="ADDR_1_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="7" name="7">
+ <bitfield name="ADDR_1_HI" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<!-- this looks fairly similar to CP_SET_RENDER_MODE minus first dword -->
+<domain name="CP_COMPUTE_CHECKPOINT" width="32">
+ <!--
+ Eight-dword layout: ADDR_0_LO/HI (dwords 0-1), two undescribed
+ dwords (2-3), ADDR_1_LEN (4), ADDR_1_LO/HI (5-6) and an undescribed
+ dword 7.
+ -->
+ <!-- I think first buffer is for GPU to save context in case of ctx switch? -->
+ <reg32 offset="0" name="0">
+ <bitfield name="ADDR_0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_0_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ </reg32>
+ <reg32 offset="3" name="3"/>
+ <!-- second buffer looks like some cmdstream.. length in dwords: -->
+ <reg32 offset="4" name="4">
+ <bitfield name="ADDR_1_LEN" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="ADDR_1_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="6" name="6">
+ <bitfield name="ADDR_1_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="7" name="7"/>
+</domain>
+
+<domain name="CP_PERFCOUNTER_ACTION" width="32">
+ <!--
+ Three-dword layout: dword 0 has no described fields; dwords 1-2
+ are an ADDR_0_LO/ADDR_0_HI pair.
+ -->
+ <reg32 offset="0" name="0">
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR_0_HI" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain varset="chip" name="CP_EVENT_WRITE" width="32">
+ <!--
+ Four-dword layout: dword 0 holds EVENT (bits 0-7, vgt_event_type)
+ plus the TIMESTAMP and IRQ flags in bits 30-31; dwords 1-2 are an
+ ADDR_0_LO/ADDR_0_HI pair; dword 3 has no described fields. Bits
+ 8-29 of dword 0 are not described.
+ -->
+ <reg32 offset="0" name="0">
+ <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/>
+ <!-- when set, write back timestamp instead of value from packet: -->
+ <bitfield name="TIMESTAMP" pos="30" type="boolean"/>
+ <bitfield name="IRQ" pos="31" type="boolean"/>
+ </reg32>
+ <!--
+ TODO what is gpuaddr for, seems to be all 0's.. maybe needed for
+ context switch?
+ -->
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR_0_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <!-- ??? -->
+ </reg32>
+</domain>
+
+<domain varset="chip" name="CP_EVENT_WRITE7" width="32"> <!-- dword 1+ layout is selected by WRITE_DST: EV_DST_RAM uses ADDR_0_LO/HI in dwords 1-2, EV_DST_ONCHIP uses ONCHIP_ADDR_0 in dword 1; payload is dwords 3-4 in both -->
+ <enum name="event_write_src">
+ <!-- Write payload[0] -->
+ <value value="0" name="EV_WRITE_USER_32B"/>
+ <!-- Write payload[0] payload[1] -->
+ <value value="1" name="EV_WRITE_USER_64B"/>
+ <!-- Write (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL) -->
+ <value value="2" name="EV_WRITE_TIMESTAMP_SUM"/>
+ <value value="3" name="EV_WRITE_ALWAYSON"/> <!-- NOTE(review): undocumented here; presumably writes the always-on counter value, confirm -->
+ <!-- Write payload[1] regs starting at payload[0] offset -->
+ <value value="4" name="EV_WRITE_REGS_CONTENT"/>
+ </enum>
+
+ <enum name="event_write_dst">
+ <value value="0" name="EV_DST_RAM"/>
+ <value value="1" name="EV_DST_ONCHIP"/>
+ </enum>
+
+ <reg32 offset="0" name="0">
+ <bitfield name="EVENT" low="0" high="7" type="vgt_event_type"/>
+ <bitfield name="WRITE_SAMPLE_COUNT" pos="12" type="boolean"/>
+ <!-- Write sample count at (iova + 16) -->
+ <bitfield name="SAMPLE_COUNT_END_OFFSET" pos="13" type="boolean"/>
+ <!-- *(iova + 8) = *(iova + 16) - *iova -->
+ <bitfield name="WRITE_SAMPLE_COUNT_DIFF" pos="14" type="boolean"/>
+
+ <!-- Next 4 flags are valid to set only when concurrent binning is enabled -->
+ <!-- Increment 16b BV counter. Valid only in BV pipe -->
+ <bitfield name="INC_BV_COUNT" pos="16" type="boolean"/>
+ <!-- Increment 16b BR counter. Valid only in BR pipe -->
+ <bitfield name="INC_BR_COUNT" pos="17" type="boolean"/>
+ <bitfield name="CLEAR_RENDER_RESOURCE" pos="18" type="boolean"/>
+ <bitfield name="CLEAR_LRZ_RESOURCE" pos="19" type="boolean"/>
+
+ <bitfield name="WRITE_SRC" low="20" high="22" type="event_write_src"/>
+ <bitfield name="WRITE_DST" pos="24" type="event_write_dst" addvariant="yes"/> <!-- its value picks which stripe below applies -->
+ <!-- Writes into WRITE_DST from WRITE_SRC. RB_DONE_TS requires WRITE_ENABLED. -->
+ <bitfield name="WRITE_ENABLED" pos="27" type="boolean"/>
+ </reg32>
+
+ <stripe varset="event_write_dst" variants="EV_DST_RAM">
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR_0_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="PAYLOAD_0" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="PAYLOAD_1" low="0" high="31"/>
+ </reg32>
+ </stripe>
+
+ <stripe varset="event_write_dst" variants="EV_DST_ONCHIP"> <!-- no dword 2 is described for this variant -->
+ <reg32 offset="1" name="1">
+ <bitfield name="ONCHIP_ADDR_0" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="PAYLOAD_0" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="PAYLOAD_1" low="0" high="31"/>
+ </reg32>
+ </stripe>
+</domain>
+
+<domain name="CP_BLIT" width="32"> <!-- dword 0: blit op; dwords 1-4: (X1,Y1) then (X2,Y2) for source, then for destination; X in bits 0-13, Y in bits 16-29 -->
+ <enum name="cp_blit_cmd">
+ <value value="0" name="BLIT_OP_FILL"/>
+ <value value="1" name="BLIT_OP_COPY"/>
+ <value value="3" name="BLIT_OP_SCALE"/> <!-- used for mipmap generation -->
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="OP" low="0" high="3" type="cp_blit_cmd"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="SRC_X1" low="0" high="13" type="uint"/>
+ <bitfield name="SRC_Y1" low="16" high="29" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="SRC_X2" low="0" high="13" type="uint"/>
+ <bitfield name="SRC_Y2" low="16" high="29" type="uint"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="DST_X1" low="0" high="13" type="uint"/>
+ <bitfield name="DST_Y1" low="16" high="29" type="uint"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="DST_X2" low="0" high="13" type="uint"/>
+ <bitfield name="DST_Y2" low="16" high="29" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_EXEC_CS" width="32"> <!-- dword 0 has no named fields; dwords 1-3 are NGROUPS_X/Y/Z (presumably workgroup counts per dimension; confirm) -->
+ <reg32 offset="0" name="0">
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="NGROUPS_X" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="NGROUPS_Y" low="0" high="31" type="uint"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="NGROUPS_Z" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_EXEC_CS_INDIRECT" width="32" varset="chip" prefix="chip" variants="A4XX-"> <!-- A4XX: 32-bit ADDR in dword 1, local size in dword 2; A5XX and later: ADDR_LO/HI in dwords 1-2, local size in dword 3 -->
+ <reg32 offset="0" name="0">
+ </reg32>
+ <stripe varset="chip" variants="A4XX">
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2"> <!-- bits 0-1 have no named field -->
+ <!-- localsize is value minus one: -->
+ <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/>
+ <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/>
+ <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/>
+ </reg32>
+ </stripe>
+ <stripe varset="chip" variants="A5XX-">
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3"> <!-- same bit layout as the A4XX local-size dword, one dword later -->
+ <!-- localsize is value minus one: -->
+ <bitfield name="LOCALSIZEX" low="2" high="11" type="uint"/>
+ <bitfield name="LOCALSIZEY" low="12" high="21" type="uint"/>
+ <bitfield name="LOCALSIZEZ" low="22" high="31" type="uint"/>
+ </reg32>
+ </stripe>
+</domain>
+
+<domain name="CP_SET_MARKER" width="32" varset="chip" prefix="chip" variants="A6XX-">
+ <doc>Tell CP the current operation mode, indicates save and restore procedure</doc>
+ <enum name="a6xx_marker">
+ <value value="1" name="RM6_BYPASS"/>
+ <value value="2" name="RM6_BINNING"/>
+ <value value="4" name="RM6_GMEM"/>
+ <value value="5" name="RM6_ENDVIS"/>
+ <value value="6" name="RM6_RESOLVE"/>
+ <value value="7" name="RM6_YIELD"/>
+ <value value="8" name="RM6_COMPUTE"/>
+ <value value="0xc" name="RM6_BLIT2DSCALE"/> <!-- no-op (at least on current sqe fw) -->
+
+ <!--
+ These values come from a6xx_set_marker() in the
+ downstream kernel, and they can only be set by the kernel
+ -->
+ <value value="0xd" name="RM6_IB1LIST_START"/>
+ <value value="0xe" name="RM6_IB1LIST_END"/>
+ <!-- IFPC - inter-frame power collapse -->
+ <value value="0x100" name="RM6_IFPC_ENABLE"/> <!-- needs b8, so only representable in the 9-bit MODE field -->
+ <value value="0x101" name="RM6_IFPC_DISABLE"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <!--
+ NOTE: blob driver and some versions of freedreno/turnip set
+ b4, which is unused (at least by current sqe fw), but interferes
+ with parsing if we extend the size of the bitfield to include
+ b8 (only sent by kernel mode driver). Really, the way the
+ parsing works in the firmware, only b0-b3 are considered, but
+ if b8 is set, the low bits are interpreted differently. To
+ model this, without getting confused by spurious b4, this is
+ described as two overlapping bitfields:
+ -->
+ <bitfield name="MODE" low="0" high="8" type="a6xx_marker"/> <!-- b0-b8: wide view that includes b8 -->
+ <bitfield name="MARKER" low="0" high="3" type="a6xx_marker"/> <!-- b0-b3 only: unaffected by a spurious b4 -->
+ </reg32>
+</domain>
+
+<domain name="CP_SET_PSEUDO_REG" width="32" varset="chip" prefix="chip" variants="A6XX-">
+ <doc>Set internal CP registers, used to indicate context save data addresses</doc>
+ <enum name="pseudo_reg">
+ <value value="0" name="SMMU_INFO"/>
+ <value value="1" name="NON_SECURE_SAVE_ADDR"/>
+ <value value="2" name="SECURE_SAVE_ADDR"/>
+ <value value="3" name="NON_PRIV_SAVE_ADDR"/>
+ <value value="4" name="COUNTER"/>
+
+ <!--
+ On a6xx the registers are set directly and CP_SET_BIN_DATA5_OFFSET reads them,
+ but that doesn't work with concurrent binning because BR will be reading from
+ a different set of streams than BV is writing, so on a7xx we have these
+ pseudo-regs instead, which do the right thing.
+
+ The corresponding VSC registers exist, and they're written by BV when it
+ encounters CP_SET_PSEUDO_REG. When BR later encounters the same CP_SET_PSEUDO_REG
+ it will only write some private scratch registers which are read by
+ CP_SET_BIN_DATA5_OFFSET.
+
+ If concurrent binning is disabled then BR also does binning so it will also
+ write the "real" registers in BR.
+ -->
+ <value value="8" name="DRAW_STRM_ADDRESS"/>
+ <value value="9" name="DRAW_STRM_SIZE_ADDRESS"/>
+ <value value="10" name="PRIM_STRM_ADDRESS"/>
+ <value value="11" name="UNK_STRM_ADDRESS"/>
+ <value value="12" name="UNK_STRM_SIZE_ADDRESS"/>
+
+ <value value="16" name="BINDLESS_BASE_0_ADDR"/>
+ <value value="17" name="BINDLESS_BASE_1_ADDR"/>
+ <value value="18" name="BINDLESS_BASE_2_ADDR"/>
+ <value value="19" name="BINDLESS_BASE_3_ADDR"/>
+ <value value="20" name="BINDLESS_BASE_4_ADDR"/>
+ <value value="21" name="BINDLESS_BASE_5_ADDR"/>
+ <value value="22" name="BINDLESS_BASE_6_ADDR"/>
+ </enum>
+ <array offset="0" stride="3" length="100"> <!-- repeated 3-dword entries: pseudo_reg id, then LO and HI dwords -->
+ <reg32 offset="0" name="0">
+ <bitfield name="PSEUDO_REG" low="0" high="10" type="pseudo_reg"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="HI" low="0" high="31"/>
+ </reg32>
+ </array>
+</domain>
+
+<domain name="CP_REG_TEST" width="32" varset="chip" prefix="chip" variants="A6XX-">
+ <doc>
+ Tests bit in specified register and sets predicate for CP_COND_REG_EXEC.
+ So:
+
+ opcode: CP_REG_TEST (39) (2 dwords)
+ { REG = 0xc10 | BIT = 0 }
+ 0000: 70b90001 00000c10
+ opcode: CP_COND_REG_EXEC (47) (3 dwords)
+ 0000: 70c70002 10000000 00000004
+ opcode: CP_INDIRECT_BUFFER (3f) (4 dwords)
+
+ Will execute the CP_INDIRECT_BUFFER only if b0 in the register at
+ offset 0x0c10 is 1
+ </doc>
+ <enum name="source_type">
+ <value value="0" name="SOURCE_REG"/>
+ <!-- Don't confuse with scratch registers, this is a separate memory
+ written into by CP_MEM_TO_SCRATCH_MEM. -->
+ <value value="1" name="SOURCE_SCRATCH_MEM" varset="chip" variants="A7XX-"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <!-- the register to test -->
+ <bitfield name="REG" low="0" high="17" varset="source_type" variants="SOURCE_REG"/>
+ <bitfield name="SCRATCH_MEM_OFFSET" low="0" high="17" varset="source_type" variants="SOURCE_SCRATCH_MEM"/> <!-- same bits as REG, used when SOURCE is SOURCE_SCRATCH_MEM -->
+ <bitfield name="SOURCE" pos="18" type="source_type" addvariant="yes"/> <!-- selects whether bits 0-17 are REG or SCRATCH_MEM_OFFSET -->
+ <!-- the bit to test -->
+ <bitfield name="BIT" low="20" high="24" type="uint"/>
+ <!-- skip implied CP_WAIT_FOR_ME -->
+ <bitfield name="SKIP_WAIT_FOR_ME" pos="25" type="boolean"/>
+ <!-- the predicate bit to set (new in gen3+) -->
+ <bitfield name="PRED_BIT" low="26" high="30" type="uint"/>
+ <!-- update the predicate reg directly (new in gen3+) -->
+ <bitfield name="PRED_UPDATE" pos="31" type="boolean"/>
+ </reg32>
+
+ <!--
+ In PRED_UPDATE mode, the predicate reg is updated directly using two
+ more dwords, ignoring other bits:
+
+ PRED_REG = (PRED_REG & ~PRED_MASK) | (PRED_VAL & PRED_MASK);
+ -->
+ <reg32 offset="1" name="PRED_MASK" type="hex"/>
+ <reg32 offset="2" name="PRED_VAL" type="hex"/>
+</domain>
+
+<!-- I *think* this existed at least as far back as a4xx -->
+<domain name="CP_COND_REG_EXEC" width="32">
+ <enum name="compare_mode">
+ <!-- use the predicate bit set by CP_REG_TEST -->
+ <value value="1" name="PRED_TEST"/>
+ <!-- compare two registers directly for equality -->
+ <value value="2" name="REG_COMPARE"/>
+ <!-- test if certain render modes are set via CP_SET_MARKER -->
+ <value value="3" name="RENDER_MODE" varset="chip" variants="A6XX-"/>
+ <!-- compare REG0 for equality with immediate -->
+ <value value="4" name="REG_COMPARE_IMM" varset="chip" variants="A7XX-"/>
+ <!-- test which of BR/BV are enabled -->
+ <value value="5" name="THREAD_MODE" varset="chip" variants="A7XX-"/>
+ </enum>
+ <reg32 offset="0" name="0" varset="compare_mode">
+ <bitfield name="REG0" low="0" high="17" variants="REG_COMPARE" type="hex"/>
+
+ <!-- the predicate bit to test (new in gen3+) -->
+ <bitfield name="PRED_BIT" low="18" high="22" variants="PRED_TEST" type="uint"/>
+ <bitfield name="SKIP_WAIT_FOR_ME" pos="23" varset="chip" variants="A7XX-" type="boolean"/>
+ <!-- With REG_COMPARE: read the value from ONCHIP memory instead of from a register -->
+ <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/>
+
+ <!--
+ Note: these bits have the same meaning, and use the same
+ internal mechanism as the bits in CP_SET_DRAW_STATE.
+ When RENDER_MODE is selected, they're used as
+ a bitmask of which modes pass the test.
+ -->
+
+ <!-- RM6_BINNING -->
+ <bitfield name="BINNING" pos="25" variants="RENDER_MODE" type="boolean"/>
+ <!-- all others -->
+ <bitfield name="GMEM" pos="26" variants="RENDER_MODE" type="boolean"/>
+ <!-- RM6_BYPASS -->
+ <bitfield name="SYSMEM" pos="27" variants="RENDER_MODE" type="boolean"/>
+
+ <bitfield name="BV" pos="25" variants="THREAD_MODE" type="boolean"/> <!-- bits 25-27 are reused per mode: BV/BR/LPAC under THREAD_MODE -->
+ <bitfield name="BR" pos="26" variants="THREAD_MODE" type="boolean"/>
+ <bitfield name="LPAC" pos="27" variants="THREAD_MODE" type="boolean"/>
+
+ <bitfield name="MODE" low="28" high="31" type="compare_mode" addvariant="yes"/> <!-- its value picks which stripe below describes dword 1 -->
+ </reg32>
+
+ <stripe varset="compare_mode" variants="PRED_TEST">
+ <reg32 offset="1" name="1">
+ <bitfield name="DWORDS" low="0" high="23" type="uint"/>
+ </reg32>
+ </stripe>
+
+ <stripe varset="compare_mode" variants="REG_COMPARE">
+ <reg32 offset="1" name="1">
+ <bitfield name="REG1" low="0" high="17" type="hex"/>
+ <!-- Read the value from ONCHIP memory instead of from a register -->
+ <bitfield name="ONCHIP_MEM" pos="24" varset="chip" variants="A7XX-" type="boolean"/>
+ </reg32>
+ </stripe>
+
+ <stripe varset="compare_mode" variants="RENDER_MODE">
+ <reg32 offset="1" name="1">
+ <bitfield name="DWORDS" low="0" high="23" type="uint"/>
+ </reg32>
+ </stripe>
+
+ <stripe varset="compare_mode" variants="REG_COMPARE_IMM">
+ <reg32 offset="1" name="1">
+ <bitfield name="IMM" low="0" high="31"/>
+ </reg32>
+ </stripe>
+
+ <stripe varset="compare_mode" variants="THREAD_MODE">
+ <reg32 offset="1" name="1">
+ <bitfield name="DWORDS" low="0" high="23" type="uint"/>
+ </reg32>
+ </stripe>
+
+ <reg32 offset="2" name="2"> <!-- declared outside every stripe, so not tied to a compare_mode variant -->
+ <bitfield name="DWORDS" low="0" high="23" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_COND_EXEC" width="32"> <!-- 6 dwords: ADDR0 LO/HI, ADDR1 LO/HI, REF, DWORDS -->
+ <doc>
+ Executes the following DWORDs of commands if the dword at ADDR0
+ is not equal to 0 and the dword at ADDR1 is less than REF
+ (signed comparison).
+ </doc>
+ <reg32 offset="0" name="0">
+ <bitfield name="ADDR0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR0_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="ADDR1_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="ADDR1_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="4" name="4">
+ <bitfield name="REF" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="5" name="5">
+ <bitfield name="DWORDS" low="0" high="31" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_SET_CTXSWITCH_IB" width="32"> <!-- 3 dwords: ADDR LO/HI, then DWORDS plus the ctxswitch_ib TYPE being set -->
+ <doc>
+ Used by the userspace driver to set various IB's which are
+ executed during context save/restore for handling
+ state that isn't restored by the
+ context switch routine itself.
+ </doc>
+ <enum name="ctxswitch_ib">
+ <value name="RESTORE_IB" value="0">
+ <doc>Executed unconditionally when switching back to the context.</doc>
+ </value>
+ <value name="YIELD_RESTORE_IB" value="1">
+ <doc>
+ Executed when switching back after switching
+ away during execution of
+ a CP_SET_MARKER packet with RM6_YIELD as the
+ payload *and* the normal save routine was
+ bypassed for a shorter one. I think this is
+ connected to the "skipsaverestore" bit set by
+ the kernel when preempting.
+ </doc>
+ </value>
+ <value name="SAVE_IB" value="2">
+ <doc>
+ Executed when switching away from the context,
+ except for context switches initiated via
+ CP_YIELD.
+ </doc>
+ </value>
+ <value name="RB_SAVE_IB" value="3">
+ <doc>
+ This can only be set by the RB (i.e. the kernel)
+ and executes with protected mode off, but
+ is otherwise similar to SAVE_IB.
+
+ Note, kgsl calls this CP_KMD_AMBLE_TYPE
+ </doc>
+ </value>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="ADDR_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="ADDR_HI" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="DWORDS" low="0" high="19" type="uint"/> <!-- presumably the IB length in dwords; confirm -->
+ <bitfield name="TYPE" low="20" high="21" type="ctxswitch_ib"/>
+ </reg32>
+</domain>
+
+<domain name="CP_REG_WRITE" width="32"> <!-- dword 0: TRACKER selector; dwords 1 and 2 have no named fields -->
+ <enum name="reg_tracker"> <!-- in this enum each <doc> describes the <value> that follows it -->
+ <doc>
+ Keep shadow copies of these registers and only set them
+ when drawing, avoiding redundant writes:
+ - VPC_CNTL_0
+ - HLSQ_CONTROL_1_REG
+ - HLSQ_UNKNOWN_B980
+ </doc>
+ <value name="TRACK_CNTL_REG" value="0x1"/>
+ <doc>
+ Track RB_RENDER_CNTL, and insert a WFI in the following
+ situation:
+ - There is a write that disables binning
+ - There was a draw with binning left enabled, but in
+ BYPASS mode
+ Presumably this is a hang workaround?
+ </doc>
+ <value name="TRACK_RENDER_CNTL" value="0x2"/>
+ <doc>
+ Do a mysterious CP_EVENT_WRITE 0x3f when the low bit of
+ the data to write is 0. Used by the Vulkan blob with
+ PC_MULTIVIEW_CNTL, but this isn't predicated on particular
+ register(s) like the others.
+ </doc>
+ <value name="UNK_EVENT_WRITE" value="0x4"/>
+ <doc>
+ Tracks GRAS_LRZ_CNTL::GREATER, GRAS_LRZ_CNTL::DIR, and
+ GRAS_LRZ_DEPTH_VIEW with previous values, and if one of
+ the following is true:
+ - GRAS_LRZ_CNTL::GREATER has changed
+ - GRAS_LRZ_CNTL::DIR has changed, the old value is not
+ CUR_DIR_GE, and the new value is not CUR_DIR_DISABLED
+ - GRAS_LRZ_DEPTH_VIEW has changed
+ then it does a LRZ_FLUSH with GRAS_LRZ_CNTL::ENABLE
+ forced to 1.
+ Only exists in a650_sqe.fw.
+ </doc>
+ <value name="TRACK_LRZ" value="0x8"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="TRACKER" low="0" high="3" type="reg_tracker"/>
+ </reg32>
+ <reg32 offset="1" name="1"/>
+ <reg32 offset="2" name="2"/>
+</domain>
+
+<domain name="CP_SMMU_TABLE_UPDATE" width="32"> <!-- 4 dwords: TTBR0 (48 bits across dwords 0-1) with ASID, CONTEXTIDR, CONTEXTBANK -->
+ <doc>
+ Note that the SMMU's definition of TTBRn can take different forms
+ depending on the pgtable format. But a5xx+ only uses aarch64
+ format.
+ </doc>
+ <reg32 offset="0" name="0">
+ <bitfield name="TTBR0_LO" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="1" name="1"> <!-- upper 16 bits of TTBR0 share this dword with the 16-bit ASID -->
+ <bitfield name="TTBR0_HI" low="0" high="15"/>
+ <bitfield name="ASID" low="16" high="31"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <doc>Unused, does not apply to aarch64 pgtable format</doc>
+ <bitfield name="CONTEXTIDR" low="0" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="CONTEXTBANK" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_START_BIN" width="32"> <!-- 5 dwords: BIN_COUNT, 64-bit PREFIX_ADDR (dwords 1-2), PREFIX_DWORDS, BODY_DWORDS -->
+ <reg32 offset="0" name="BIN_COUNT" type="uint"/>
+ <reg64 offset="1" name="PREFIX_ADDR" type="address"/> <!-- reg64: occupies dwords 1 and 2 -->
+ <reg32 offset="3" name="PREFIX_DWORDS">
+ <doc>
+ Size of prefix for each bin. For each bin index i, the
+ prefix commands at PREFIX_ADDR + i * PREFIX_DWORDS are
+ executed in an IB2 before the IB1 commands following
+ this packet.
+ </doc>
+ </reg32>
+ <reg32 offset="4" name="BODY_DWORDS">
+ <doc>Number of dwords after this packet until CP_END_BIN</doc>
+ </reg32>
+</domain>
+
+<domain name="CP_WAIT_TIMESTAMP" width="32"> <!-- dword 0: what to compare and where to wait; dword 1+: address per WAIT_DST; dwords 3-4: SRC_0/SRC_1 -->
+ <enum name="ts_wait_value_src">
+ <!-- Wait for value at memory address to be >= SRC_0 (signed comparison) -->
+ <value value="0" name="TS_WAIT_GE_32B"/>
+ <!-- Wait for value at memory address to be >= SRC_0 (unsigned) -->
+ <value value="1" name="TS_WAIT_GE_64B"/>
+ <!-- Write (TIMESTAMP_GLOBAL + TIMESTAMP_LOCAL); NOTE(review): "Write" reads like a copy from event_write_src, presumably this waits against the timestamp sum, confirm -->
+ <value value="2" name="TS_WAIT_GE_TIMESTAMP_SUM"/>
+ </enum>
+
+ <enum name="ts_wait_type">
+ <value value="0" name="TS_WAIT_RAM"/>
+ <value value="1" name="TS_WAIT_ONCHIP"/>
+ </enum>
+
+ <reg32 offset="0" name="0">
+ <bitfield name="WAIT_VALUE_SRC" low="0" high="1" type="ts_wait_value_src"/>
+ <bitfield name="WAIT_DST" pos="4" type="ts_wait_type" addvariant="yes"/> <!-- its value picks which stripe below applies -->
+ </reg32>
+
+ <stripe varset="ts_wait_type" variants="TS_WAIT_RAM">
+ <reg64 offset="1" name="ADDR" type="address"/>
+ </stripe>
+
+ <stripe varset="ts_wait_type" variants="TS_WAIT_ONCHIP">
+ <reg32 offset="1" name="ONCHIP_ADDR_0" low="0" high="31"/> <!-- NOTE(review): low/high appear on bitfields everywhere else in this section; confirm they are intended on a reg32 -->
+ </stripe>
+
+ <reg32 offset="3" name="SRC_0"/>
+ <reg32 offset="4" name="SRC_1"/>
+</domain>
+
+<domain name="CP_BV_BR_COUNT_OPS" width="32"> <!-- dword 0: counter operation; dword 1: 16-bit BR_OFFSET -->
+ <enum name="pipe_count_op">
+ <value name="PIPE_CLEAR_BV_BR" value="0x1"/>
+ <value name="PIPE_SET_BR_OFFSET" value="0x2"/>
+ <!-- Wait until BV_counter > BR_counter -->
+ <value name="PIPE_BR_WAIT_FOR_BV" value="0x3"/>
+ <!-- Wait until (BR_counter + BR_OFFSET) > BV_counter -->
+ <value name="PIPE_BV_WAIT_FOR_BR" value="0x4"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="OP" low="0" high="3" type="pipe_count_op"/>
+ </reg32>
+ <reg32 offset="1" name="1">
+ <bitfield name="BR_OFFSET" low="0" high="15" type="uint"/>
+ </reg32>
+</domain>
+
+<domain name="CP_MODIFY_TIMESTAMP" width="32"> <!-- single dword: ADD amount in bits 0-7, timestamp_op selector in bits 28-31 -->
+ <enum name="timestamp_op">
+ <value name="MODIFY_TIMESTAMP_CLEAR" value="0"/>
+ <value name="MODIFY_TIMESTAMP_ADD_GLOBAL" value="1"/>
+ <value name="MODIFY_TIMESTAMP_ADD_LOCAL" value="2"/>
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="ADD" low="0" high="7" type="uint"/>
+ <bitfield name="OP" low="28" high="31" type="timestamp_op"/>
+ </reg32>
+</domain>
+
+<domain name="CP_MEM_TO_SCRATCH_MEM" width="32"> <!-- 4 dwords: CNT, scratch OFFSET, then the source address in SRC (low) and SRC_HI -->
+ <doc>
+ Best guess is that it is a faster way to fetch all the VSC_STATE registers
+ and keep them in a local scratch memory instead of fetching every time
+ when skipping IBs.
+ </doc>
+ <reg32 offset="0" name="0">
+ <bitfield name="CNT" low="0" high="5" type="uint"/> <!-- presumably the number of dwords to copy; confirm -->
+ </reg32>
+ <reg32 offset="1" name="1">
+ <doc>Scratch memory size is 48 dwords</doc>
+ <bitfield name="OFFSET" low="0" high="5" type="uint"/>
+ </reg32>
+ <reg32 offset="2" name="2">
+ <bitfield name="SRC" low="0" high="31"/> <!-- pairs with SRC_HI in the next dword; name kept as-is because generated identifiers derive from it -->
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="SRC_HI" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_THREAD_CONTROL" width="32"> <!-- single dword: target THREAD in bits 0-1 plus two flag bits -->
+ <enum name="cp_thread">
+ <value name="CP_SET_THREAD_BR" value="1"/> <!-- Render -->
+ <value name="CP_SET_THREAD_BV" value="2"/> <!-- Visibility -->
+ <value name="CP_SET_THREAD_BOTH" value="3"/> <!-- BR | BV -->
+ </enum>
+ <reg32 offset="0" name="0">
+ <bitfield name="THREAD" low="0" high="1" type="cp_thread"/>
+ <bitfield name="CONCURRENT_BIN_DISABLE" pos="27" type="boolean"/>
+ <bitfield name="SYNC_THREADS" pos="31" type="boolean"/>
+ </reg32>
+</domain>
+
+<domain name="CP_FIXED_STRIDE_DRAW_TABLE" width="32"> <!-- 4 dwords: 64-bit IB_BASE (dwords 0-1), IB_SIZE with STRIDE, COUNT -->
+ <reg64 offset="0" name="IB_BASE"/>
+ <reg32 offset="2" name="2">
+ <!-- IB_SIZE is STRIDE * COUNT (COUNT is in dword 3) -->
+ <bitfield name="IB_SIZE" low="0" high="11"/>
+ <bitfield name="STRIDE" low="20" high="31"/>
+ </reg32>
+ <reg32 offset="3" name="3">
+ <bitfield name="COUNT" low="0" high="31"/>
+ </reg32>
+</domain>
+
+<domain name="CP_RESET_CONTEXT_STATE" width="32"> <!-- single dword of independent boolean clear flags in bits 0-2 -->
+ <reg32 offset="0" name="0">
+ <bitfield name="CLEAR_ON_CHIP_TS" pos="0" type="boolean"/>
+ <bitfield name="CLEAR_RESOURCE_TABLE" pos="1" type="boolean"/>
+ <bitfield name="CLEAR_GLOBAL_LOCAL_TS" pos="2" type="boolean"/>
+ </reg32>
+</domain>
+
+</database>
+