summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/uc/guc_capture_fwif.h
blob: 1fc0c17b12309afc78a6dfc9bc19ce652c3b67d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2021-2022 Intel Corporation
 */

#ifndef _INTEL_GUC_CAPTURE_FWIF_H
#define _INTEL_GUC_CAPTURE_FWIF_H

#include <linux/types.h>
#include "intel_guc_fwif.h"

struct intel_guc;
struct file;

/*
 * struct __guc_capture_bufstate
 *
 * Book-keeping structure used to track the read and write pointers
 * while error-capture data is extracted, as a stream of dwords, from
 * the error-capture region of the GuC log buffer.
 */
struct __guc_capture_bufstate {
	/* NOTE(review): presumably the size of the region behind @data; units not visible here */
	u32 size;
	/* NOTE(review): presumably the start of the error-capture region being parsed */
	void *data;
	/* read pointer into the region */
	u32 rd;
	/* write pointer into the region */
	u32 wr;
};

/*
 * struct __guc_capture_parsed_output - extracted error capture node
 *
 * A single unit of extracted error-capture output data grouped together
 * at an engine-instance level. We keep these nodes in a linked list.
 * See cachelist and outlist in struct intel_guc_state_capture below.
 */
struct __guc_capture_parsed_output {
	/*
	 * A single set of 3 capture lists: a global-list,
	 * an engine-class-list and an engine-instance list.
	 * outlist in struct intel_guc_state_capture will keep
	 * a linked list of these nodes that will eventually
	 * be detached from outlist and attached to
	 * i915_gpu_coredump in response to a context reset.
	 */
	struct list_head link;
	bool is_partial;
	u32 eng_class;
	u32 eng_inst;
	u32 guc_id;
	u32 lrca;
	/* One register list per capture-list type (GUC_CAPTURE_LIST_TYPE_*). */
	struct gcap_reg_list_info {
		u32 vfid;
		u32 num_regs;
		struct guc_mmio_reg *regs;
	} reginfo[GUC_CAPTURE_LIST_TYPE_MAX];
	/*
	 * Despite the _INDEX_ in the name, each of these expands to
	 * BIT(<list type>), i.e. a single-bit mask rather than a position
	 * in reginfo[].
	 */
#define GCAP_PARSED_REGLIST_INDEX_GLOBAL   BIT(GUC_CAPTURE_LIST_TYPE_GLOBAL)
#define GCAP_PARSED_REGLIST_INDEX_ENGCLASS BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_CLASS)
#define GCAP_PARSED_REGLIST_INDEX_ENGINST  BIT(GUC_CAPTURE_LIST_TYPE_ENGINE_INSTANCE)
};

/*
 * struct guc_debug_capture_list_header / struct guc_debug_capture_list
 *
 * As part of ADS registration, these header structures (followed by
 * an array of 'struct guc_mmio_reg' entries) are used to register with
 * the GuC microkernel the list of registers we want it to dump out prior
 * to an engine reset.
 */
struct guc_debug_capture_list_header {
	u32 info;
	/*
	 * Bits 15:0 of @info.
	 * NOTE(review): presumably the number of register descriptors that
	 * follow the header - confirm against the GuC interface spec.
	 */
#define GUC_CAPTURELISTHDR_NUMDESCR GENMASK(15, 0)
} __packed;

/*
 * A capture list as handed to GuC: the header immediately followed by a
 * variable-length array of register entries. Packed, so the layout
 * carries no compiler-inserted padding.
 */
struct guc_debug_capture_list {
	struct guc_debug_capture_list_header header;
	struct guc_mmio_reg regs[];
} __packed;

/*
 * struct __guc_mmio_reg_descr / struct __guc_mmio_reg_descr_group
 *
 * The intel_guc_capture module uses these structures to maintain static
 * tables (per unique platform) that consist of lists of registers
 * (offsets, names, flags,...) that are used at ADS registration
 * time as well as during runtime processing and reporting of error-
 * capture states generated by GuC just prior to engine reset events.
 */
struct __guc_mmio_reg_descr {
	/* the register being described */
	i915_reg_t reg;
	/* NOTE(review): flag and mask semantics are defined by the users of this table */
	u32 flags;
	u32 mask;
	/* human-readable register name */
	const char *regname;
};

/*
 * One group of register descriptors, tagged with the owner, capture type
 * and engine class it applies to. @list points at a read-only table of
 * @num_regs entries; @extlist is a separate, writable list that is only
 * used for steered registers.
 */
struct __guc_mmio_reg_descr_group {
	const struct __guc_mmio_reg_descr *list;
	u32 num_regs;
	u32 owner; /* see enum guc_capture_owner */
	u32 type; /* see enum guc_capture_type */
	u32 engine; /* as per MAX_ENGINE_CLASS */
	struct __guc_mmio_reg_descr *extlist; /* only used for steered registers */
};

/*
 * struct guc_state_capture_header_t / struct guc_state_capture_t /
 * guc_state_capture_group_header_t / guc_state_capture_group_t
 *
 * Prior to resetting engines that have hung or faulted, the GuC microkernel
 * reports the engine error-state (register values that were read) by
 * logging them into the shared GuC log buffer using this hierarchy
 * of structures.
 */
struct guc_state_capture_header_t {
	u32 owner;
	/* field within @owner */
#define CAP_HDR_CAPTURE_VFID GENMASK(7, 0)
	u32 info;
	/* fields within @info */
#define CAP_HDR_CAPTURE_TYPE GENMASK(3, 0) /* see enum guc_capture_type */
#define CAP_HDR_ENGINE_CLASS GENMASK(7, 4) /* see GUC_MAX_ENGINE_CLASSES */
#define CAP_HDR_ENGINE_INSTANCE GENMASK(11, 8)
	u32 lrca; /* if type-instance, LRCA (address) that hung, else set to ~0 */
	u32 guc_id; /* if type-instance, context index of hung context, else set to ~0 */
	u32 num_mmios;
	/* field within @num_mmios */
#define CAP_HDR_NUM_MMIOS GENMASK(9, 0)
} __packed;

/*
 * A single capture: the capture header immediately followed by a
 * variable-length array of register entries.
 * NOTE(review): the entry count is presumably the CAP_HDR_NUM_MMIOS field
 * of header.num_mmios - confirm against the parser.
 */
struct guc_state_capture_t {
	struct guc_state_capture_header_t header;
	struct guc_mmio_reg mmio_entries[];
} __packed;

/*
 * Kinds of capture group. Values are spelled out because they are
 * carried in the CAP_GRP_HDR_CAPTURE_TYPE field of the group header.
 * GUC_STATE_CAPTURE_GROUP_TYPE_MAX is the number of group types and is
 * not itself a valid type.
 */
enum guc_capture_group_types {
	GUC_STATE_CAPTURE_GROUP_TYPE_FULL = 0,
	GUC_STATE_CAPTURE_GROUP_TYPE_PARTIAL = 1,
	GUC_STATE_CAPTURE_GROUP_TYPE_MAX = 2,
};

/* Header that starts a capture group; the captures themselves follow it. */
struct guc_state_capture_group_header_t {
	u32 owner;
	/* field within @owner */
#define CAP_GRP_HDR_CAPTURE_VFID GENMASK(7, 0)
	u32 info;
	/* fields within @info */
#define CAP_GRP_HDR_NUM_CAPTURES GENMASK(7, 0)
#define CAP_GRP_HDR_CAPTURE_TYPE GENMASK(15, 8) /* guc_capture_group_types */
} __packed;

/*
 * This is the top-level structure where an error-capture dump starts:
 * a group header followed by a variable-length sequence of captures.
 * Each capture ends in its own flexible array, so the entries are not
 * of uniform size.
 */
struct guc_state_capture_group_t {
	struct guc_state_capture_group_header_t grp_header;
	struct guc_state_capture_t capture_entries[];
} __packed;

/*
 * struct __guc_capture_ads_cache
 *
 * A structure to cache register lists that were populated and registered
 * with GuC at startup during ADS registration. This allows much quicker
 * GuC resets without re-parsing all the tables for the given gt.
 */
struct __guc_capture_ads_cache {
	/* true once this cache entry has been populated */
	bool is_valid;
	/* NOTE(review): presumably the cached, ADS-formatted register list - confirm ownership */
	void *ptr;
	/* NOTE(review): presumably the size in bytes of the data at @ptr */
	size_t size;
	/* NOTE(review): presumably the result code recorded when the entry was built */
	int status;
};

/**
 * struct intel_guc_state_capture - internal context of the intel_guc_capture module
 */
struct intel_guc_state_capture {
	/**
	 * @reglists: static table of register lists used for error-capture state.
	 */
	const struct __guc_mmio_reg_descr_group *reglists;

	/**
	 * @extlists: allocated table of steered register lists used for error-capture state.
	 *
	 * NOTE: steered registers have multiple instances depending on the HW configuration
	 * (slices or dual-sub-slices) and thus depend on HW fuses discovered at startup.
	 */
	struct __guc_mmio_reg_descr_group *extlists;

	/**
	 * @ads_cache: cached register lists that are ADS format ready
	 *
	 * Dimensioned by GUC_CAPTURE_LIST_INDEX_MAX, GUC_CAPTURE_LIST_TYPE_MAX
	 * and GUC_MAX_ENGINE_CLASSES, in that order.
	 */
	struct __guc_capture_ads_cache ads_cache[GUC_CAPTURE_LIST_INDEX_MAX]
						[GUC_CAPTURE_LIST_TYPE_MAX]
						[GUC_MAX_ENGINE_CLASSES];

	/**
	 * @ads_null_cache: ADS null cache.
	 */
	void *ads_null_cache;

	/**
	 * @cachelist: Pool of pre-allocated nodes for error capture output
	 *
	 * We need this pool of pre-allocated nodes because we cannot
	 * dynamically allocate new nodes when receiving the G2H notification:
	 * the event handlers for all G2H event-processing are called
	 * by the ct processing worker queue and when that queue is being
	 * processed, there is no absolute guarantee that we are not in the
	 * midst of a GT reset operation (which doesn't allow allocations).
	 */
	struct list_head cachelist;
	/*
	 * NOTE(review): presumably the upper bound on the number of nodes
	 * pre-allocated into @cachelist and the default register capacity
	 * of each node - confirm against the allocator.
	 */
#define PREALLOC_NODES_MAX_COUNT (3 * GUC_MAX_ENGINE_CLASSES * GUC_MAX_INSTANCES_PER_CLASS)
#define PREALLOC_NODES_DEFAULT_NUMREGS 64

	/**
	 * @max_mmio_per_node: Max MMIO per node.
	 */
	int max_mmio_per_node;

	/**
	 * @outlist: List of parsed error-capture output nodes
	 *
	 * A linked list of parsed GuC error-capture output data before
	 * reporting with formatting via i915_gpu_coredump. Each node in this linked list shall
	 * contain a single engine-capture including global, engine-class and
	 * engine-instance register dumps as per struct __guc_capture_parsed_output.
	 */
	struct list_head outlist;
};

#endif /* _INTEL_GUC_CAPTURE_FWIF_H */