summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau/nvkm/engine/gr
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr')
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c423
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h79
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c114
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c15
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c190
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c28
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c20
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c9
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c93
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c120
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c22
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c100
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c11
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c215
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c417
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h109
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c42
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c14
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c143
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c31
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c17
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c16
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c37
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c178
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c175
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c180
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c77
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c66
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c21
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c120
42 files changed, 2217 insertions, 1096 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 8a22558b7b52..93e3733f54e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -33,8 +33,10 @@ nvkm-y += nvkm/engine/gr/gm200.o
nvkm-y += nvkm/engine/gr/gm20b.o
nvkm-y += nvkm/engine/gr/gp100.o
nvkm-y += nvkm/engine/gr/gp102.o
+nvkm-y += nvkm/engine/gr/gp104.o
nvkm-y += nvkm/engine/gr/gp107.o
nvkm-y += nvkm/engine/gr/gp10b.o
+nvkm-y += nvkm/engine/gr/gv100.o
nvkm-y += nvkm/engine/gr/ctxnv40.o
nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -54,4 +56,6 @@ nvkm-y += nvkm/engine/gr/ctxgm200.o
nvkm-y += nvkm/engine/gr/ctxgm20b.o
nvkm-y += nvkm/engine/gr/ctxgp100.o
nvkm-y += nvkm/engine/gr/ctxgp102.o
+nvkm-y += nvkm/engine/gr/ctxgp104.o
nvkm-y += nvkm/engine/gr/ctxgp107.o
+nvkm-y += nvkm/engine/gr/ctxgv100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 881015080d83..e813a3f8ea93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -850,12 +850,17 @@ gf100_grctx_init_gcc_0[] = {
};
const struct gf100_gr_pack
-gf100_grctx_pack_gpc[] = {
+gf100_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf100_grctx_init_prop_0 },
{ gf100_grctx_init_gpc_unk_1 },
{ gf100_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+const struct gf100_gr_pack
+gf100_grctx_pack_gpc_1[] = {
{ gf100_grctx_init_crstr_0 },
{ gf100_grctx_init_gpm_0 },
{ gf100_grctx_init_gcc_0 },
@@ -1025,6 +1030,13 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
}
void
+gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419cb8, 0x00007c00, 0x00000000);
+}
+
+void
gf100_grctx_generate_bundle(struct gf100_grctx *info)
{
const struct gf100_grctx_func *grctx = info->gr->func->grctx;
@@ -1080,89 +1092,38 @@ gf100_grctx_generate_unkn(struct gf100_gr *gr)
}
void
-gf100_grctx_generate_tpcid(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- int gpc, tpc, id;
-
- for (tpc = 0, id = 0; tpc < 4; tpc++) {
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- if (tpc < gr->tpc_nr[gpc]) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
- id++;
- }
-
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
- }
- }
-}
-
-void
-gf100_grctx_generate_r406028(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- u32 tmp[GPC_MAX / 8] = {}, i = 0;
- for (i = 0; i < gr->gpc_nr; i++)
- tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4);
- for (i = 0; i < 4; i++) {
- nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]);
- nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]);
- }
-}
-
-void
gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- u8 tpcnr[GPC_MAX], data[TPC_MAX];
- int gpc, tpc, i;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- memset(data, 0x1f, sizeof(data));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
- data[tpc] = gpc;
+ const u8 gpcmax = nvkm_rd32(device, 0x022430);
+ const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
+ int i, j, sm = 0;
+ u32 data;
+
+ for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
+ for (data = 0, j = 0; j < 4; j++) {
+ if (sm < gr->sm_nr)
+ data |= gr->sm[sm++].gpc << (j * 8);
+ else
+ data |= 0x1f << (j * 8);
+ }
+ nvkm_wr32(device, 0x4060a8 + (i * 4), data);
}
-
- for (i = 0; i < 4; i++)
- nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
}
void
-gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
+gf100_grctx_generate_rop_mapping(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
+ int i;
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
- /* and the second... */
+ /* Magic. */
shift = 0;
ntpcv = gr->tpc_total;
while (!(ntpcv & (1 << 4))) {
@@ -1197,40 +1158,214 @@ gf100_grctx_generate_r418bb8(struct gf100_gr *gr)
}
void
-gf100_grctx_generate_r406800(struct gf100_gr *gr)
+gf100_grctx_generate_max_ways_evict(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- u64 tpc_mask = 0, tpc_set = 0;
- u8 tpcnr[GPC_MAX];
- int gpc, tpc;
- int i, a, b;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (gpc = 0; gpc < gr->gpc_nr; gpc++)
- tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8);
-
- for (i = 0, gpc = -1, b = -1; i < 32; i++) {
- a = (i * (gr->tpc_total - 1)) / 32;
- if (a != b) {
- b = a;
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- tpc_set |= 1ULL << ((gpc * 8) + tpc);
+ u32 fbps = nvkm_rd32(device, 0x121c74);
+ if (fbps == 1)
+ nvkm_mask(device, 0x17e91c, 0x001f0000, 0x00090000);
+}
+
+static const u32
+gf100_grctx_alpha_beta_map[17][32] = {
+ [1] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ [2] = {
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ },
+ //XXX: 3
+ [4] = {
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3, 3, 3,
+ },
+ //XXX: 5
+ //XXX: 6
+ [7] = {
+ 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ },
+ [8] = {
+ 1, 1, 1,
+ 2, 2, 2, 2, 2,
+ 3, 3, 3, 3, 3,
+ 4, 4, 4, 4, 4, 4,
+ 5, 5, 5, 5, 5,
+ 6, 6, 6, 6, 6,
+ 7, 7, 7,
+ },
+ //XXX: 9
+ //XXX: 10
+ [11] = {
+ 1, 1,
+ 2, 2, 2, 2,
+ 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5,
+ 6, 6, 6,
+ 7, 7, 7, 7,
+ 8, 8, 8,
+ 9, 9, 9, 9,
+ 10, 10,
+ },
+ //XXX: 12
+ //XXX: 13
+ [14] = {
+ 1, 1,
+ 2, 2,
+ 3, 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ },
+ [15] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9, 9,
+ 10, 10,
+ 11, 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ },
+ [16] = {
+ 1, 1,
+ 2, 2,
+ 3, 3,
+ 4, 4,
+ 5, 5,
+ 6, 6, 6,
+ 7, 7,
+ 8, 8,
+ 9, 9,
+ 10, 10, 10,
+ 11, 11,
+ 12, 12,
+ 13, 13,
+ 14, 14,
+ 15, 15,
+ },
+};
+
+void
+gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+ struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
+ int i, gpc;
+
+ for (i = 0; i < 32; i++) {
+ u32 atarget = gf100_grctx_alpha_beta_map[gr->tpc_total][i];
+ u32 abits[GPC_MAX] = {}, amask = 0, bmask = 0;
+
+ if (!atarget) {
+ nvkm_warn(subdev, "missing alpha/beta mapping table\n");
+ atarget = max_t(u32, gr->tpc_total * i / 32, 1);
+ }
+
+ while (atarget) {
+ for (gpc = 0; atarget && gpc < gr->gpc_nr; gpc++) {
+ if (abits[gpc] < gr->tpc_nr[gpc]) {
+ abits[gpc]++;
+ atarget--;
+ }
+ }
}
- nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask));
- if (gr->gpc_nr > 4) {
- nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set));
- nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask));
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ u32 bbits = gr->tpc_nr[gpc] - abits[gpc];
+ amask |= ((1 << abits[gpc]) - 1) << (gpc * 8);
+ bmask |= ((1 << bbits) - 1) << abits[gpc] << (gpc * 8);
}
+
+ nvkm_wr32(device, 0x406800 + (i * 0x20), amask);
+ nvkm_wr32(device, 0x406c00 + (i * 0x20), bmask);
}
}
void
+gf100_grctx_generate_tpc_nr(struct gf100_gr *gr, int gpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
+}
+
+void
+gf100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), sm);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
+}
+
+void
+gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const struct gf100_grctx_func *func = gr->func->grctx;
+ int gpc, sm, i, j;
+ u32 data;
+
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
+ if (func->tpc_nr)
+ func->tpc_nr(gr, gr->sm[sm].gpc);
+ }
+
+ for (gpc = 0, i = 0; i < 4; i++) {
+ for (data = 0, j = 0; j < 8 && gpc < gr->gpc_nr; j++, gpc++)
+ data |= gr->tpc_nr[gpc] << (j * 4);
+ nvkm_wr32(device, 0x406028 + (i * 4), data);
+ nvkm_wr32(device, 0x405870 + (i * 4), data);
+ }
+
+ if (func->r4060a8)
+ func->r4060a8(gr);
+
+ func->rop_mapping(gr);
+
+ if (func->alpha_beta_tables)
+ func->alpha_beta_tables(gr);
+ if (func->max_ways_evict)
+ func->max_ways_evict(gr);
+ if (func->dist_skip_table)
+ func->dist_skip_table(gr);
+ if (func->r406500)
+ func->r406500(gr);
+ if (func->gpc_tpc_nr)
+ func->gpc_tpc_nr(gr);
+ if (func->r419f78)
+ func->r419f78(gr);
+ if (func->tpc_mask)
+ func->tpc_mask(gr);
+ if (func->smid_config)
+ func->smid_config(gr);
+}
+
+void
gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -1239,29 +1374,63 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
nvkm_mc_unk260(device, 0);
- gf100_gr_mmio(gr, grctx->hub);
- gf100_gr_mmio(gr, grctx->gpc);
- gf100_gr_mmio(gr, grctx->zcull);
- gf100_gr_mmio(gr, grctx->tpc);
- gf100_gr_mmio(gr, grctx->ppc);
+ if (!gr->fuc_sw_ctx) {
+ gf100_gr_mmio(gr, grctx->hub);
+ gf100_gr_mmio(gr, grctx->gpc_0);
+ gf100_gr_mmio(gr, grctx->zcull);
+ gf100_gr_mmio(gr, grctx->gpc_1);
+ gf100_gr_mmio(gr, grctx->tpc);
+ gf100_gr_mmio(gr, grctx->ppc);
+ } else {
+ gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+ }
+
+ gf100_gr_wait_idle(gr);
idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
- grctx->bundle(info);
grctx->pagepool(info);
+ grctx->bundle(info);
grctx->attrib(info);
+ if (grctx->patch_ltc)
+ grctx->patch_ltc(info);
grctx->unkn(gr);
- gf100_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gf100_grctx_generate_r4060a8(gr);
- gf100_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
+ gf100_grctx_generate_floorsweep(gr);
+
+ gf100_gr_wait_idle(gr);
+
+ if (grctx->r400088) grctx->r400088(gr, false);
+ if (gr->fuc_bundle)
+ gf100_gr_icmd(gr, gr->fuc_bundle);
+ else
+ gf100_gr_icmd(gr, grctx->icmd);
+ if (grctx->sw_veid_bundle_init)
+ gf100_gr_icmd(gr, grctx->sw_veid_bundle_init);
+ if (grctx->r400088) grctx->r400088(gr, true);
- gf100_gr_icmd(gr, grctx->icmd);
nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, grctx->mthd);
+
+ if (gr->fuc_method)
+ gf100_gr_mthd(gr, gr->fuc_method);
+ else
+ gf100_gr_mthd(gr, grctx->mthd);
nvkm_mc_unk260(device, 1);
+
+ if (grctx->r419cb8)
+ grctx->r419cb8(gr);
+ if (grctx->r418800)
+ grctx->r418800(gr);
+ if (grctx->r419eb0)
+ grctx->r419eb0(gr);
+ if (grctx->r419e00)
+ grctx->r419e00(gr);
+ if (grctx->r418e94)
+ grctx->r418e94(gr);
+ if (grctx->r419a3c)
+ grctx->r419a3c(gr);
+ if (grctx->r408840)
+ grctx->r408840(gr);
}
#define CB_RESERVED 0x80000
@@ -1280,6 +1449,32 @@ gf100_grctx_generate(struct gf100_gr *gr)
int ret, i;
u64 addr;
+ /* NV_PGRAPH_FE_PWR_MODE_FORCE_ON. */
+ nvkm_wr32(device, 0x404170, 0x00000012);
+ nvkm_msec(device, 2000,
+ if (!(nvkm_rd32(device, 0x404170) & 0x00000010))
+ break;
+ );
+
+ if (grctx->unkn88c)
+ grctx->unkn88c(gr, true);
+
+ /* Reset FECS. */
+ nvkm_wr32(device, 0x409614, 0x00000070);
+ nvkm_usec(device, 10, NVKM_DELAY);
+ nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
+ nvkm_usec(device, 10, NVKM_DELAY);
+ nvkm_rd32(device, 0x409614);
+
+ if (grctx->unkn88c)
+ grctx->unkn88c(gr, false);
+
+ /* NV_PGRAPH_FE_PWR_MODE_AUTO. */
+ nvkm_wr32(device, 0x404170, 0x00000010);
+
+ /* Init SCC RAM. */
+ nvkm_wr32(device, 0x40802c, 0x00000001);
+
/* Allocate memory to for a "channel", which we'll use to generate
* the default context values.
*/
@@ -1392,7 +1587,8 @@ gf100_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf100_grctx_generate_unkn,
.hub = gf100_grctx_pack_hub,
- .gpc = gf100_grctx_pack_gpc,
+ .gpc_0 = gf100_grctx_pack_gpc_0,
+ .gpc_1 = gf100_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf100_grctx_pack_tpc,
.icmd = gf100_grctx_pack_icmd,
@@ -1404,4 +1600,11 @@ gf100_grctx = {
.attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324,
.attrib_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 5199e5aa0cb7..33e932bd73b1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -21,19 +21,22 @@ void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int)
#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1)
struct gf100_grctx_func {
+ void (*unkn88c)(struct gf100_gr *, bool on);
/* main context generation function */
void (*main)(struct gf100_gr *, struct gf100_grctx *);
/* context-specific modify-on-first-load list generation function */
void (*unkn)(struct gf100_gr *);
/* mmio context data */
const struct gf100_gr_pack *hub;
- const struct gf100_gr_pack *gpc;
+ const struct gf100_gr_pack *gpc_0;
+ const struct gf100_gr_pack *gpc_1;
const struct gf100_gr_pack *zcull;
const struct gf100_gr_pack *tpc;
const struct gf100_gr_pack *ppc;
/* indirect context data, generated with icmds/mthds */
const struct gf100_gr_pack *icmd;
const struct gf100_gr_pack *mthd;
+ const struct gf100_gr_pack *sw_veid_bundle_init;
/* bundle circular buffer */
void (*bundle)(struct gf100_grctx *);
u32 bundle_size;
@@ -48,6 +51,31 @@ struct gf100_grctx_func {
u32 attrib_nr;
u32 alpha_nr_max;
u32 alpha_nr;
+ u32 gfxp_nr;
+ /* other patch buffer stuff */
+ void (*patch_ltc)(struct gf100_grctx *);
+ /* floorsweeping */
+ void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm);
+ void (*tpc_nr)(struct gf100_gr *, int gpc);
+ void (*r4060a8)(struct gf100_gr *);
+ void (*rop_mapping)(struct gf100_gr *);
+ void (*alpha_beta_tables)(struct gf100_gr *);
+ void (*max_ways_evict)(struct gf100_gr *);
+ void (*dist_skip_table)(struct gf100_gr *);
+ void (*r406500)(struct gf100_gr *);
+ void (*gpc_tpc_nr)(struct gf100_gr *);
+ void (*r419f78)(struct gf100_gr *);
+ void (*tpc_mask)(struct gf100_gr *);
+ void (*smid_config)(struct gf100_gr *);
+ /* misc other things */
+ void (*r400088)(struct gf100_gr *, bool);
+ void (*r419cb8)(struct gf100_gr *);
+ void (*r418800)(struct gf100_gr *);
+ void (*r419eb0)(struct gf100_gr *);
+ void (*r419e00)(struct gf100_gr *);
+ void (*r418e94)(struct gf100_gr *);
+ void (*r419a3c)(struct gf100_gr *);
+ void (*r408840)(struct gf100_gr *);
};
extern const struct gf100_grctx_func gf100_grctx;
@@ -57,11 +85,14 @@ void gf100_grctx_generate_bundle(struct gf100_grctx *);
void gf100_grctx_generate_pagepool(struct gf100_grctx *);
void gf100_grctx_generate_attrib(struct gf100_grctx *);
void gf100_grctx_generate_unkn(struct gf100_gr *);
-void gf100_grctx_generate_tpcid(struct gf100_gr *);
-void gf100_grctx_generate_r406028(struct gf100_gr *);
+void gf100_grctx_generate_floorsweep(struct gf100_gr *);
+void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
+void gf100_grctx_generate_tpc_nr(struct gf100_gr *, int);
void gf100_grctx_generate_r4060a8(struct gf100_gr *);
-void gf100_grctx_generate_r418bb8(struct gf100_gr *);
-void gf100_grctx_generate_r406800(struct gf100_gr *);
+void gf100_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf100_grctx_generate_alpha_beta_tables(struct gf100_gr *);
+void gf100_grctx_generate_max_ways_evict(struct gf100_gr *);
+void gf100_grctx_generate_r419cb8(struct gf100_gr *);
extern const struct gf100_grctx_func gf108_grctx;
void gf108_grctx_generate_attrib(struct gf100_grctx *);
@@ -72,22 +103,25 @@ extern const struct gf100_grctx_func gf110_grctx;
extern const struct gf100_grctx_func gf117_grctx;
void gf117_grctx_generate_attrib(struct gf100_grctx *);
+void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
+void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
extern const struct gf100_grctx_func gf119_grctx;
extern const struct gf100_grctx_func gk104_grctx;
+void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *);
+void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *);
+
extern const struct gf100_grctx_func gk20a_grctx;
-void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
void gk104_grctx_generate_bundle(struct gf100_grctx *);
void gk104_grctx_generate_pagepool(struct gf100_grctx *);
+void gk104_grctx_generate_patch_ltc(struct gf100_grctx *);
void gk104_grctx_generate_unkn(struct gf100_gr *);
-void gk104_grctx_generate_r418bb8(struct gf100_gr *);
-
-void gm107_grctx_generate_bundle(struct gf100_grctx *);
-void gm107_grctx_generate_pagepool(struct gf100_grctx *);
-void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gk104_grctx_generate_r418800(struct gf100_gr *);
extern const struct gf100_grctx_func gk110_grctx;
+void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+
extern const struct gf100_grctx_func gk110b_grctx;
extern const struct gf100_grctx_func gk208_grctx;
@@ -95,22 +129,30 @@ extern const struct gf100_grctx_func gm107_grctx;
void gm107_grctx_generate_bundle(struct gf100_grctx *);
void gm107_grctx_generate_pagepool(struct gf100_grctx *);
void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
extern const struct gf100_grctx_func gm200_grctx;
-void gm200_grctx_generate_tpcid(struct gf100_gr *);
-void gm200_grctx_generate_405b60(struct gf100_gr *);
+void gm200_grctx_generate_dist_skip_table(struct gf100_gr *);
+void gm200_grctx_generate_r406500(struct gf100_gr *);
+void gm200_grctx_generate_tpc_mask(struct gf100_gr *);
+void gm200_grctx_generate_smid_config(struct gf100_gr *);
+void gm200_grctx_generate_r419a3c(struct gf100_gr *);
extern const struct gf100_grctx_func gm20b_grctx;
extern const struct gf100_grctx_func gp100_grctx;
-void gp100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
void gp100_grctx_generate_pagepool(struct gf100_grctx *);
+void gp100_grctx_generate_smid_config(struct gf100_gr *);
extern const struct gf100_grctx_func gp102_grctx;
void gp102_grctx_generate_attrib(struct gf100_grctx *);
+extern const struct gf100_grctx_func gp104_grctx;
+
extern const struct gf100_grctx_func gp107_grctx;
+extern const struct gf100_grctx_func gv100_grctx;
+
/* context init value lists */
extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
@@ -128,7 +170,8 @@ extern const struct gf100_gr_init gf100_grctx_init_memfmt_0[];
extern const struct gf100_gr_init gf100_grctx_init_rstr2d_0[];
extern const struct gf100_gr_init gf100_grctx_init_scc_0[];
-extern const struct gf100_gr_pack gf100_grctx_pack_gpc[];
+extern const struct gf100_gr_pack gf100_grctx_pack_gpc_0[];
+extern const struct gf100_gr_pack gf100_grctx_pack_gpc_1[];
extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_0[];
extern const struct gf100_gr_init gf100_grctx_init_prop_0[];
extern const struct gf100_gr_init gf100_grctx_init_gpc_unk_1[];
@@ -177,6 +220,8 @@ extern const struct gf100_gr_init gf117_grctx_init_pe_0[];
extern const struct gf100_gr_init gf117_grctx_init_wwdx_0[];
+extern const struct gf100_gr_pack gf117_grctx_pack_gpc_1[];
+
extern const struct gf100_gr_init gk104_grctx_init_memfmt_0[];
extern const struct gf100_gr_init gk104_grctx_init_ds_0[];
extern const struct gf100_gr_init gk104_grctx_init_scc_0[];
@@ -186,7 +231,6 @@ extern const struct gf100_gr_init gk104_grctx_init_gpm_0[];
extern const struct gf100_gr_init gk104_grctx_init_pes_0[];
extern const struct gf100_gr_pack gk104_grctx_pack_hub[];
-extern const struct gf100_gr_pack gk104_grctx_pack_gpc[];
extern const struct gf100_gr_pack gk104_grctx_pack_tpc[];
extern const struct gf100_gr_pack gk104_grctx_pack_ppc[];
extern const struct gf100_gr_pack gk104_grctx_pack_icmd[];
@@ -200,7 +244,8 @@ extern const struct gf100_gr_pack gk110_grctx_pack_hub[];
extern const struct gf100_gr_init gk110_grctx_init_pri_0[];
extern const struct gf100_gr_init gk110_grctx_init_cwd_0[];
-extern const struct gf100_gr_pack gk110_grctx_pack_gpc[];
+extern const struct gf100_gr_pack gk110_grctx_pack_gpc_0[];
+extern const struct gf100_gr_pack gk110_grctx_pack_gpc_1[];
extern const struct gf100_gr_init gk110_grctx_init_gpc_unk_2[];
extern const struct gf100_gr_init gk110_grctx_init_tex_0[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
index 54fd74e9cca0..7a0564b6e3c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
@@ -84,7 +84,8 @@ gf104_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf100_grctx_generate_unkn,
.hub = gf100_grctx_pack_hub,
- .gpc = gf100_grctx_pack_gpc,
+ .gpc_0 = gf100_grctx_pack_gpc_0,
+ .gpc_1 = gf100_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf104_grctx_pack_tpc,
.icmd = gf100_grctx_pack_icmd,
@@ -96,4 +97,11 @@ gf104_grctx = {
.attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324,
.attrib_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
index 82f71b10c06e..dda2c32e6232 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
@@ -667,12 +667,17 @@ gf108_grctx_init_gpm_0[] = {
};
static const struct gf100_gr_pack
-gf108_grctx_pack_gpc[] = {
+gf108_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf100_grctx_init_prop_0 },
{ gf100_grctx_init_gpc_unk_1 },
{ gf108_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+static const struct gf100_gr_pack
+gf108_grctx_pack_gpc_1[] = {
{ gf100_grctx_init_crstr_0 },
{ gf108_grctx_init_gpm_0 },
{ gf100_grctx_init_gcc_0 },
@@ -780,7 +785,8 @@ gf108_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf108_grctx_generate_unkn,
.hub = gf108_grctx_pack_hub,
- .gpc = gf108_grctx_pack_gpc,
+ .gpc_0 = gf108_grctx_pack_gpc_0,
+ .gpc_1 = gf108_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf108_grctx_pack_tpc,
.icmd = gf108_grctx_pack_icmd,
@@ -794,4 +800,11 @@ gf108_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x324,
.alpha_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
index 7df398b53f8f..f5cca5e6a4f2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
@@ -314,15 +314,12 @@ gf110_grctx_init_setup_0[] = {
};
static const struct gf100_gr_pack
-gf110_grctx_pack_gpc[] = {
+gf110_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf100_grctx_init_prop_0 },
{ gf100_grctx_init_gpc_unk_1 },
{ gf110_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
- { gf100_grctx_init_crstr_0 },
- { gf100_grctx_init_gpm_0 },
- { gf100_grctx_init_gcc_0 },
{}
};
@@ -335,7 +332,8 @@ gf110_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf100_grctx_generate_unkn,
.hub = gf100_grctx_pack_hub,
- .gpc = gf110_grctx_pack_gpc,
+ .gpc_0 = gf110_grctx_pack_gpc_0,
+ .gpc_1 = gf100_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf100_grctx_pack_tpc,
.icmd = gf110_grctx_pack_icmd,
@@ -347,4 +345,11 @@ gf110_grctx = {
.attrib = gf100_grctx_generate_attrib,
.attrib_nr_max = 0x324,
.attrib_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 19301d88577d..276c282d19aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -84,12 +84,17 @@ gf117_grctx_init_setup_0[] = {
};
static const struct gf100_gr_pack
-gf117_grctx_pack_gpc[] = {
+gf117_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf119_grctx_init_prop_0 },
{ gf119_grctx_init_gpc_unk_1 },
{ gf117_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+const struct gf100_gr_pack
+gf117_grctx_pack_gpc_1[] = {
{ gf119_grctx_init_crstr_0 },
{ gf108_grctx_init_gpm_0 },
{ gf100_grctx_init_gcc_0 },
@@ -180,6 +185,62 @@ gf117_grctx_pack_ppc[] = {
******************************************************************************/
void
+gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+}
+
+void
+gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ u32 data[6] = {}, data2[2] = {};
+ u8 shift, ntpcv;
+ int i;
+
+ /* Pack tile map into register format. */
+ for (i = 0; i < 32; i++)
+ data[i / 6] |= (gr->tile[i] & 0x07) << ((i % 6) * 5);
+
+ /* Magic. */
+ shift = 0;
+ ntpcv = gr->tpc_total;
+ while (!(ntpcv & (1 << 4))) {
+ ntpcv <<= 1;
+ shift++;
+ }
+
+ data2[0] = (ntpcv << 16);
+ data2[0] |= (shift << 21);
+ data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24);
+ for (i = 1; i < 7; i++)
+ data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
+
+ /* GPC_BROADCAST */
+ nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset);
+ for (i = 0; i < 6; i++)
+ nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
+
+ /* GPC_BROADCAST.TP_BROADCAST */
+ nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset | data2[0]);
+ nvkm_wr32(device, 0x41bfe4, data2[1]);
+ for (i = 0; i < 6; i++)
+ nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]);
+
+ /* UNK78xx */
+ nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset);
+ for (i = 0; i < 6; i++)
+ nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
+}
+
+void
gf117_grctx_generate_attrib(struct gf100_grctx *info)
{
struct gf100_gr *gr = info->gr;
@@ -217,50 +278,13 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info)
}
}
-static void
-gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- const struct gf100_grctx_func *grctx = gr->func->grctx;
- u32 idle_timeout;
- int i;
-
- nvkm_mc_unk260(device, 0);
-
- gf100_gr_mmio(gr, grctx->hub);
- gf100_gr_mmio(gr, grctx->gpc);
- gf100_gr_mmio(gr, grctx->zcull);
- gf100_gr_mmio(gr, grctx->tpc);
- gf100_gr_mmio(gr, grctx->ppc);
-
- idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
- grctx->bundle(info);
- grctx->pagepool(info);
- grctx->attrib(info);
- grctx->unkn(gr);
-
- gf100_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gf100_grctx_generate_r4060a8(gr);
- gk104_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
-
- for (i = 0; i < 8; i++)
- nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
- gf100_gr_icmd(gr, grctx->icmd);
- nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, grctx->mthd);
- nvkm_mc_unk260(device, 1);
-}
-
const struct gf100_grctx_func
gf117_grctx = {
- .main = gf117_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gf117_grctx_pack_hub,
- .gpc = gf117_grctx_pack_gpc,
+ .gpc_0 = gf117_grctx_pack_gpc_0,
+ .gpc_1 = gf117_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf117_grctx_pack_tpc,
.ppc = gf117_grctx_pack_ppc,
@@ -275,4 +299,12 @@ gf117_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x7ff,
.alpha_nr = 0x324,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
index 605185b078be..0cfe46366af6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
@@ -431,15 +431,12 @@ gf119_grctx_init_crstr_0[] = {
};
static const struct gf100_gr_pack
-gf119_grctx_pack_gpc[] = {
+gf119_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf119_grctx_init_prop_0 },
{ gf119_grctx_init_gpc_unk_1 },
{ gf119_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
- { gf119_grctx_init_crstr_0 },
- { gf108_grctx_init_gpm_0 },
- { gf100_grctx_init_gcc_0 },
{}
};
@@ -503,7 +500,8 @@ gf119_grctx = {
.main = gf100_grctx_generate_main,
.unkn = gf108_grctx_generate_unkn,
.hub = gf119_grctx_pack_hub,
- .gpc = gf119_grctx_pack_gpc,
+ .gpc_0 = gf119_grctx_pack_gpc_0,
+ .gpc_1 = gf117_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gf119_grctx_pack_tpc,
.icmd = gf119_grctx_pack_icmd,
@@ -517,4 +515,11 @@ gf119_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x324,
.alpha_nr = 0x218,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .r4060a8 = gf100_grctx_generate_r4060a8,
+ .rop_mapping = gf100_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables,
+ .max_ways_evict = gf100_grctx_generate_max_ways_evict,
+ .r419cb8 = gf100_grctx_generate_r419cb8,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 825c8fd500bc..304e9d268bad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -739,13 +739,18 @@ gk104_grctx_init_gpm_0[] = {
{}
};
-const struct gf100_gr_pack
-gk104_grctx_pack_gpc[] = {
+static const struct gf100_gr_pack
+gk104_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf119_grctx_init_prop_0 },
{ gf119_grctx_init_gpc_unk_1 },
{ gk104_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+static const struct gf100_gr_pack
+gk104_grctx_pack_gpc_1[] = {
{ gf119_grctx_init_crstr_0 },
{ gk104_grctx_init_gpm_0 },
{ gf100_grctx_init_gcc_0 },
@@ -841,6 +846,32 @@ gk104_grctx_pack_ppc[] = {
******************************************************************************/
void
+gk104_grctx_generate_r418800(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ /*XXX: Not real sure where to apply these, there doesn't seem
+ * to be any pattern to which chipsets it's done on.
+ *
+ * Perhaps a VBIOS tweak?
+ */
+ if (0) {
+ nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
+ nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
+ }
+}
+
+void
+gk104_grctx_generate_patch_ltc(struct gf100_grctx *info)
+{
+ struct nvkm_device *device = info->gr->base.engine.subdev.device;
+ u32 data0 = nvkm_rd32(device, 0x17e91c);
+ u32 data1 = nvkm_rd32(device, 0x17e920);
+ /*XXX: Figure out how to modify this correctly! */
+ mmio_wr32(info, 0x17e91c, data0);
+ mmio_wr32(info, 0x17e920, data1);
+}
+
+void
gk104_grctx_generate_bundle(struct gf100_grctx *info)
{
const struct gf100_grctx_func *grctx = info->gr->func->grctx;
@@ -881,114 +912,74 @@ gk104_grctx_generate_unkn(struct gf100_gr *gr)
nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
}
-void
-gk104_grctx_generate_r418bb8(struct gf100_gr *gr)
+static void
+gk104_grctx_generate_r419f78(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- u32 data[6] = {}, data2[2] = {};
- u8 tpcnr[GPC_MAX];
- u8 shift, ntpcv;
- int gpc, tpc, i;
-
- /* calculate first set of magics */
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- gpc = -1;
- for (tpc = 0; tpc < gr->tpc_total; tpc++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpcnr[gpc]--;
-
- data[tpc / 6] |= gpc << ((tpc % 6) * 5);
- }
-
- for (; tpc < 32; tpc++)
- data[tpc / 6] |= 7 << ((tpc % 6) * 5);
-
- /* and the second... */
- shift = 0;
- ntpcv = gr->tpc_total;
- while (!(ntpcv & (1 << 4))) {
- ntpcv <<= 1;
- shift++;
- }
-
- data2[0] = (ntpcv << 16);
- data2[0] |= (shift << 21);
- data2[0] |= (((1 << (0 + 5)) % ntpcv) << 24);
- for (i = 1; i < 7; i++)
- data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
-
- /* GPC_BROADCAST */
- nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
- gr->screen_tile_row_offset);
- for (i = 0; i < 6; i++)
- nvkm_wr32(device, 0x418b08 + (i * 4), data[i]);
-
- /* GPC_BROADCAST.TP_BROADCAST */
- nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
- gr->screen_tile_row_offset | data2[0]);
- nvkm_wr32(device, 0x41bfe4, data2[1]);
- for (i = 0; i < 6; i++)
- nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]);
-
- /* UNK78xx */
- nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
- gr->screen_tile_row_offset);
- for (i = 0; i < 6; i++)
- nvkm_wr32(device, 0x40780c + (i * 4), data[i]);
+ nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
}
void
-gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const struct gf100_grctx_func *grctx = gr->func->grctx;
- u32 idle_timeout;
- int i;
-
- nvkm_mc_unk260(device, 0);
-
- gf100_gr_mmio(gr, grctx->hub);
- gf100_gr_mmio(gr, grctx->gpc);
- gf100_gr_mmio(gr, grctx->zcull);
- gf100_gr_mmio(gr, grctx->tpc);
- gf100_gr_mmio(gr, grctx->ppc);
-
- idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
- grctx->bundle(info);
- grctx->pagepool(info);
- grctx->attrib(info);
- grctx->unkn(gr);
-
- gf100_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
-
- for (i = 0; i < 8; i++)
- nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
-
nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
- nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
-
- gf100_gr_icmd(gr, grctx->icmd);
- nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, grctx->mthd);
- nvkm_mc_unk260(device, 1);
+}
- nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
- nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
+void
+gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int i, j, gpc, ppc;
+
+ for (i = 0; i < 32; i++) {
+ u32 atarget = max_t(u32, gr->tpc_total * i / 32, 1);
+ u32 btarget = gr->tpc_total - atarget;
+ bool alpha = atarget < btarget;
+ u64 amask = 0, bmask = 0;
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
+ u32 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+ u32 abits, bbits, pmask;
+
+ if (alpha) {
+ abits = atarget ? ppc_tpcs : 0;
+ bbits = ppc_tpcs - abits;
+ } else {
+ bbits = btarget ? ppc_tpcs : 0;
+ abits = ppc_tpcs - bbits;
+ }
+
+ pmask = gr->ppc_tpc_mask[gpc][ppc];
+ while (ppc_tpcs-- > abits)
+ pmask &= pmask - 1;
+ amask |= (u64)pmask << (gpc * 8);
+
+ pmask ^= gr->ppc_tpc_mask[gpc][ppc];
+ bmask |= (u64)pmask << (gpc * 8);
+
+ atarget -= min(abits, atarget);
+ btarget -= min(bbits, btarget);
+ if ((abits > 0) || (bbits > 0))
+ alpha = !alpha;
+ }
+ }
+
+ for (j = 0; j < gr->gpc_nr; j += 4, amask >>= 32, bmask >>= 32) {
+ nvkm_wr32(device, 0x406800 + (i * 0x20) + j, amask);
+ nvkm_wr32(device, 0x406c00 + (i * 0x20) + j, bmask);
+ }
+ }
}
const struct gf100_grctx_func
gk104_grctx = {
- .main = gk104_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gk104_grctx_pack_hub,
- .gpc = gk104_grctx_pack_gpc,
+ .gpc_0 = gk104_grctx_pack_gpc_0,
+ .gpc_1 = gk104_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gk104_grctx_pack_tpc,
.ppc = gk104_grctx_pack_ppc,
@@ -1005,4 +996,13 @@ gk104_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x7ff,
.alpha_nr = 0x648,
+ .patch_ltc = gk104_grctx_generate_patch_ltc,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r419f78 = gk104_grctx_generate_r419f78,
+ .r418800 = gk104_grctx_generate_r418800,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 7b95ec2fe453..86547cfc38dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -704,12 +704,17 @@ gk110_grctx_init_gpc_unk_2[] = {
};
const struct gf100_gr_pack
-gk110_grctx_pack_gpc[] = {
+gk110_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gf119_grctx_init_prop_0 },
{ gf119_grctx_init_gpc_unk_1 },
{ gk110_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+const struct gf100_gr_pack
+gk110_grctx_pack_gpc_1[] = {
{ gf119_grctx_init_crstr_0 },
{ gk104_grctx_init_gpm_0 },
{ gk110_grctx_init_gpc_unk_2 },
@@ -808,12 +813,20 @@ gk110_grctx_pack_ppc[] = {
* PGRAPH context implementation
******************************************************************************/
+void
+gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
+}
+
const struct gf100_grctx_func
gk110_grctx = {
- .main = gk104_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gk110_grctx_pack_hub,
- .gpc = gk110_grctx_pack_gpc,
+ .gpc_0 = gk110_grctx_pack_gpc_0,
+ .gpc_1 = gk110_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gk110_grctx_pack_tpc,
.ppc = gk110_grctx_pack_ppc,
@@ -830,4 +843,13 @@ gk110_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x7ff,
.alpha_nr = 0x648,
+ .patch_ltc = gk104_grctx_generate_patch_ltc,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
+ .r419eb0 = gk110_grctx_generate_r419eb0,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index 048b1152da44..ebb947bd1446 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -71,10 +71,11 @@ gk110b_grctx_pack_tpc[] = {
const struct gf100_grctx_func
gk110b_grctx = {
- .main = gk104_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gk110_grctx_pack_hub,
- .gpc = gk110_grctx_pack_gpc,
+ .gpc_0 = gk110_grctx_pack_gpc_0,
+ .gpc_1 = gk110_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gk110b_grctx_pack_tpc,
.ppc = gk110_grctx_pack_ppc,
@@ -91,4 +92,13 @@ gk110b_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x7ff,
.alpha_nr = 0x648,
+ .patch_ltc = gk104_grctx_generate_patch_ltc,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
+ .r419eb0 = gk110_grctx_generate_r419eb0,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index 67b7a1b43617..4d40512b5c99 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -443,12 +443,17 @@ gk208_grctx_init_gpm_0[] = {
};
static const struct gf100_gr_pack
-gk208_grctx_pack_gpc[] = {
+gk208_grctx_pack_gpc_0[] = {
{ gf100_grctx_init_gpc_unk_0 },
{ gk208_grctx_init_prop_0 },
{ gk208_grctx_init_gpc_unk_1 },
{ gk208_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+static const struct gf100_gr_pack
+gk208_grctx_pack_gpc_1[] = {
{ gk208_grctx_init_crstr_0 },
{ gk208_grctx_init_gpm_0 },
{ gk110_grctx_init_gpc_unk_2 },
@@ -532,10 +537,11 @@ gk208_grctx_pack_ppc[] = {
const struct gf100_grctx_func
gk208_grctx = {
- .main = gk104_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gk208_grctx_pack_hub,
- .gpc = gk208_grctx_pack_gpc,
+ .gpc_0 = gk208_grctx_pack_gpc_0,
+ .gpc_1 = gk208_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gk208_grctx_pack_tpc,
.ppc = gk208_grctx_pack_ppc,
@@ -552,4 +558,12 @@ gk208_grctx = {
.attrib_nr = 0x218,
.alpha_nr_max = 0x7ff,
.alpha_nr = 0x648,
+ .patch_ltc = gk104_grctx_generate_patch_ltc,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r418800 = gk104_grctx_generate_r418800,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index da7c35a6a3d2..896d473dcc0f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -42,10 +42,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
grctx->unkn(gr);
- gf100_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
+ gf100_grctx_generate_floorsweep(gr);
for (i = 0; i < 8; i++)
nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
@@ -82,4 +79,8 @@ gk20a_grctx = {
.attrib_nr = 0x240,
.alpha_nr_max = 0x648 + (0x648 / 2),
.alpha_nr = 0x648,
+ .sm_id = gf100_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index 9b43d4ce3eaa..0b3964e6b36e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -744,12 +744,17 @@ gm107_grctx_init_gpc_unk_2[] = {
};
static const struct gf100_gr_pack
-gm107_grctx_pack_gpc[] = {
+gm107_grctx_pack_gpc_0[] = {
{ gm107_grctx_init_gpc_unk_0 },
{ gk208_grctx_init_prop_0 },
{ gm107_grctx_init_gpc_unk_1 },
{ gm107_grctx_init_setup_0 },
{ gf100_grctx_init_zcull_0 },
+ {}
+};
+
+static const struct gf100_gr_pack
+gm107_grctx_pack_gpc_1[] = {
{ gk208_grctx_init_crstr_0 },
{ gk104_grctx_init_gpm_0 },
{ gm107_grctx_init_gpc_unk_2 },
@@ -860,6 +865,16 @@ gm107_grctx_pack_ppc[] = {
* PGRAPH context implementation
******************************************************************************/
+static void
+gm107_grctx_generate_r419e00(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080);
+ nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000);
+ nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000);
+ nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000);
+}
+
void
gm107_grctx_generate_bundle(struct gf100_grctx *info)
{
@@ -931,75 +946,27 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info)
}
static void
-gm107_grctx_generate_tpcid(struct gf100_gr *gr)
+gm107_grctx_generate_r406500(struct gf100_gr *gr)
{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- int gpc, tpc, id;
-
- for (tpc = 0, id = 0; tpc < 4; tpc++) {
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- if (tpc < gr->tpc_nr[gpc]) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
- id++;
- }
-
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]);
- }
- }
+ nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000001);
}
-static void
-gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+void
+gm107_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const struct gf100_grctx_func *grctx = gr->func->grctx;
- u32 idle_timeout;
- int i;
-
- gf100_gr_mmio(gr, grctx->hub);
- gf100_gr_mmio(gr, grctx->gpc);
- gf100_gr_mmio(gr, grctx->zcull);
- gf100_gr_mmio(gr, grctx->tpc);
- gf100_gr_mmio(gr, grctx->ppc);
-
- idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
- grctx->bundle(info);
- grctx->pagepool(info);
- grctx->attrib(info);
- grctx->unkn(gr);
-
- gm107_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
- gf100_grctx_generate_r406800(gr);
-
- nvkm_wr32(device, 0x4064d0, 0x00000001);
- for (i = 1; i < 8; i++)
- nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
- nvkm_wr32(device, 0x406500, 0x00000001);
-
- nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
- gf100_gr_icmd(gr, grctx->icmd);
- nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, grctx->mthd);
-
- nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080);
- nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000);
- nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000);
- nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), sm);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
}
const struct gf100_grctx_func
gm107_grctx = {
- .main = gm107_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.hub = gm107_grctx_pack_hub,
- .gpc = gm107_grctx_pack_gpc,
+ .gpc_0 = gm107_grctx_pack_gpc_0,
+ .gpc_1 = gm107_grctx_pack_gpc_1,
.zcull = gf100_grctx_pack_zcull,
.tpc = gm107_grctx_pack_tpc,
.ppc = gm107_grctx_pack_ppc,
@@ -1016,4 +983,12 @@ gm107_grctx = {
.attrib_nr = 0xaa0,
.alpha_nr_max = 0x1800,
.alpha_nr = 0x1000,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .tpc_nr = gf100_grctx_generate_tpc_nr,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables,
+ .dist_skip_table = gf117_grctx_generate_dist_skip_table,
+ .r406500 = gm107_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .r419e00 = gm107_grctx_generate_r419e00,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
index db209d33f486..013d05a0f0f6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -28,47 +28,34 @@
******************************************************************************/
void
-gm200_grctx_generate_tpcid(struct gf100_gr *gr)
+gm200_grctx_generate_r419a3c(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- int gpc, tpc, id;
+ nvkm_mask(device, 0x419a3c, 0x00000014, 0x00000000);
+}
- for (tpc = 0, id = 0; tpc < TPC_MAX_PER_GPC; tpc++) {
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- if (tpc < gr->tpc_nr[gpc]) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id);
- id++;
- }
- }
- }
+static void
+gm200_grctx_generate_r418e94(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
+ nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
}
void
-gm200_grctx_generate_405b60(struct gf100_gr *gr)
+gm200_grctx_generate_smid_config(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
u32 dist[TPC_MAX / 4] = {};
u32 gpcs[GPC_MAX] = {};
- u8 tpcnr[GPC_MAX];
- int tpc, gpc, i;
+ u8 sm, i;
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- /* won't result in the same distribution as the binary driver where
- * some of the gpcs have more tpcs than others, but this shall do
- * for the moment. the code for earlier gpus has this issue too.
- */
- for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while(!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
- gpcs[gpc] |= i << (tpc * 8);
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ const u8 gpc = gr->sm[sm].gpc;
+ const u8 tpc = gr->sm[sm].tpc;
+ dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+ gpcs[gpc] |= sm << (tpc * 8);
}
for (i = 0; i < dist_nr; i++)
@@ -77,50 +64,46 @@ gm200_grctx_generate_405b60(struct gf100_gr *gr)
nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
}
-static void
-gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+void
+gm200_grctx_generate_tpc_mask(struct gf100_gr *gr)
{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- const struct gf100_grctx_func *grctx = gr->func->grctx;
- u32 idle_timeout, tmp;
- int i;
-
- gf100_gr_mmio(gr, gr->fuc_sw_ctx);
-
- idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
- grctx->bundle(info);
- grctx->pagepool(info);
- grctx->attrib(info);
- grctx->unkn(gr);
-
- gm200_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
-
- for (i = 0; i < 8; i++)
- nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
- nvkm_wr32(device, 0x406500, 0x00000000);
-
- nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
+ u32 tmp, i;
for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
- tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
- nvkm_wr32(device, 0x4041c4, tmp);
+ tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * gr->func->tpc_nr);
+ nvkm_wr32(gr->base.engine.subdev.device, 0x4041c4, tmp);
+}
- gm200_grctx_generate_405b60(gr);
+void
+gm200_grctx_generate_r406500(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x406500, 0x00000000);
+}
- gf100_gr_icmd(gr, gr->fuc_bundle);
- nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, gr->fuc_method);
+void
+gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ u32 data[8] = {};
+ int gpc, ppc, i;
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+ u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
+ u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc];
+ while (ppc_tpcs-- > gr->ppc_tpc_min)
+ ppc_tpcm &= ppc_tpcm - 1;
+ ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc];
+ ((u8 *)data)[gpc] |= ppc_tpcm;
+ }
+ }
- nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000);
- nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000);
+ for (i = 0; i < ARRAY_SIZE(data); i++)
+ nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]);
}
const struct gf100_grctx_func
gm200_grctx = {
- .main = gm200_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.bundle = gm107_grctx_generate_bundle,
.bundle_size = 0x3000,
@@ -133,4 +116,13 @@ gm200_grctx = {
.attrib_nr = 0x400,
.alpha_nr_max = 0x1800,
.alpha_nr = 0x1000,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .tpc_mask = gm200_grctx_generate_tpc_mask,
+ .smid_config = gm200_grctx_generate_smid_config,
+ .r418e94 = gm200_grctx_generate_r418e94,
+ .r419a3c = gm200_grctx_generate_r419a3c,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index e5702e3e0a5a..a1d9e114ebeb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -22,20 +22,6 @@
#include "ctxgf100.h"
static void
-gm20b_grctx_generate_r406028(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- u32 tpc_per_gpc = 0;
- int i;
-
- for (i = 0; i < gr->gpc_nr; i++)
- tpc_per_gpc |= gr->tpc_nr[i] << (4 * i);
-
- nvkm_wr32(device, 0x406028, tpc_per_gpc);
- nvkm_wr32(device, 0x405870, tpc_per_gpc);
-}
-
-static void
gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -53,9 +39,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
grctx->unkn(gr);
- gm200_grctx_generate_tpcid(gr);
- gm20b_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
+ gf100_grctx_generate_floorsweep(gr);
for (i = 0; i < 8; i++)
nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
@@ -68,7 +52,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4);
nvkm_wr32(device, 0x4041c4, tmp);
- gm200_grctx_generate_405b60(gr);
+ gm200_grctx_generate_smid_config(gr);
gf100_gr_wait_idle(gr);
@@ -98,4 +82,6 @@ gm20b_grctx = {
.attrib_nr = 0x400,
.alpha_nr_max = 0xc00,
.alpha_nr = 0x800,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
index 88ea322d956c..0b3326262e12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -36,7 +36,7 @@ gp100_grctx_generate_pagepool(struct gf100_grctx *info)
const int s = 8;
const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
mmio_refn(info, 0x40800c, 0x00000000, s, b);
- mmio_wr32(info, 0x408010, 0x80000000);
+ mmio_wr32(info, 0x408010, 0x8007d800);
mmio_refn(info, 0x419004, 0x00000000, s, b);
mmio_wr32(info, 0x419008, 0x00000000);
}
@@ -48,14 +48,17 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr;
- const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
- const u32 size = roundup(gr->tpc_total * pertpc, 0x80);
const int s = 12;
- const int b = mmio_vram(info, size, (1 << s), false);
const int max_batches = 0xffff;
+ u32 size = grctx->alpha_nr_max * gr->tpc_total;
u32 ao = 0;
- u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
- int gpc, ppc, n = 0;
+ u32 bo = ao + size;
+ int gpc, ppc, b, n = 0;
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+ size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
+ size = ((size * 0x20) + 128) & ~127;
+ b = mmio_vram(info, size, (1 << s), false);
mmio_refn(info, 0x418810, 0x80000000, s, b);
mmio_refn(info, 0x419848, 0x10000000, s, b);
@@ -69,7 +72,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc];
- const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+ const u32 bs = attrib * gr->ppc_tpc_max;
const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0);
if (!(gr->ppc_mask[gpc] & (1 << ppc)))
@@ -77,7 +80,7 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
mmio_wr32(info, o + 0xc0, bs);
mmio_wr32(info, o + 0xf4, bo);
mmio_wr32(info, o + 0xf0, bs);
- bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+ bo += grctx->attrib_nr_max * gr->ppc_tpc_max;
mmio_wr32(info, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
@@ -89,79 +92,30 @@ gp100_grctx_generate_attrib(struct gf100_grctx *info)
mmio_wr32(info, 0x41befc, 0x00000000);
}
-static void
-gp100_grctx_generate_405b60(struct gf100_gr *gr)
+void
+gp100_grctx_generate_smid_config(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
- u32 dist[TPC_MAX / 4] = {};
- u32 gpcs[GPC_MAX * 2] = {};
- u8 tpcnr[GPC_MAX];
- int tpc, gpc, i;
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
-
- /* won't result in the same distribution as the binary driver where
- * some of the gpcs have more tpcs than others, but this shall do
- * for the moment. the code for earlier gpus has this issue too.
- */
- for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while(!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
- gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
+ u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
+ u8 sm, i;
+
+ for (sm = 0; sm < gr->sm_nr; sm++) {
+ const u8 gpc = gr->sm[sm].gpc;
+ const u8 tpc = gr->sm[sm].tpc;
+ dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
+ gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
}
for (i = 0; i < dist_nr; i++)
nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
- for (i = 0; i < gr->gpc_nr * 2; i++)
+ for (i = 0; i < ARRAY_SIZE(gpcs); i++)
nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
}
-void
-gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- const struct gf100_grctx_func *grctx = gr->func->grctx;
- u32 idle_timeout, tmp;
- int i;
-
- gf100_gr_mmio(gr, gr->fuc_sw_ctx);
-
- idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
-
- grctx->pagepool(info);
- grctx->bundle(info);
- grctx->attrib(info);
- grctx->unkn(gr);
-
- gm200_grctx_generate_tpcid(gr);
- gf100_grctx_generate_r406028(gr);
- gk104_grctx_generate_r418bb8(gr);
-
- for (i = 0; i < 8; i++)
- nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
- nvkm_wr32(device, 0x406500, 0x00000000);
-
- nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
-
- for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
- tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
- nvkm_wr32(device, 0x4041c4, tmp);
-
- gp100_grctx_generate_405b60(gr);
-
- gf100_gr_icmd(gr, gr->fuc_bundle);
- nvkm_wr32(device, 0x404154, idle_timeout);
- gf100_gr_mthd(gr, gr->fuc_method);
-}
-
const struct gf100_grctx_func
gp100_grctx = {
- .main = gp100_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.bundle = gm107_grctx_generate_bundle,
.bundle_size = 0x3000,
@@ -174,4 +128,12 @@ gp100_grctx = {
.attrib_nr = 0x440,
.alpha_nr_max = 0xc00,
.alpha_nr = 0x800,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .tpc_mask = gm200_grctx_generate_tpc_mask,
+ .smid_config = gp100_grctx_generate_smid_config,
+ .r419a3c = gm200_grctx_generate_r419a3c,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
index 7a66b4c2eb18..daee17bf7d0d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -29,6 +29,13 @@
* PGRAPH context implementation
******************************************************************************/
+static void
+gp102_grctx_generate_r408840(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x408840, 0x00000003, 0x00000000);
+}
+
void
gp102_grctx_generate_attrib(struct gf100_grctx *info)
{
@@ -36,14 +43,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info)
const struct gf100_grctx_func *grctx = gr->func->grctx;
const u32 alpha = grctx->alpha_nr;
const u32 attrib = grctx->attrib_nr;
- const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
- const u32 size = roundup(gr->tpc_total * pertpc, 0x80);
+ const u32 gfxp = grctx->gfxp_nr;
const int s = 12;
- const int b = mmio_vram(info, size, (1 << s), false);
const int max_batches = 0xffff;
+ u32 size = grctx->alpha_nr_max * gr->tpc_total;
u32 ao = 0;
- u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
- int gpc, ppc, n = 0;
+ u32 bo = ao + size;
+ int gpc, ppc, b, n = 0;
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+ size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
+ size = ((size * 0x20) + 128) & ~127;
+ b = mmio_vram(info, size, (1 << s), false);
mmio_refn(info, 0x418810, 0x80000000, s, b);
mmio_refn(info, 0x419848, 0x10000000, s, b);
@@ -57,17 +68,18 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info)
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc];
- const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+ const u32 bs = attrib * gr->ppc_tpc_max;
+ const u32 gs = gfxp * gr->ppc_tpc_max;
const u32 u = 0x418ea0 + (n * 0x04);
const u32 o = PPC_UNIT(gpc, ppc, 0);
const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4));
if (!(gr->ppc_mask[gpc] & (1 << ppc)))
continue;
- mmio_wr32(info, o + 0xc0, bs);
+ mmio_wr32(info, o + 0xc0, gs);
mmio_wr32(info, p, bs);
mmio_wr32(info, o + 0xf4, bo);
mmio_wr32(info, o + 0xf0, bs);
- bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+ bo += gs;
mmio_wr32(info, o + 0xe4, as);
mmio_wr32(info, o + 0xf8, ao);
ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
@@ -81,7 +93,7 @@ gp102_grctx_generate_attrib(struct gf100_grctx *info)
const struct gf100_grctx_func
gp102_grctx = {
- .main = gp100_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.bundle = gm107_grctx_generate_bundle,
.bundle_size = 0x3000,
@@ -90,8 +102,18 @@ gp102_grctx = {
.pagepool = gp100_grctx_generate_pagepool,
.pagepool_size = 0x20000,
.attrib = gp102_grctx_generate_attrib,
- .attrib_nr_max = 0x5d4,
+ .attrib_nr_max = 0x4b0,
.attrib_nr = 0x320,
.alpha_nr_max = 0xc00,
.alpha_nr = 0x800,
+ .gfxp_nr = 0xba8,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .tpc_mask = gm200_grctx_generate_tpc_mask,
+ .smid_config = gp100_grctx_generate_smid_config,
+ .r419a3c = gm200_grctx_generate_r419a3c,
+ .r408840 = gp102_grctx_generate_r408840,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
new file mode 100644
index 000000000000..3b85e3d326b2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ctxgf100.h"
+
+const struct gf100_grctx_func
+gp104_grctx = {
+ .main = gf100_grctx_generate_main,
+ .unkn = gk104_grctx_generate_unkn,
+ .bundle = gm107_grctx_generate_bundle,
+ .bundle_size = 0x3000,
+ .bundle_min_gpm_fifo_depth = 0x180,
+ .bundle_token_limit = 0x900,
+ .pagepool = gp100_grctx_generate_pagepool,
+ .pagepool_size = 0x20000,
+ .attrib = gp102_grctx_generate_attrib,
+ .attrib_nr_max = 0x4b0,
+ .attrib_nr = 0x320,
+ .alpha_nr_max = 0xc00,
+ .alpha_nr = 0x800,
+ .gfxp_nr = 0xba8,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .tpc_mask = gm200_grctx_generate_tpc_mask,
+ .smid_config = gp100_grctx_generate_smid_config,
+ .r419a3c = gm200_grctx_generate_r419a3c,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
index 8da91a0b3bd2..5060c5ee5ce0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
@@ -31,7 +31,7 @@
const struct gf100_grctx_func
gp107_grctx = {
- .main = gp100_grctx_generate_main,
+ .main = gf100_grctx_generate_main,
.unkn = gk104_grctx_generate_unkn,
.bundle = gm107_grctx_generate_bundle,
.bundle_size = 0x3000,
@@ -44,4 +44,13 @@ gp107_grctx = {
.attrib_nr = 0x540,
.alpha_nr_max = 0xc00,
.alpha_nr = 0x800,
+ .gfxp_nr = 0xe94,
+ .sm_id = gm107_grctx_generate_sm_id,
+ .rop_mapping = gf117_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .tpc_mask = gm200_grctx_generate_tpc_mask,
+ .smid_config = gp100_grctx_generate_smid_config,
+ .r419a3c = gm200_grctx_generate_r419a3c,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
new file mode 100644
index 000000000000..0990765ef191
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ctxgf100.h"
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+static const struct gf100_gr_init
+gv100_grctx_init_sw_veid_bundle_init_0[] = {
+ { 0x00001000, 64, 0x00100000, 0x00000008 },
+ { 0x00000941, 64, 0x00100000, 0x00000000 },
+ { 0x0000097e, 64, 0x00100000, 0x00000000 },
+ { 0x0000097f, 64, 0x00100000, 0x00000100 },
+ { 0x0000035c, 64, 0x00100000, 0x00000000 },
+ { 0x0000035d, 64, 0x00100000, 0x00000000 },
+ { 0x00000a08, 64, 0x00100000, 0x00000000 },
+ { 0x00000a09, 64, 0x00100000, 0x00000000 },
+ { 0x00000a0a, 64, 0x00100000, 0x00000000 },
+ { 0x00000352, 64, 0x00100000, 0x00000000 },
+ { 0x00000353, 64, 0x00100000, 0x00000000 },
+ { 0x00000358, 64, 0x00100000, 0x00000000 },
+ { 0x00000359, 64, 0x00100000, 0x00000000 },
+ { 0x00000370, 64, 0x00100000, 0x00000000 },
+ { 0x00000371, 64, 0x00100000, 0x00000000 },
+ { 0x00000372, 64, 0x00100000, 0x000fffff },
+ { 0x00000366, 64, 0x00100000, 0x00000000 },
+ { 0x00000367, 64, 0x00100000, 0x00000000 },
+ { 0x00000368, 64, 0x00100000, 0x00000fff },
+ { 0x00000623, 64, 0x00100000, 0x00000000 },
+ { 0x00000624, 64, 0x00100000, 0x00000000 },
+ { 0x0001e100, 1, 0x00000001, 0x02000001 },
+ {}
+};
+
+static const struct gf100_gr_pack
+gv100_grctx_pack_sw_veid_bundle_init[] = {
+ { gv100_grctx_init_sw_veid_bundle_init_0 },
+ {}
+};
+
+static void
+gv100_grctx_generate_attrib(struct gf100_grctx *info)
+{
+ struct gf100_gr *gr = info->gr;
+ const struct gf100_grctx_func *grctx = gr->func->grctx;
+ const u32 alpha = grctx->alpha_nr;
+ const u32 attrib = grctx->attrib_nr;
+ const u32 gfxp = grctx->gfxp_nr;
+ const int s = 12;
+ const int max_batches = 0xffff;
+ u32 size = grctx->alpha_nr_max * gr->tpc_total;
+ u32 ao = 0;
+ u32 bo = ao + size;
+ int gpc, ppc, b, n = 0;
+
+ size += grctx->gfxp_nr * gr->tpc_total;
+ size = ((size * 0x20) + 128) & ~127;
+ b = mmio_vram(info, size, (1 << s), false);
+
+ mmio_refn(info, 0x418810, 0x80000000, s, b);
+ mmio_refn(info, 0x419848, 0x10000000, s, b);
+ mmio_refn(info, 0x419c2c, 0x10000000, s, b);
+ mmio_refn(info, 0x419e00, 0x00000000, s, b);
+ mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7);
+ mmio_wr32(info, 0x405830, attrib);
+ mmio_wr32(info, 0x40585c, alpha);
+ mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+ const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc];
+ const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+ const u32 gs = gfxp * gr->ppc_tpc_nr[gpc][ppc];
+ const u32 u = 0x418ea0 + (n * 0x04);
+ const u32 o = PPC_UNIT(gpc, ppc, 0);
+ if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+ continue;
+ mmio_wr32(info, o + 0xc0, gs);
+ mmio_wr32(info, o + 0xf4, bo);
+ mmio_wr32(info, o + 0xf0, bs);
+ bo += gs;
+ mmio_wr32(info, o + 0xe4, as);
+ mmio_wr32(info, o + 0xf8, ao);
+ ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+ mmio_wr32(info, u, bs);
+ }
+ }
+
+ mmio_wr32(info, 0x4181e4, 0x00000100);
+ mmio_wr32(info, 0x41befc, 0x00000100);
+}
+
+static void
+gv100_grctx_generate_rop_mapping(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ u32 data;
+ int i, j;
+
+ /* Pack tile map into register format. */
+ nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset);
+ for (i = 0; i < 11; i++) {
+ for (data = 0, j = 0; j < 6; j++)
+ data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5);
+ nvkm_wr32(device, 0x418b08 + (i * 4), data);
+ nvkm_wr32(device, 0x41bf00 + (i * 4), data);
+ nvkm_wr32(device, 0x40780c + (i * 4), data);
+ }
+
+ /* GPC_BROADCAST.TP_BROADCAST */
+ nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset);
+ for (i = 0, j = 1; i < 5; i++, j += 4) {
+ u8 v19 = (1 << (j + 0)) % gr->tpc_total;
+ u8 v20 = (1 << (j + 1)) % gr->tpc_total;
+ u8 v21 = (1 << (j + 2)) % gr->tpc_total;
+ u8 v22 = (1 << (j + 3)) % gr->tpc_total;
+ nvkm_wr32(device, 0x41bfb0 + (i * 4), (v22 << 24) |
+ (v21 << 16) |
+ (v20 << 8) |
+ v19);
+ }
+
+ /* UNK78xx */
+ nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) |
+ gr->screen_tile_row_offset);
+}
+
+static void
+gv100_grctx_generate_r400088(struct gf100_gr *gr, bool on)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x400088, 0x00060000, on ? 0x00060000 : 0x00000000);
+}
+
+static void
+gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
+}
+
+static void
+gv100_grctx_generate_unkn(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010);
+ nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004);
+ nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000);
+ nvkm_mask(device, 0x405800, 0x08000000, 0x08000000);
+ nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008);
+}
+
+static void
+gv100_grctx_unkn88c(struct gf100_gr *gr, bool on)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const u32 mask = 0x00000010, data = on ? mask : 0x00000000;
+ nvkm_mask(device, 0x40988c, mask, data);
+ nvkm_rd32(device, 0x40988c);
+ nvkm_mask(device, 0x41a88c, mask, data);
+ nvkm_rd32(device, 0x41a88c);
+ nvkm_mask(device, 0x408a14, mask, data);
+ nvkm_rd32(device, 0x408a14);
+}
+
+const struct gf100_grctx_func
+gv100_grctx = {
+ .unkn88c = gv100_grctx_unkn88c,
+ .main = gf100_grctx_generate_main,
+ .unkn = gv100_grctx_generate_unkn,
+ .sw_veid_bundle_init = gv100_grctx_pack_sw_veid_bundle_init,
+ .bundle = gm107_grctx_generate_bundle,
+ .bundle_size = 0x3000,
+ .bundle_min_gpm_fifo_depth = 0x180,
+ .bundle_token_limit = 0x1680,
+ .pagepool = gp100_grctx_generate_pagepool,
+ .pagepool_size = 0x20000,
+ .attrib = gv100_grctx_generate_attrib,
+ .attrib_nr_max = 0x6c0,
+ .attrib_nr = 0x480,
+ .alpha_nr_max = 0xc00,
+ .alpha_nr = 0x800,
+ .gfxp_nr = 0xd10,
+ .sm_id = gv100_grctx_generate_sm_id,
+ .rop_mapping = gv100_grctx_generate_rop_mapping,
+ .dist_skip_table = gm200_grctx_generate_dist_skip_table,
+ .r406500 = gm200_grctx_generate_r406500,
+ .gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
+ .smid_config = gp100_grctx_generate_smid_config,
+ .r400088 = gv100_grctx_generate_r400088,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 2f8dc107047d..70d3d41e616c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -32,6 +32,7 @@
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/pmu.h>
+#include <subdev/therm.h>
#include <subdev/timer.h>
#include <engine/fifo.h>
@@ -91,7 +92,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
gr->zbc_color[zbc].format = format;
nvkm_ltc_zbc_color_get(ltc, zbc, l2);
- gf100_gr_zbc_clear_color(gr, zbc);
+ gr->func->zbc->clear_color(gr, zbc);
return zbc;
}
@@ -136,10 +137,16 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
gr->zbc_depth[zbc].ds = ds;
gr->zbc_depth[zbc].l2 = l2;
nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
- gf100_gr_zbc_clear_depth(gr, zbc);
+ gr->func->zbc->clear_depth(gr, zbc);
return zbc;
}
+const struct gf100_gr_func_zbc
+gf100_gr_zbc = {
+ .clear_color = gf100_gr_zbc_clear_color,
+ .clear_depth = gf100_gr_zbc_clear_depth,
+};
+
/*******************************************************************************
* Graphics object classes
******************************************************************************/
@@ -743,21 +750,31 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
- int index;
+ int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min;
if (!gr->zbc_color[0].format) {
- gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]);
- gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]);
- gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]);
- gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]);
- gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
- gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
- }
-
- for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
- gf100_gr_zbc_clear_color(gr, index);
- for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
- gf100_gr_zbc_clear_depth(gr, index);
+ gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); c++;
+ gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); c++;
+ gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); c++;
+ gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); c++;
+ gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); d++;
+ gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); d++;
+ if (gr->func->zbc->stencil_get) {
+ gr->func->zbc->stencil_get(gr, 1, 0x00, 0x00); s++;
+ gr->func->zbc->stencil_get(gr, 1, 0x01, 0x01); s++;
+ gr->func->zbc->stencil_get(gr, 1, 0xff, 0xff); s++;
+ }
+ }
+
+ for (index = c; index <= ltc->zbc_max; index++)
+ gr->func->zbc->clear_color(gr, index);
+ for (index = d; index <= ltc->zbc_max; index++)
+ gr->func->zbc->clear_depth(gr, index);
+
+ if (gr->func->zbc->clear_stencil) {
+ for (index = s; index <= ltc->zbc_max; index++)
+ gr->func->zbc->clear_stencil(gr, index);
+ }
}
/**
@@ -970,7 +987,7 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
}
-static const struct nvkm_enum gf100_mp_warp_error[] = {
+const struct nvkm_enum gf100_mp_warp_error[] = {
{ 0x01, "STACK_ERROR" },
{ 0x02, "API_STACK_ERROR" },
{ 0x03, "RET_EMPTY_STACK_ERROR" },
@@ -995,7 +1012,7 @@ static const struct nvkm_enum gf100_mp_warp_error[] = {
{}
};
-static const struct nvkm_bitfield gf100_mp_global_error[] = {
+const struct nvkm_bitfield gf100_mp_global_error[] = {
{ 0x00000001, "SM_TO_SM_FAULT" },
{ 0x00000002, "L1_ERROR" },
{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
@@ -1009,7 +1026,7 @@ static const struct nvkm_bitfield gf100_mp_global_error[] = {
{}
};
-static void
+void
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
{
struct nvkm_subdev *subdev = &gr->base.engine.subdev;
@@ -1045,7 +1062,7 @@ gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
}
if (stat & 0x00000002) {
- gf100_gr_trap_mp(gr, gpc, tpc);
+ gr->func->trap_mp(gr, gpc, tpc);
stat &= ~0x00000002;
}
@@ -1611,7 +1628,8 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
/* load register lists */
gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
- gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
+ gf100_gr_init_csdata(gr, grctx->gpc_0, 0x41a000, 0x000, 0x418000);
+ gf100_gr_init_csdata(gr, grctx->gpc_1, 0x41a000, 0x000, 0x418000);
gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
@@ -1651,6 +1669,97 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
return ret;
}
+void
+gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
+{
+ int tpc, gpc;
+ for (tpc = 0; tpc < gr->tpc_max; tpc++) {
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ if (tpc < gr->tpc_nr[gpc]) {
+ gr->sm[gr->sm_nr].gpc = gpc;
+ gr->sm[gr->sm_nr].tpc = tpc;
+ gr->sm_nr++;
+ }
+ }
+ }
+}
+
+void
+gf100_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+ static const u8 primes[] = {
+ 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61
+ };
+ int init_frac[GPC_MAX], init_err[GPC_MAX], run_err[GPC_MAX], i, j;
+ u32 mul_factor, comm_denom;
+ u8 gpc_map[GPC_MAX];
+ bool sorted;
+
+ switch (gr->tpc_total) {
+ case 15: gr->screen_tile_row_offset = 0x06; break;
+ case 14: gr->screen_tile_row_offset = 0x05; break;
+ case 13: gr->screen_tile_row_offset = 0x02; break;
+ case 11: gr->screen_tile_row_offset = 0x07; break;
+ case 10: gr->screen_tile_row_offset = 0x06; break;
+ case 7:
+ case 5: gr->screen_tile_row_offset = 0x01; break;
+ case 3: gr->screen_tile_row_offset = 0x02; break;
+ case 2:
+ case 1: gr->screen_tile_row_offset = 0x01; break;
+ default: gr->screen_tile_row_offset = 0x03;
+ for (i = 0; i < ARRAY_SIZE(primes); i++) {
+ if (gr->tpc_total % primes[i]) {
+ gr->screen_tile_row_offset = primes[i];
+ break;
+ }
+ }
+ break;
+ }
+
+ /* Sort GPCs by TPC count, highest-to-lowest. */
+ for (i = 0; i < gr->gpc_nr; i++)
+ gpc_map[i] = i;
+ sorted = false;
+
+ while (!sorted) {
+ for (sorted = true, i = 0; i < gr->gpc_nr - 1; i++) {
+ if (gr->tpc_nr[gpc_map[i + 1]] >
+ gr->tpc_nr[gpc_map[i + 0]]) {
+ u8 swap = gpc_map[i];
+ gpc_map[i + 0] = gpc_map[i + 1];
+ gpc_map[i + 1] = swap;
+ sorted = false;
+ }
+ }
+ }
+
+ /* Determine tile->GPC mapping */
+ mul_factor = gr->gpc_nr * gr->tpc_max;
+ if (mul_factor & 1)
+ mul_factor = 2;
+ else
+ mul_factor = 1;
+
+ comm_denom = gr->gpc_nr * gr->tpc_max * mul_factor;
+
+ for (i = 0; i < gr->gpc_nr; i++) {
+ init_frac[i] = gr->tpc_nr[gpc_map[i]] * gr->gpc_nr * mul_factor;
+ init_err[i] = i * gr->tpc_max * mul_factor - comm_denom/2;
+ run_err[i] = init_frac[i] + init_err[i];
+ }
+
+ for (i = 0; i < gr->tpc_total;) {
+ for (j = 0; j < gr->gpc_nr; j++) {
+ if ((run_err[j] * 2) >= comm_denom) {
+ gr->tile[i++] = gpc_map[j];
+ run_err[j] += init_frac[j] - comm_denom;
+ } else {
+ run_err[j] += init_frac[j];
+ }
+ }
+ }
+}
+
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
@@ -1674,55 +1783,27 @@ gf100_gr_oneinit(struct nvkm_gr *base)
gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f;
for (i = 0; i < gr->gpc_nr; i++) {
gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
+ gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
gr->tpc_total += gr->tpc_nr[i];
gr->ppc_nr[i] = gr->func->ppc_nr;
for (j = 0; j < gr->ppc_nr[i]; j++) {
- u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
- if (mask)
- gr->ppc_mask[i] |= (1 << j);
- gr->ppc_tpc_nr[i][j] = hweight8(mask);
- }
- }
-
- /*XXX: these need figuring out... though it might not even matter */
- switch (device->chipset) {
- case 0xc0:
- if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
- gr->screen_tile_row_offset = 0x07;
- } else
- if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
- gr->screen_tile_row_offset = 0x05;
- } else
- if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
- gr->screen_tile_row_offset = 0x06;
+ gr->ppc_tpc_mask[i][j] =
+ nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
+ if (gr->ppc_tpc_mask[i][j] == 0)
+ continue;
+ gr->ppc_mask[i] |= (1 << j);
+ gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
+ if (gr->ppc_tpc_min == 0 ||
+ gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j])
+ gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j];
+ if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j])
+ gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j];
}
- break;
- case 0xc3: /* 450, 4/0/0/0, 2 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xc4: /* 460, 3/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc1: /* 2/0/0/0, 1 */
- gr->screen_tile_row_offset = 0x01;
- break;
- case 0xc8: /* 4/4/3/4, 5 */
- gr->screen_tile_row_offset = 0x06;
- break;
- case 0xce: /* 4/4/0/0, 4 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xcf: /* 4/0/0/0, 3 */
- gr->screen_tile_row_offset = 0x03;
- break;
- case 0xd7:
- case 0xd9: /* 1/0/0/0, 1 */
- case 0xea: /* gk20a */
- case 0x12b: /* gm20b */
- gr->screen_tile_row_offset = 0x01;
- break;
}
+ memset(gr->tile, 0xff, sizeof(gr->tile));
+ gr->func->oneinit_tiles(gr);
+ gr->func->oneinit_sm_id(gr);
return 0;
}
@@ -1914,13 +1995,68 @@ gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
}
void
+gf100_gr_init_400054(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x34ce3464);
+}
+
+void
+gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+}
+
+void
+gf100_gr_init_tex_hww_esr(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+}
+
+void
+gf100_gr_init_419eb4(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+}
+
+void
+gf100_gr_init_419cc0(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int gpc, tpc;
+
+ nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++)
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+ }
+}
+
+void
+gf100_gr_init_40601c(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x40601c, 0xc0000000);
+}
+
+void
+gf100_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+ const u32 data = gr->firmware ? 0x000e0000 : 0x000e0001;
+ nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, data);
+}
+
+void
gf100_gr_init_gpc_mmu(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
struct nvkm_fb *fb = device->fb;
nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0x00000001);
- nvkm_wr32(device, 0x4188a4, 0x00000000);
+ nvkm_wr32(device, 0x4188a4, 0x03000000);
nvkm_wr32(device, 0x418888, 0x00000000);
nvkm_wr32(device, 0x41888c, 0x00000000);
nvkm_wr32(device, 0x418890, 0x00000000);
@@ -1929,37 +2065,30 @@ gf100_gr_init_gpc_mmu(struct gf100_gr *gr)
nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(fb->mmu_rd) >> 8);
}
-int
-gf100_gr_init(struct gf100_gr *gr)
+void
+gf100_gr_init_num_active_ltcs(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc, rop;
- int i;
-
- gr->func->init_gpc_mmu(gr);
-
- gf100_gr_mmio(gr, gr->func->mmio);
-
- nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
-
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+ nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+}
- data[i / 8] |= tpc << ((i % 8) * 4);
+void
+gf100_gr_init_zcull(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+ const u8 tile_nr = ALIGN(gr->tpc_total, 32);
+ u8 bank[GPC_MAX] = {}, gpc, i, j;
+ u32 data;
+
+ for (i = 0; i < tile_nr; i += 8) {
+ for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) {
+ data |= bank[gr->tile[i + j]] << (j * 4);
+ bank[gr->tile[i + j]]++;
+ }
+ nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data);
}
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
@@ -1968,29 +2097,88 @@ gf100_gr_init(struct gf100_gr *gr)
nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
}
- if (device->chipset != 0xd7)
- nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
+ nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
+}
+
+void
+gf100_gr_init_vsc_stream_master(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
+}
+
+int
+gf100_gr_init(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int gpc, tpc, rop;
+
+ if (gr->func->init_419bd8)
+ gr->func->init_419bd8(gr);
+
+ gr->func->init_gpc_mmu(gr);
+
+ if (gr->fuc_sw_nonctx)
+ gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
else
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+ gf100_gr_mmio(gr, gr->func->mmio);
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+ gf100_gr_wait_idle(gr);
+
+ if (gr->func->init_r405a14)
+ gr->func->init_r405a14(gr);
+
+ if (gr->func->clkgate_pack)
+ nvkm_therm_clkgate_init(device->therm, gr->func->clkgate_pack);
+
+ if (gr->func->init_bios)
+ gr->func->init_bios(gr);
+
+ gr->func->init_vsc_stream_master(gr);
+ gr->func->init_zcull(gr);
+ gr->func->init_num_active_ltcs(gr);
+ if (gr->func->init_rop_active_fbps)
+ gr->func->init_rop_active_fbps(gr);
+ if (gr->func->init_bios_2)
+ gr->func->init_bios_2(gr);
+ if (gr->func->init_swdx_pes_mask)
+ gr->func->init_swdx_pes_mask(gr);
nvkm_wr32(device, 0x400500, 0x00010001);
nvkm_wr32(device, 0x400100, 0xffffffff);
nvkm_wr32(device, 0x40013c, 0xffffffff);
+ nvkm_wr32(device, 0x400124, 0x00000002);
+
+ gr->func->init_fecs_exceptions(gr);
+ if (gr->func->init_ds_hww_esr_2)
+ gr->func->init_ds_hww_esr_2(gr);
- nvkm_wr32(device, 0x409c24, 0x000f0000);
nvkm_wr32(device, 0x404000, 0xc0000000);
nvkm_wr32(device, 0x404600, 0xc0000000);
nvkm_wr32(device, 0x408030, 0xc0000000);
- nvkm_wr32(device, 0x40601c, 0xc0000000);
+
+ if (gr->func->init_40601c)
+ gr->func->init_40601c(gr);
+
nvkm_wr32(device, 0x404490, 0xc0000000);
nvkm_wr32(device, 0x406018, 0xc0000000);
+
+ if (gr->func->init_sked_hww_esr)
+ gr->func->init_sked_hww_esr(gr);
+
nvkm_wr32(device, 0x405840, 0xc0000000);
nvkm_wr32(device, 0x405844, 0x00ffffff);
- nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
- nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+
+ if (gr->func->init_419cc0)
+ gr->func->init_419cc0(gr);
+ if (gr->func->init_419eb4)
+ gr->func->init_419eb4(gr);
+ if (gr->func->init_419c9c)
+ gr->func->init_419c9c(gr);
+
+ if (gr->func->init_ppc_exceptions)
+ gr->func->init_ppc_exceptions(gr);
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
@@ -2000,19 +2188,20 @@ gf100_gr_init(struct gf100_gr *gr)
for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+ if (gr->func->init_tex_hww_esr)
+ gr->func->init_tex_hww_esr(gr, gpc, tpc);
nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
+ if (gr->func->init_504430)
+ gr->func->init_504430(gr, gpc, tpc);
+ gr->func->init_shader_exceptions(gr, gpc, tpc);
}
nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
}
for (rop = 0; rop < gr->rop_nr; rop++) {
- nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
+ nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+ nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
}
@@ -2024,10 +2213,14 @@ gf100_gr_init(struct gf100_gr *gr)
nvkm_wr32(device, 0x40011c, 0xffffffff);
nvkm_wr32(device, 0x400134, 0xffffffff);
- nvkm_wr32(device, 0x400054, 0x34ce3464);
+ if (gr->func->init_400054)
+ gr->func->init_400054(gr);
gf100_gr_zbc_init(gr);
+ if (gr->func->init_4188a4)
+ gr->func->init_4188a4(gr);
+
return gf100_gr_init_ctxctl(gr);
}
@@ -2053,13 +2246,27 @@ gf100_gr_gpccs_ucode = {
static const struct gf100_gr_func
gf100_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf100_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf100_gr_pack_mmio,
.fecs.ucode = &gf100_gr_fecs_ucode,
.gpccs.ucode = &gf100_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.grctx = &gf100_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index c8ec3fd97155..dc46cf0131db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -72,6 +72,12 @@ struct gf100_gr_zbc_depth {
u32 l2;
};
+struct gf100_gr_zbc_stencil {
+ u32 format;
+ u32 ds;
+ u32 l2;
+};
+
struct gf100_gr {
const struct gf100_gr_func *func;
struct nvkm_gr base;
@@ -95,21 +101,33 @@ struct gf100_gr {
struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT];
struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT];
+ struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT];
u8 rop_nr;
u8 gpc_nr;
u8 tpc_nr[GPC_MAX];
+ u8 tpc_max;
u8 tpc_total;
u8 ppc_nr[GPC_MAX];
u8 ppc_mask[GPC_MAX];
+ u8 ppc_tpc_mask[GPC_MAX][4];
u8 ppc_tpc_nr[GPC_MAX][4];
+ u8 ppc_tpc_min;
+ u8 ppc_tpc_max;
+
+ u8 screen_tile_row_offset;
+ u8 tile[TPC_MAX];
+
+ struct {
+ u8 gpc;
+ u8 tpc;
+ } sm[TPC_MAX];
+ u8 sm_nr;
struct gf100_gr_data mmio_data[4];
struct gf100_gr_mmio mmio_list[4096/8];
u32 size;
u32 *data;
-
- u8 screen_tile_row_offset;
};
int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *,
@@ -118,14 +136,43 @@ int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *,
int, struct nvkm_gr **);
void *gf100_gr_dtor(struct nvkm_gr *);
+struct gf100_gr_func_zbc {
+ void (*clear_color)(struct gf100_gr *, int zbc);
+ void (*clear_depth)(struct gf100_gr *, int zbc);
+ int (*stencil_get)(struct gf100_gr *, int format,
+ const u32 ds, const u32 l2);
+ void (*clear_stencil)(struct gf100_gr *, int zbc);
+};
+
struct gf100_gr_func {
void (*dtor)(struct gf100_gr *);
+ void (*oneinit_tiles)(struct gf100_gr *);
+ void (*oneinit_sm_id)(struct gf100_gr *);
int (*init)(struct gf100_gr *);
+ void (*init_419bd8)(struct gf100_gr *);
void (*init_gpc_mmu)(struct gf100_gr *);
+ void (*init_r405a14)(struct gf100_gr *);
+ void (*init_bios)(struct gf100_gr *);
+ void (*init_vsc_stream_master)(struct gf100_gr *);
+ void (*init_zcull)(struct gf100_gr *);
+ void (*init_num_active_ltcs)(struct gf100_gr *);
void (*init_rop_active_fbps)(struct gf100_gr *);
- void (*init_ppc_exceptions)(struct gf100_gr *);
+ void (*init_bios_2)(struct gf100_gr *);
void (*init_swdx_pes_mask)(struct gf100_gr *);
- void (*init_num_active_ltcs)(struct gf100_gr *);
+ void (*init_fecs_exceptions)(struct gf100_gr *);
+ void (*init_ds_hww_esr_2)(struct gf100_gr *);
+ void (*init_40601c)(struct gf100_gr *);
+ void (*init_sked_hww_esr)(struct gf100_gr *);
+ void (*init_419cc0)(struct gf100_gr *);
+ void (*init_419eb4)(struct gf100_gr *);
+ void (*init_419c9c)(struct gf100_gr *);
+ void (*init_ppc_exceptions)(struct gf100_gr *);
+ void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc);
+ void (*init_504430)(struct gf100_gr *, int gpc, int tpc);
+ void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc);
+ void (*init_400054)(struct gf100_gr *);
+ void (*init_4188a4)(struct gf100_gr *);
+ void (*trap_mp)(struct gf100_gr *, int gpc, int tpc);
void (*set_hww_esr_report_mask)(struct gf100_gr *);
const struct gf100_gr_pack *mmio;
struct {
@@ -135,26 +182,60 @@ struct gf100_gr_func {
struct gf100_gr_ucode *ucode;
} gpccs;
int (*rops)(struct gf100_gr *);
+ int gpc_nr;
+ int tpc_nr;
int ppc_nr;
const struct gf100_grctx_func *grctx;
const struct nvkm_therm_clkgate_pack *clkgate_pack;
+ const struct gf100_gr_func_zbc *zbc;
struct nvkm_sclass sclass[];
};
-int gf100_gr_init(struct gf100_gr *);
int gf100_gr_rops(struct gf100_gr *);
-
-int gk104_gr_init(struct gf100_gr *);
+void gf100_gr_oneinit_tiles(struct gf100_gr *);
+void gf100_gr_oneinit_sm_id(struct gf100_gr *);
+int gf100_gr_init(struct gf100_gr *);
+void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
+void gf100_gr_init_zcull(struct gf100_gr *);
+void gf100_gr_init_num_active_ltcs(struct gf100_gr *);
+void gf100_gr_init_fecs_exceptions(struct gf100_gr *);
+void gf100_gr_init_40601c(struct gf100_gr *);
+void gf100_gr_init_419cc0(struct gf100_gr *);
+void gf100_gr_init_419eb4(struct gf100_gr *);
+void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int);
+void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gf100_gr_init_400054(struct gf100_gr *);
+extern const struct gf100_gr_func_zbc gf100_gr_zbc;
+
+void gf117_gr_init_zcull(struct gf100_gr *);
+
+void gk104_gr_init_vsc_stream_master(struct gf100_gr *);
void gk104_gr_init_rop_active_fbps(struct gf100_gr *);
void gk104_gr_init_ppc_exceptions(struct gf100_gr *);
+void gk104_gr_init_sked_hww_esr(struct gf100_gr *);
+
+void gk110_gr_init_419eb4(struct gf100_gr *);
+
+void gm107_gr_init_504430(struct gf100_gr *, int, int);
+void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gm107_gr_init_400054(struct gf100_gr *);
int gk20a_gr_init(struct gf100_gr *);
-int gm200_gr_init(struct gf100_gr *);
+void gm200_gr_oneinit_tiles(struct gf100_gr *);
+void gm200_gr_oneinit_sm_id(struct gf100_gr *);
int gm200_gr_rops(struct gf100_gr *);
+void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
+void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
-int gp100_gr_init(struct gf100_gr *);
void gp100_gr_init_rop_active_fbps(struct gf100_gr *);
+void gp100_gr_init_fecs_exceptions(struct gf100_gr *);
+void gp100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gp100_gr_zbc_clear_color(struct gf100_gr *, int);
+void gp100_gr_zbc_clear_depth(struct gf100_gr *, int);
+
+void gp102_gr_init_swdx_pes_mask(struct gf100_gr *);
+extern const struct gf100_gr_func_zbc gp102_gr_zbc;
#define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object)
#include <core/object.h>
@@ -187,7 +268,7 @@ extern const struct nvkm_object_func gf100_fermi;
struct gf100_gr_init {
u32 addr;
u8 count;
- u8 pitch;
+ u32 pitch;
u32 data;
};
@@ -257,6 +338,9 @@ extern const struct gf100_gr_init gf100_gr_init_be_0[];
extern const struct gf100_gr_init gf100_gr_init_fe_1[];
extern const struct gf100_gr_init gf100_gr_init_pe_1[];
void gf100_gr_init_gpc_mmu(struct gf100_gr *);
+void gf100_gr_trap_mp(struct gf100_gr *, int, int);
+extern const struct nvkm_bitfield gf100_mp_global_error[];
+extern const struct nvkm_enum gf100_mp_warp_error[];
extern const struct gf100_gr_init gf104_gr_init_ds_0[];
extern const struct gf100_gr_init gf104_gr_init_tex_0[];
@@ -279,6 +363,7 @@ extern const struct gf100_gr_init gf117_gr_init_wwdx_0[];
extern const struct gf100_gr_init gf117_gr_init_cbm_0[];
extern const struct gf100_gr_init gk104_gr_init_main_0[];
+extern const struct gf100_gr_init gk104_gr_init_gpc_unk_2[];
extern const struct gf100_gr_init gk104_gr_init_tpccs_0[];
extern const struct gf100_gr_init gk104_gr_init_pe_0[];
extern const struct gf100_gr_init gk104_gr_init_be_0[];
@@ -306,8 +391,4 @@ extern const struct gf100_gr_init gm107_gr_init_cbm_0[];
void gm107_gr_init_bios(struct gf100_gr *);
void gm200_gr_init_gpc_mmu(struct gf100_gr *);
-
-void gp100_gr_init_num_active_ltcs(struct gf100_gr *gr);
-
-void gp102_gr_init_swdx_pes_mask(struct gf100_gr *);
#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
index ec0f11983b23..42c2fd9fc04e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
@@ -114,13 +114,27 @@ gf104_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf104_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf100_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf104_gr_pack_mmio,
.fecs.ucode = &gf100_gr_fecs_ucode,
.gpccs.ucode = &gf100_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.grctx = &gf104_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
index cc152eb74123..4731a460adc7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
@@ -103,15 +103,36 @@ gf108_gr_pack_mmio[] = {
* PGRAPH engine/subdev functions
******************************************************************************/
+static void
+gf108_gr_init_r405a14(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x405a14, 0x80000000);
+}
+
static const struct gf100_gr_func
gf108_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_r405a14 = gf108_gr_init_r405a14,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf100_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf108_gr_pack_mmio,
.fecs.ucode = &gf100_gr_fecs_ucode,
.gpccs.ucode = &gf100_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.grctx = &gf108_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
index 10d2d73ca8c3..cdf759c8cd7f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -86,13 +86,27 @@ gf110_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf110_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf100_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf110_gr_pack_mmio,
.fecs.ucode = &gf100_gr_fecs_ucode,
.gpccs.ucode = &gf100_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.grctx = &gf110_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index ac09a07c4150..a4158f84c649 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -120,16 +120,58 @@ gf117_gr_gpccs_ucode = {
.data.size = sizeof(gf117_grgpc_data),
};
+void
+gf117_gr_init_zcull(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+ const u8 tile_nr = ALIGN(gr->tpc_total, 32);
+ u8 bank[GPC_MAX] = {}, gpc, i, j;
+ u32 data;
+
+ for (i = 0; i < tile_nr; i += 8) {
+ for (data = 0, j = 0; j < 8 && i + j < gr->tpc_total; j++) {
+ data |= bank[gr->tile[i + j]] << (j * 4);
+ bank[gr->tile[i + j]]++;
+ }
+ nvkm_wr32(device, GPC_BCAST(0x0980 + ((i / 8) * 4)), data);
+ }
+
+ for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+ gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+ gr->tpc_total);
+ nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+ }
+
+ nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+}
+
static const struct gf100_gr_func
gf117_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf117_gr_pack_mmio,
.fecs.ucode = &gf117_gr_fecs_ucode,
.gpccs.ucode = &gf117_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.ppc_nr = 1,
.grctx = &gf117_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
index 7f449ec6f760..4197844870b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -177,13 +177,27 @@ gf119_gr_pack_mmio[] = {
static const struct gf100_gr_func
gf119_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
+ .init_zcull = gf100_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_40601c = gf100_gr_init_40601c,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gf119_gr_pack_mmio,
.fecs.ucode = &gf100_gr_fecs_ucode,
.gpccs.ucode = &gf100_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.grctx = &gf119_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
index 1b52fcb2c49a..477fee3e3715 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -83,6 +83,12 @@ gk104_gr_init_gpc_unk_1[] = {
};
const struct gf100_gr_init
+gk104_gr_init_gpc_unk_2[] = {
+ { 0x418884, 1, 0x04, 0x00000000 },
+ {}
+};
+
+const struct gf100_gr_init
gk104_gr_init_tpccs_0[] = {
{ 0x419d0c, 1, 0x04, 0x00000000 },
{ 0x419d10, 1, 0x04, 0x00000014 },
@@ -160,6 +166,7 @@ gk104_gr_pack_mmio[] = {
{ gf119_gr_init_gpm_0 },
{ gk104_gr_init_gpc_unk_1 },
{ gf100_gr_init_gcc_0 },
+ { gk104_gr_init_gpc_unk_2 },
{ gk104_gr_init_tpccs_0 },
{ gf119_gr_init_tex_0 },
{ gk104_gr_init_pe_0 },
@@ -381,6 +388,21 @@ gk104_clkgate_pack[] = {
******************************************************************************/
void
+gk104_gr_init_sked_hww_esr(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x407020, 0x40000000);
+}
+
+static void
+gk104_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, 0x409ffc, 0x00000000);
+ nvkm_wr32(device, 0x409c14, 0x00003e3e);
+ nvkm_wr32(device, 0x409c24, 0x000f0001);
+}
+
+void
gk104_gr_init_rop_active_fbps(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -404,112 +426,11 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr)
}
}
-int
-gk104_gr_init(struct gf100_gr *gr)
+void
+gk104_gr_init_vsc_stream_master(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc, rop;
- int i;
-
- gr->func->init_gpc_mmu(gr);
-
- gf100_gr_mmio(gr, gr->func->mmio);
- if (gr->func->clkgate_pack)
- nvkm_therm_clkgate_init(gr->base.engine.subdev.device->therm,
- gr->func->clkgate_pack);
-
nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
- memset(data, 0x00, sizeof(data));
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- data[i / 8] |= tpc << ((i % 8) * 4);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
- gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
- gr->tpc_total);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-
- gr->func->init_rop_active_fbps(gr);
-
- nvkm_wr32(device, 0x400500, 0x00010001);
-
- nvkm_wr32(device, 0x400100, 0xffffffff);
- nvkm_wr32(device, 0x40013c, 0xffffffff);
-
- nvkm_wr32(device, 0x409ffc, 0x00000000);
- nvkm_wr32(device, 0x409c14, 0x00003e3e);
- nvkm_wr32(device, 0x409c24, 0x000f0001);
- nvkm_wr32(device, 0x404000, 0xc0000000);
- nvkm_wr32(device, 0x404600, 0xc0000000);
- nvkm_wr32(device, 0x408030, 0xc0000000);
- nvkm_wr32(device, 0x404490, 0xc0000000);
- nvkm_wr32(device, 0x406018, 0xc0000000);
- nvkm_wr32(device, 0x407020, 0x40000000);
- nvkm_wr32(device, 0x405840, 0xc0000000);
- nvkm_wr32(device, 0x405844, 0x00ffffff);
- nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
- nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
-
- gr->func->init_ppc_exceptions(gr);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
- for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
- }
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
- }
-
- for (rop = 0; rop < gr->rop_nr; rop++) {
- nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
- nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
- }
-
- nvkm_wr32(device, 0x400108, 0xffffffff);
- nvkm_wr32(device, 0x400138, 0xffffffff);
- nvkm_wr32(device, 0x400118, 0xffffffff);
- nvkm_wr32(device, 0x400130, 0xffffffff);
- nvkm_wr32(device, 0x40011c, 0xffffffff);
- nvkm_wr32(device, 0x400134, 0xffffffff);
-
- nvkm_wr32(device, 0x400054, 0x34ce3464);
-
- gf100_gr_zbc_init(gr);
-
- return gf100_gr_init_ctxctl(gr);
}
#include "fuc/hubgk104.fuc3.h"
@@ -534,10 +455,23 @@ gk104_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk104_gr = {
- .init = gk104_gr_init,
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gk104_gr_init_fecs_exceptions,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gf100_gr_init_419eb4,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gk104_gr_pack_mmio,
.fecs.ucode = &gk104_gr_fecs_ucode,
.gpccs.ucode = &gk104_gr_gpccs_ucode,
@@ -545,6 +479,7 @@ gk104_gr = {
.ppc_nr = 1,
.grctx = &gk104_grctx,
.clkgate_pack = gk104_clkgate_pack,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
index 4da916a9fc73..7cd628c84e07 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
@@ -143,6 +143,7 @@ gk110_gr_pack_mmio[] = {
{ gf119_gr_init_gpm_0 },
{ gk110_gr_init_gpc_unk_1 },
{ gf100_gr_init_gcc_0 },
+ { gk104_gr_init_gpc_unk_2 },
{ gk104_gr_init_tpccs_0 },
{ gk110_gr_init_tex_0 },
{ gk104_gr_init_pe_0 },
@@ -334,12 +335,39 @@ gk110_gr_gpccs_ucode = {
.data.size = sizeof(gk110_grgpc_data),
};
+void
+gk110_gr_init_419eb4(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
+ nvkm_mask(device, 0x419eb4, 0x00002000, 0x00002000);
+ nvkm_mask(device, 0x419eb4, 0x00004000, 0x00004000);
+ nvkm_mask(device, 0x419eb4, 0x00008000, 0x00008000);
+ nvkm_mask(device, 0x419eb4, 0x00001000, 0x00000000);
+ nvkm_mask(device, 0x419eb4, 0x00002000, 0x00000000);
+ nvkm_mask(device, 0x419eb4, 0x00004000, 0x00000000);
+ nvkm_mask(device, 0x419eb4, 0x00008000, 0x00000000);
+}
+
static const struct gf100_gr_func
gk110_gr = {
- .init = gk104_gr_init,
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gk110_gr_init_419eb4,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gk110_gr_pack_mmio,
.fecs.ucode = &gk110_gr_fecs_ucode,
.gpccs.ucode = &gk110_gr_gpccs_ucode,
@@ -347,6 +375,7 @@ gk110_gr = {
.ppc_nr = 2,
.grctx = &gk110_grctx,
.clkgate_pack = gk110_clkgate_pack,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
index 1912c0bfd7ee..a38faa215635 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
@@ -82,6 +82,7 @@ gk110b_gr_pack_mmio[] = {
{ gf119_gr_init_gpm_0 },
{ gk110_gr_init_gpc_unk_1 },
{ gf100_gr_init_gcc_0 },
+ { gk104_gr_init_gpc_unk_2 },
{ gk104_gr_init_tpccs_0 },
{ gk110_gr_init_tex_0 },
{ gk104_gr_init_pe_0 },
@@ -102,16 +103,30 @@ gk110b_gr_pack_mmio[] = {
static const struct gf100_gr_func
gk110b_gr = {
- .init = gk104_gr_init,
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419eb4 = gk110_gr_init_419eb4,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gk110b_gr_pack_mmio,
.fecs.ucode = &gk110_gr_fecs_ucode,
.gpccs.ucode = &gk110_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.ppc_nr = 2,
.grctx = &gk110b_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
index 1fc258163f25..58456660e603 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
@@ -121,6 +121,7 @@ gk208_gr_pack_mmio[] = {
{ gf119_gr_init_gpm_0 },
{ gk110_gr_init_gpc_unk_1 },
{ gf100_gr_init_gcc_0 },
+ { gk104_gr_init_gpc_unk_2 },
{ gk104_gr_init_tpccs_0 },
{ gk208_gr_init_tex_0 },
{ gk104_gr_init_pe_0 },
@@ -161,16 +162,29 @@ gk208_gr_gpccs_ucode = {
static const struct gf100_gr_func
gk208_gr = {
- .init = gk104_gr_init,
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_shader_exceptions = gf100_gr_init_shader_exceptions,
+ .init_400054 = gf100_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gk208_gr_pack_mmio,
.fecs.ucode = &gk208_gr_fecs_ucode,
.gpccs.ucode = &gk208_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.ppc_nr = 1,
.grctx = &gk208_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index de8b806b88fd..500cb08dd608 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -219,11 +219,7 @@ int
gk20a_gr_init(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc;
- int ret, i;
+ int ret;
/* Clear SCC RAM */
nvkm_wr32(device, 0x40802c, 0x1);
@@ -246,31 +242,7 @@ gk20a_gr_init(struct gf100_gr *gr)
nvkm_mask(device, 0x503018, 0x1, 0x1);
/* Zcull init */
- memset(data, 0x00, sizeof(data));
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- data[i / 8] |= tpc << ((i % 8) * 4);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
- gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
- gr->tpc_total);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+ gr->func->init_zcull(gr);
gr->func->init_rop_active_fbps(gr);
@@ -310,12 +282,17 @@ gk20a_gr_init(struct gf100_gr *gr)
static const struct gf100_gr_func
gk20a_gr = {
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
.init = gk20a_gr_init,
+ .init_zcull = gf117_gr_init_zcull,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .trap_mp = gf100_gr_trap_mp,
.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
.rops = gf100_gr_rops,
.ppc_nr = 1,
.grctx = &gk20a_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index 2c67fac576d1..92e31d397207 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -25,6 +25,8 @@
#include "ctxgf100.h"
#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/init.h>
#include <subdev/bios/P0260.h>
#include <subdev/fb.h>
@@ -36,6 +38,10 @@
static const struct gf100_gr_init
gm107_gr_init_main_0[] = {
+ { 0x40880c, 1, 0x04, 0x00000000 },
+ { 0x408910, 1, 0x04, 0x00000000 },
+ { 0x408984, 1, 0x04, 0x00000000 },
+ { 0x41a8a0, 1, 0x04, 0x00000000 },
{ 0x400080, 1, 0x04, 0x003003c2 },
{ 0x400088, 1, 0x04, 0x0001bfe7 },
{ 0x40008c, 1, 0x04, 0x00060000 },
@@ -210,14 +216,13 @@ gm107_gr_init_cbm_0[] = {
static const struct gf100_gr_init
gm107_gr_init_be_0[] = {
{ 0x408890, 1, 0x04, 0x000000ff },
- { 0x40880c, 1, 0x04, 0x00000000 },
{ 0x408850, 1, 0x04, 0x00000004 },
{ 0x408878, 1, 0x04, 0x00c81603 },
{ 0x40887c, 1, 0x04, 0x80543432 },
{ 0x408880, 1, 0x04, 0x0010581e },
{ 0x408884, 1, 0x04, 0x00001205 },
{ 0x408974, 1, 0x04, 0x000000ff },
- { 0x408910, 9, 0x04, 0x00000000 },
+ { 0x408914, 8, 0x04, 0x00000000 },
{ 0x408950, 1, 0x04, 0x00000000 },
{ 0x408954, 1, 0x04, 0x0000ffff },
{ 0x408958, 1, 0x04, 0x00000034 },
@@ -227,7 +232,6 @@ gm107_gr_init_be_0[] = {
{ 0x408968, 1, 0x04, 0x02808833 },
{ 0x40896c, 1, 0x04, 0x01f02438 },
{ 0x408970, 1, 0x04, 0x00012c00 },
- { 0x408984, 1, 0x04, 0x00000000 },
{ 0x408988, 1, 0x04, 0x08040201 },
{ 0x40898c, 1, 0x04, 0x80402010 },
{}
@@ -260,6 +264,7 @@ gm107_gr_pack_mmio[] = {
{ gf100_gr_init_gpm_0 },
{ gm107_gr_init_gpc_unk_1 },
{ gf100_gr_init_gcc_0 },
+ { gk104_gr_init_gpc_unk_2 },
{ gm107_gr_init_tpccs_0 },
{ gm107_gr_init_tex_0 },
{ gm107_gr_init_pe_0 },
@@ -280,6 +285,52 @@ gm107_gr_pack_mmio[] = {
******************************************************************************/
void
+gm107_gr_init_400054(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x400054, 0x2c350f63);
+}
+
+void
+gm107_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
+}
+
+void
+gm107_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+}
+
+static void
+gm107_gr_init_bios_2(struct gf100_gr *gr)
+{
+ struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
+ struct nvkm_bios *bios = device->bios;
+ struct bit_entry bit_P;
+ if (!bit_entry(bios, 'P', &bit_P) &&
+ bit_P.version == 2 && bit_P.length >= 0x2c) {
+ u32 data = nvbios_rd32(bios, bit_P.offset + 0x28);
+ if (data) {
+ u8 ver = nvbios_rd08(bios, data + 0x00);
+ u8 hdr = nvbios_rd08(bios, data + 0x01);
+ if (ver == 0x20 && hdr >= 8) {
+ data = nvbios_rd32(bios, data + 0x04);
+ if (data) {
+ u32 save = nvkm_rd32(device, 0x619444);
+ nvbios_init(subdev, data);
+ nvkm_wr32(device, 0x619444, save);
+ }
+ }
+ }
+ }
+}
+
+void
gm107_gr_init_bios(struct gf100_gr *gr)
{
static const struct {
@@ -308,115 +359,17 @@ gm107_gr_init_bios(struct gf100_gr *gr)
}
}
-static int
-gm107_gr_init(struct gf100_gr *gr)
+static void
+gm107_gr_init_gpc_mmu(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
struct nvkm_fb *fb = device->fb;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc, rop;
- int i;
nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(fb->mmu_wr) >> 8);
nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(fb->mmu_rd) >> 8);
-
- gf100_gr_mmio(gr, gr->func->mmio);
-
- gm107_gr_init_bios(gr);
-
- nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
- memset(data, 0x00, sizeof(data));
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- data[i / 8] |= tpc << ((i % 8) * 4);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
- gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
- gr->tpc_total);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-
- gr->func->init_rop_active_fbps(gr);
-
- nvkm_wr32(device, 0x400500, 0x00010001);
-
- nvkm_wr32(device, 0x400100, 0xffffffff);
- nvkm_wr32(device, 0x40013c, 0xffffffff);
- nvkm_wr32(device, 0x400124, 0x00000002);
- nvkm_wr32(device, 0x409c24, 0x000e0000);
-
- nvkm_wr32(device, 0x404000, 0xc0000000);
- nvkm_wr32(device, 0x404600, 0xc0000000);
- nvkm_wr32(device, 0x408030, 0xc0000000);
- nvkm_wr32(device, 0x404490, 0xc0000000);
- nvkm_wr32(device, 0x406018, 0xc0000000);
- nvkm_wr32(device, 0x407020, 0x40000000);
- nvkm_wr32(device, 0x405840, 0xc0000000);
- nvkm_wr32(device, 0x405844, 0x00ffffff);
- nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
- gr->func->init_ppc_exceptions(gr);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
- for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
- }
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
- }
-
- for (rop = 0; rop < gr->rop_nr; rop++) {
- nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
- nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
- }
-
- nvkm_wr32(device, 0x400108, 0xffffffff);
- nvkm_wr32(device, 0x400138, 0xffffffff);
- nvkm_wr32(device, 0x400118, 0xffffffff);
- nvkm_wr32(device, 0x400130, 0xffffffff);
- nvkm_wr32(device, 0x40011c, 0xffffffff);
- nvkm_wr32(device, 0x400134, 0xffffffff);
-
- nvkm_wr32(device, 0x400054, 0x2c350f63);
-
- gf100_gr_zbc_init(gr);
-
- return gf100_gr_init_ctxctl(gr);
}
#include "fuc/hubgm107.fuc5.h"
@@ -441,15 +394,32 @@ gm107_gr_gpccs_ucode = {
static const struct gf100_gr_func
gm107_gr = {
- .init = gm107_gr_init,
+ .oneinit_tiles = gf100_gr_oneinit_tiles,
+ .oneinit_sm_id = gf100_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
+ .init_gpc_mmu = gm107_gr_init_gpc_mmu,
+ .init_bios = gm107_gr_init_bios,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .init_bios_2 = gm107_gr_init_bios_2,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gm107_gr_init_shader_exceptions,
+ .init_400054 = gm107_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.mmio = gm107_gr_pack_mmio,
.fecs.ucode = &gm107_gr_fecs_ucode,
.gpccs.ucode = &gm107_gr_gpccs_ucode,
.rops = gf100_gr_rops,
.ppc_nr = 2,
.grctx = &gm107_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 6435f1257572..eff30662b984 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -39,6 +39,22 @@ gm200_gr_rops(struct gf100_gr *gr)
}
void
+gm200_gr_init_ds_hww_esr_2(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, 0x405848, 0xc0000000);
+ nvkm_mask(device, 0x40584c, 0x00000001, 0x00000001);
+}
+
+void
+gm200_gr_init_num_active_ltcs(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+ nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+}
+
+void
gm200_gr_init_gpc_mmu(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -61,111 +77,51 @@ gm200_gr_init_rop_active_fbps(struct gf100_gr *gr)
nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
}
-int
-gm200_gr_init(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc, rop;
- int i;
-
- gr->func->init_gpc_mmu(gr);
-
- gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
-
- gm107_gr_init_bios(gr);
-
- nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
- memset(data, 0x00, sizeof(data));
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- data[i / 8] |= tpc << ((i % 8) * 4);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
- gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
- gr->tpc_total);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
- }
+static u8
+gm200_gr_tile_map_6_24[] = {
+ 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2, 0, 1, 2, 3, 4, 5, 3, 4, 5, 0, 1, 2,
+};
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
- nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+static u8
+gm200_gr_tile_map_4_16[] = {
+ 0, 1, 2, 3, 2, 3, 0, 1, 3, 0, 1, 2, 1, 2, 3, 0,
+};
- gr->func->init_rop_active_fbps(gr);
+static u8
+gm200_gr_tile_map_2_8[] = {
+ 0, 1, 1, 0, 0, 1, 1, 0,
+};
- nvkm_wr32(device, 0x400500, 0x00010001);
- nvkm_wr32(device, 0x400100, 0xffffffff);
- nvkm_wr32(device, 0x40013c, 0xffffffff);
- nvkm_wr32(device, 0x400124, 0x00000002);
- nvkm_wr32(device, 0x409c24, 0x000e0000);
- nvkm_wr32(device, 0x405848, 0xc0000000);
- nvkm_wr32(device, 0x40584c, 0x00000001);
- nvkm_wr32(device, 0x404000, 0xc0000000);
- nvkm_wr32(device, 0x404600, 0xc0000000);
- nvkm_wr32(device, 0x408030, 0xc0000000);
- nvkm_wr32(device, 0x404490, 0xc0000000);
- nvkm_wr32(device, 0x406018, 0xc0000000);
- nvkm_wr32(device, 0x407020, 0x40000000);
- nvkm_wr32(device, 0x405840, 0xc0000000);
- nvkm_wr32(device, 0x405844, 0x00ffffff);
- nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
-
- gr->func->init_ppc_exceptions(gr);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
- for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005);
- }
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
- }
+void
+gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
+{
+ /*XXX: There's a different algorithm here I've not yet figured out. */
+ gf100_gr_oneinit_sm_id(gr);
+}
- for (rop = 0; rop < gr->rop_nr; rop++) {
- nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
- nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+void
+gm200_gr_oneinit_tiles(struct gf100_gr *gr)
+{
+ /*XXX: Not sure what this is about. The algorithm from NVGPU
+ * seems to work for all boards I tried from earlier (and
+ * later) GPUs except in these specific configurations.
+ *
+ * Let's just hardcode them for now.
+ */
+ if (gr->gpc_nr == 2 && gr->tpc_total == 8) {
+ memcpy(gr->tile, gm200_gr_tile_map_2_8, gr->tpc_total);
+ gr->screen_tile_row_offset = 1;
+ } else
+ if (gr->gpc_nr == 4 && gr->tpc_total == 16) {
+ memcpy(gr->tile, gm200_gr_tile_map_4_16, gr->tpc_total);
+ gr->screen_tile_row_offset = 4;
+ } else
+ if (gr->gpc_nr == 6 && gr->tpc_total == 24) {
+ memcpy(gr->tile, gm200_gr_tile_map_6_24, gr->tpc_total);
+ gr->screen_tile_row_offset = 5;
+ } else {
+ gf100_gr_oneinit_tiles(gr);
}
-
- nvkm_wr32(device, 0x400108, 0xffffffff);
- nvkm_wr32(device, 0x400138, 0xffffffff);
- nvkm_wr32(device, 0x400118, 0xffffffff);
- nvkm_wr32(device, 0x400130, 0xffffffff);
- nvkm_wr32(device, 0x40011c, 0xffffffff);
- nvkm_wr32(device, 0x400134, 0xffffffff);
-
- nvkm_wr32(device, 0x400054, 0x2c350f63);
-
- gf100_gr_zbc_init(gr);
-
- return gf100_gr_init_ctxctl(gr);
}
int
@@ -208,13 +164,30 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
static const struct gf100_gr_func
gm200_gr = {
- .init = gm200_gr_init,
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_bios = gm107_gr_init_bios,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
.init_rop_active_fbps = gm200_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gf100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gm107_gr_init_shader_exceptions,
+ .init_400054 = gm107_gr_init_400054,
+ .trap_mp = gf100_gr_trap_mp,
.rops = gm200_gr_rops,
+ .tpc_nr = 4,
.ppc_nr = 2,
.grctx = &gm200_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index 69479af1d829..a667770ce3cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -64,13 +64,18 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gm20b_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
.init = gk20a_gr_init,
+ .init_zcull = gf117_gr_init_zcull,
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
+ .trap_mp = gf100_gr_trap_mp,
.set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask,
.rops = gm200_gr_rops,
.ppc_nr = 1,
.grctx = &gm20b_grctx,
+ .zbc = &gf100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
index 867a5f7cc5bc..9d0521ce309a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -29,143 +29,103 @@
/*******************************************************************************
* PGRAPH engine/subdev functions
******************************************************************************/
-
void
-gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
+gp100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */
- const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f;
- nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
- nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
+ const int znum = zbc - 1;
+ const u32 zoff = znum * 4;
+
+ if (gr->zbc_color[zbc].format) {
+ nvkm_wr32(device, 0x418010 + zoff, gr->zbc_color[zbc].ds[0]);
+ nvkm_wr32(device, 0x41804c + zoff, gr->zbc_color[zbc].ds[1]);
+ nvkm_wr32(device, 0x418088 + zoff, gr->zbc_color[zbc].ds[2]);
+ nvkm_wr32(device, 0x4180c4 + zoff, gr->zbc_color[zbc].ds[3]);
+ }
+
+ nvkm_mask(device, 0x418100 + ((znum / 4) * 4),
+ 0x0000007f << ((znum % 4) * 7),
+ gr->zbc_color[zbc].format << ((znum % 4) * 7));
}
void
-gp100_gr_init_num_active_ltcs(struct gf100_gr *gr)
+gp100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
-
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
- nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+ const int znum = zbc - 1;
+ const u32 zoff = znum * 4;
+
+ if (gr->zbc_depth[zbc].format)
+ nvkm_wr32(device, 0x418110 + zoff, gr->zbc_depth[zbc].ds);
+ nvkm_mask(device, 0x41814c + ((znum / 4) * 4),
+ 0x0000007f << ((znum % 4) * 7),
+ gr->zbc_depth[zbc].format << ((znum % 4) * 7));
}
-int
-gp100_gr_init(struct gf100_gr *gr)
+static const struct gf100_gr_func_zbc
+gp100_gr_zbc = {
+ .clear_color = gp100_gr_zbc_clear_color,
+ .clear_depth = gp100_gr_zbc_clear_depth,
+};
+
+void
+gp100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
- u32 data[TPC_MAX / 8] = {};
- u8 tpcnr[GPC_MAX];
- int gpc, tpc, rop;
- int i;
-
- gr->func->init_gpc_mmu(gr);
-
- gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
-
- nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
-
- memset(data, 0x00, sizeof(data));
- memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
- for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
- do {
- gpc = (gpc + 1) % gr->gpc_nr;
- } while (!tpcnr[gpc]);
- tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
-
- data[i / 8] |= tpc << ((i % 8) * 4);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
- nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
- nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
- nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
- gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
- gr->tpc_total);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
- }
-
- nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
- gr->func->init_num_active_ltcs(gr);
-
- gr->func->init_rop_active_fbps(gr);
- if (gr->func->init_swdx_pes_mask)
- gr->func->init_swdx_pes_mask(gr);
-
- nvkm_wr32(device, 0x400500, 0x00010001);
- nvkm_wr32(device, 0x400100, 0xffffffff);
- nvkm_wr32(device, 0x40013c, 0xffffffff);
- nvkm_wr32(device, 0x400124, 0x00000002);
- nvkm_wr32(device, 0x409c24, 0x000f0002);
- nvkm_wr32(device, 0x405848, 0xc0000000);
- nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001);
- nvkm_wr32(device, 0x404000, 0xc0000000);
- nvkm_wr32(device, 0x404600, 0xc0000000);
- nvkm_wr32(device, 0x408030, 0xc0000000);
- nvkm_wr32(device, 0x404490, 0xc0000000);
- nvkm_wr32(device, 0x406018, 0xc0000000);
- nvkm_wr32(device, 0x407020, 0x40000000);
- nvkm_wr32(device, 0x405840, 0xc0000000);
- nvkm_wr32(device, 0x405844, 0x00ffffff);
- nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
+}
+static void
+gp100_gr_init_419c9c(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
+}
- gr->func->init_ppc_exceptions(gr);
-
- for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
- for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
- nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
- }
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
- nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
- }
-
- for (rop = 0; rop < gr->rop_nr; rop++) {
- nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
- nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
- nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
- }
-
- nvkm_wr32(device, 0x400108, 0xffffffff);
- nvkm_wr32(device, 0x400138, 0xffffffff);
- nvkm_wr32(device, 0x400118, 0xffffffff);
- nvkm_wr32(device, 0x400130, 0xffffffff);
- nvkm_wr32(device, 0x40011c, 0xffffffff);
- nvkm_wr32(device, 0x400134, 0xffffffff);
-
- gf100_gr_zbc_init(gr);
+void
+gp100_gr_init_fecs_exceptions(struct gf100_gr *gr)
+{
+ nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002);
+}
- return gf100_gr_init_ctxctl(gr);
+void
+gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ /*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */
+ const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f;
+ nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+ nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
}
static const struct gf100_gr_func
gp100_gr = {
- .init = gp100_gr_init,
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_419c9c = gp100_gr_init_419c9c,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
- .init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gp100_gr_init_shader_exceptions,
+ .trap_mp = gf100_gr_trap_mp,
.rops = gm200_gr_rops,
+ .gpc_nr = 6,
+ .tpc_nr = 5,
.ppc_nr = 2,
.grctx = &gp100_grctx,
+ .zbc = &gp100_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
index 61e3a0b08559..37f7d739bf80 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
@@ -26,6 +26,62 @@
#include <nvif/class.h>
+static void
+gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ const int znum = zbc - 1;
+ const u32 zoff = znum * 4;
+
+ if (gr->zbc_stencil[zbc].format)
+ nvkm_wr32(device, 0x41815c + zoff, gr->zbc_stencil[zbc].ds);
+ nvkm_mask(device, 0x418198 + ((znum / 4) * 4),
+ 0x0000007f << ((znum % 4) * 7),
+ gr->zbc_stencil[zbc].format << ((znum % 4) * 7));
+}
+
+static int
+gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format,
+ const u32 ds, const u32 l2)
+{
+ struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
+ int zbc = -ENOSPC, i;
+
+ for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+ if (gr->zbc_stencil[i].format) {
+ if (gr->zbc_stencil[i].format != format)
+ continue;
+ if (gr->zbc_stencil[i].ds != ds)
+ continue;
+ if (gr->zbc_stencil[i].l2 != l2) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+ return i;
+ } else {
+ zbc = (zbc < 0) ? i : zbc;
+ }
+ }
+
+ if (zbc < 0)
+ return zbc;
+
+ gr->zbc_stencil[zbc].format = format;
+ gr->zbc_stencil[zbc].ds = ds;
+ gr->zbc_stencil[zbc].l2 = l2;
+ nvkm_ltc_zbc_stencil_get(ltc, zbc, l2);
+ gr->func->zbc->clear_stencil(gr, zbc);
+ return zbc;
+}
+
+const struct gf100_gr_func_zbc
+gp102_gr_zbc = {
+ .clear_color = gp100_gr_zbc_clear_color,
+ .clear_depth = gp100_gr_zbc_clear_depth,
+ .stencil_get = gp102_gr_zbc_stencil_get,
+ .clear_stencil = gp102_gr_zbc_clear_stencil,
+};
+
void
gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
{
@@ -42,15 +98,30 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
static const struct gf100_gr_func
gp102_gr = {
- .init = gp100_gr_init,
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
- .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
- .init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gp100_gr_init_shader_exceptions,
+ .trap_mp = gf100_gr_trap_mp,
.rops = gm200_gr_rops,
+ .gpc_nr = 6,
+ .tpc_nr = 5,
.ppc_nr = 3,
.grctx = &gp102_grctx,
+ .zbc = &gp102_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
new file mode 100644
index 000000000000..4573c914c021
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+static const struct gf100_gr_func
+gp104_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
+ .init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
+ .init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+ .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gp100_gr_init_shader_exceptions,
+ .trap_mp = gf100_gr_trap_mp,
+ .rops = gm200_gr_rops,
+ .gpc_nr = 6,
+ .tpc_nr = 5,
+ .ppc_nr = 3,
+ .grctx = &gp104_grctx,
+ .zbc = &gp102_gr_zbc,
+ .sclass = {
+ { -1, -1, FERMI_TWOD_A },
+ { -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+ { -1, -1, PASCAL_B, &gf100_fermi },
+ { -1, -1, PASCAL_COMPUTE_B },
+ {}
+ }
+};
+
+int
+gp104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+ return gm200_gr_new_(&gp104_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
index f7272323f694..812aba91653f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
@@ -28,15 +28,30 @@
static const struct gf100_gr_func
gp107_gr = {
- .init = gp100_gr_init,
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
- .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
- .init_num_active_ltcs = gp100_gr_init_num_active_ltcs,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
+ .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gp100_gr_init_shader_exceptions,
+ .trap_mp = gf100_gr_trap_mp,
.rops = gm200_gr_rops,
+ .gpc_nr = 2,
+ .tpc_nr = 3,
.ppc_nr = 1,
.grctx = &gp107_grctx,
+ .zbc = &gp102_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
index 5f3d161a0842..303dceddd4a8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
@@ -25,24 +25,31 @@
#include <nvif/class.h>
-static void
-gp10b_gr_init_num_active_ltcs(struct gf100_gr *gr)
-{
- struct nvkm_device *device = gr->base.engine.subdev.device;
-
- nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
-}
-
static const struct gf100_gr_func
gp10b_gr = {
- .init = gp100_gr_init,
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_419cc0 = gf100_gr_init_419cc0,
.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
- .init_num_active_ltcs = gp10b_gr_init_num_active_ltcs,
+ .init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
+ .init_504430 = gm107_gr_init_504430,
+ .init_shader_exceptions = gp100_gr_init_shader_exceptions,
+ .trap_mp = gf100_gr_trap_mp,
.rops = gm200_gr_rops,
+ .gpc_nr = 1,
+ .tpc_nr = 2,
.ppc_nr = 1,
.grctx = &gp102_grctx,
+ .zbc = &gp102_gr_zbc,
.sclass = {
{ -1, -1, FERMI_TWOD_A },
{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
new file mode 100644
index 000000000000..19173ea19096
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+static void
+gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
+ u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x730));
+ u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x734));
+ const struct nvkm_enum *warp;
+ char glob[128];
+
+ nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
+ warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);
+
+ nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
+ "global %08x [%s] warp %04x [%s]\n",
+ gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
+
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x730), 0x00000000);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x734), gerr);
+}
+
+static void
+gv100_gr_init_4188a4(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000);
+}
+
+static void
+gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ int sm;
+ for (sm = 0; sm < 0x100; sm += 0x80) {
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001);
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004);
+ }
+}
+
+static void
+gv100_gr_init_504430(struct gf100_gr *gr, int gpc, int tpc)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0x403f0000);
+}
+
+static void
+gv100_gr_init_419bd8(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+ nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000);
+}
+
+static const struct gf100_gr_func
+gv100_gr = {
+ .oneinit_tiles = gm200_gr_oneinit_tiles,
+ .oneinit_sm_id = gm200_gr_oneinit_sm_id,
+ .init = gf100_gr_init,
+ .init_419bd8 = gv100_gr_init_419bd8,
+ .init_gpc_mmu = gm200_gr_init_gpc_mmu,
+ .init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+ .init_zcull = gf117_gr_init_zcull,
+ .init_num_active_ltcs = gm200_gr_init_num_active_ltcs,
+ .init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+ .init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
+ .init_fecs_exceptions = gp100_gr_init_fecs_exceptions,
+ .init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+ .init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+ .init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+ .init_504430 = gv100_gr_init_504430,
+ .init_shader_exceptions = gv100_gr_init_shader_exceptions,
+ .init_4188a4 = gv100_gr_init_4188a4,
+ .trap_mp = gv100_gr_trap_mp,
+ .rops = gm200_gr_rops,
+ .gpc_nr = 6,
+ .tpc_nr = 5,
+ .ppc_nr = 3,
+ .grctx = &gv100_grctx,
+ .zbc = &gp102_gr_zbc,
+ .sclass = {
+ { -1, -1, FERMI_TWOD_A },
+ { -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+ { -1, -1, VOLTA_A, &gf100_fermi },
+ { -1, -1, VOLTA_COMPUTE_A },
+ {}
+ }
+};
+
+int
+gv100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+ return gm200_gr_new_(&gv100_gr, device, index, pgr);
+}