1 files changed, 68 insertions, 0 deletions
diff --git a/0002-drm-nouveau-gr-gp107-gp108-implement-workaround-for-.patch b/0002-drm-nouveau-gr-gp107-gp108-implement-workaround-for-.patch
new file mode 100644
index 000000000..554800010
--- /dev/null
+++ b/0002-drm-nouveau-gr-gp107-gp108-implement-workaround-for-.patch
@@ -0,0 +1,68 @@
+From 37b556606d1217b4367e622d88cef11c65764386 Mon Sep 17 00:00:00 2001
+From: Ben Skeggs <bskeggs@redhat.com>
+Date: Tue, 31 Mar 2020 16:08:44 +1000
+Subject: [PATCH 2/2] drm/nouveau/gr/gp107,gp108: implement workaround for HW
+ hanging during init
+
+Certain boards with GP107/GP108 chipsets hang (often, but randomly) for
+unknown reasons during GR initialisation.
+
+The first tell-tale symptom of this issue is:
+
+nouveau 0000:01:00.0: bus: MMIO read of 00000000 FAULT at 409800 [ TIMEOUT ]
+
+appearing in dmesg, likely followed by many other failures being logged.
+
+Karol found this WAR for the issue a while back, but efforts to isolate
+the root cause and proper fix have not yielded success so far.  I've
+modified the original patch to include a few more details, limit it to
+GP107/GP108 by default, and added a config option to override this choice.
+
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Reviewed-by: Karol Herbst <kherbst@redhat.com>
+---
+ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c    | 26 +++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+index dd8f85b8b3a7..f2f5636efac4 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+@@ -1981,8 +1981,34 @@ gf100_gr_init_(struct nvkm_gr *base)
+ {
+ 	struct gf100_gr *gr = gf100_gr(base);
+ 	struct nvkm_subdev *subdev = &base->engine.subdev;
++	struct nvkm_device *device = subdev->device;
++	bool reset = device->chipset == 0x137 || device->chipset == 0x138;
+ 	u32 ret;
+ 
++	/* On certain GP107/GP108 boards, we trigger a weird issue where
++	 * GR will stop responding to PRI accesses after we've asked the
++	 * SEC2 RTOS to boot the GR falcons.  This happens with far more
++	 * frequency when cold-booting a board (ie. returning from D3).
++	 *
++	 * The root cause for this is not known and has proven difficult
++	 * to isolate, with many avenues being dead-ends.
++	 *
++	 * A workaround was discovered by Karol, whereby putting GR into
++	 * reset for an extended period right before initialisation
++	 * prevents the problem from occuring.
++	 *
++	 * XXX: As RM does not require any such workaround, this is more
++	 *      of a hack than a true fix.
++	 */
++	reset = nvkm_boolopt(device->cfgopt, "NvGrResetWar", reset);
++	if (reset) {
++		nvkm_mask(device, 0x000200, 0x00001000, 0x00000000);
++		nvkm_rd32(device, 0x000200);
++		msleep(50);
++		nvkm_mask(device, 0x000200, 0x00001000, 0x00001000);
++		nvkm_rd32(device, 0x000200);
++	}
++
+ 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
+ 
+ 	ret = nvkm_falcon_get(&gr->fecs.falcon, subdev);
+-- 
+2.25.1
+