Diffstat (limited to 'drm-nouveau-Enable-GP10B-by-default.patch')
-rw-r--r--  drm-nouveau-Enable-GP10B-by-default.patch | 1275
1 file changed, 1275 insertions, 0 deletions
diff --git a/drm-nouveau-Enable-GP10B-by-default.patch b/drm-nouveau-Enable-GP10B-by-default.patch new file mode 100644 index 000000000..e44a5b339 --- /dev/null +++ b/drm-nouveau-Enable-GP10B-by-default.patch @@ -0,0 +1,1275 @@ +From patchwork Mon Sep 16 15:04:02 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [01/11] drm/nouveau: tegra: Avoid pulsing reset twice +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331044 +Message-Id: <20190916150412.10025-2-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:02 +0200 + +From: Thierry Reding <treding@nvidia.com> + +When the GPU powergate is controlled by a generic power domain provider, +the reset will automatically be asserted and deasserted as part of the +power-ungating procedure. + +On some Jetson TX2 boards, doing an additional assert and deassert of +the GPU outside of the power-ungate procedure can cause the GPU to go +into a bad state where the memory interface can no longer access system +memory. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index 0e372a190d3f..747a775121cf 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -52,18 +52,18 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) + clk_set_rate(tdev->clk_pwr, 204000000); + udelay(10); + +- reset_control_assert(tdev->rst); +- udelay(10); +- + if (!tdev->pdev->dev.pm_domain) { ++ reset_control_assert(tdev->rst); ++ udelay(10); ++ + ret = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D); + if (ret) + goto err_clamp; + udelay(10); +- } + +- reset_control_deassert(tdev->rst); +- udelay(10); ++ reset_control_deassert(tdev->rst); ++ udelay(10); ++ } + + return 0; + + +From patchwork Mon Sep 16 15:04:03 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [02/11] drm/nouveau: tegra: Set clock rate if not set +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331046 +Message-Id: <20190916150412.10025-3-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:03 +0200 + +From: Thierry Reding <treding@nvidia.com> + +If the GPU clock has not had a rate set, initialize it to the maximum +clock rate to make sure it does run. 
+ +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index 747a775121cf..d0d52c1d4aee 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -279,6 +279,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + struct nvkm_device **pdevice) + { + struct nvkm_device_tegra *tdev; ++ unsigned long rate; + int ret; + + if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL))) +@@ -307,6 +308,17 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + goto free; + } + ++ rate = clk_get_rate(tdev->clk); ++ if (rate == 0) { ++ ret = clk_set_rate(tdev->clk, ULONG_MAX); ++ if (ret < 0) ++ goto free; ++ ++ rate = clk_get_rate(tdev->clk); ++ ++ dev_dbg(&pdev->dev, "GPU clock set to %lu\n", rate); ++ } ++ + if (func->require_ref_clk) + tdev->clk_ref = devm_clk_get(&pdev->dev, "ref"); + if (IS_ERR(tdev->clk_ref)) { + +From patchwork Mon Sep 16 15:04:04 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [03/11] drm/nouveau: secboot: Read WPR configuration from GPU + registers +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331048 +Message-Id: <20190916150412.10025-4-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:04 +0200 + +From: Thierry Reding <treding@nvidia.com> + +The GPUs found on Tegra SoCs have registers that can be used to read the +WPR configuration. Use these registers instead of reaching into the +memory controller's register space to read the same information. 
+ +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../drm/nouveau/nvkm/subdev/secboot/gm200.h | 2 +- + .../drm/nouveau/nvkm/subdev/secboot/gm20b.c | 81 ++++++++++++------- + .../drm/nouveau/nvkm/subdev/secboot/gp10b.c | 4 +- + 3 files changed, 53 insertions(+), 34 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +index 62c5e162099a..280b1448df88 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h +@@ -41,6 +41,6 @@ int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *, + struct nvkm_falcon *); + + /* Tegra-only */ +-int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *, u32); ++int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *); + + #endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +index df8b919dcf09..f8a543122219 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c +@@ -23,39 +23,65 @@ + #include "acr.h" + #include "gm200.h" + +-#define TEGRA210_MC_BASE 0x70019000 +- + #ifdef CONFIG_ARCH_TEGRA +-#define MC_SECURITY_CARVEOUT2_CFG0 0xc58 +-#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c +-#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60 +-#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64 +-#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED (1 << 1) + /** + * gm20b_secboot_tegra_read_wpr() - read the WPR registers on Tegra + * +- * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region +- * is reserved from system memory by the bootloader and irreversibly locked. +- * This function reads the address and size of the pre-configured WPR region. ++ * On dGPU, we can manage the WPR region ourselves, but on Tegra this region ++ * is allocated from system memory by the secure firmware. The region is then ++ * marked as a "secure carveout" and irreversibly locked. Furthermore, the WPR ++ * secure carveout is also configured to be sent to the GPU via a dedicated ++ * serial bus between the memory controller and the GPU. The GPU requests this ++ * information upon leaving reset and exposes it through a FIFO register at ++ * offset 0x100cd4. ++ * ++ * The FIFO register's lower 4 bits can be used to set the read index into the ++ * FIFO. After each read of the FIFO register, the read index is incremented. ++ * ++ * Indices 2 and 3 contain the lower and upper addresses of the WPR. These are ++ * stored in units of 256 B. The WPR is inclusive of both addresses. ++ * ++ * Unfortunately, for some reason the WPR info register doesn't contain the ++ * correct values for the secure carveout. It seems like the upper address is ++ * always too small by 128 KiB - 1. Given that the secure carvout size in the ++ * memory controller configuration is specified in units of 128 KiB, it's ++ * possible that the computation of the upper address of the WPR is wrong and ++ * causes this difference. 
+ */ + int +-gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) ++gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb) + { ++ struct nvkm_device *device = gsb->base.subdev.device; + struct nvkm_secboot *sb = &gsb->base; +- void __iomem *mc; +- u32 cfg; ++ u64 base, limit; ++ u32 value; + +- mc = ioremap(mc_base, 0xd00); +- if (!mc) { +- nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n"); +- return -ENOMEM; +- } +- sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | +- ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32); +- sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) +- << 17; +- cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0); +- iounmap(mc); ++ /* set WPR info register to point at WPR base address register */ ++ value = nvkm_rd32(device, 0x100cd4); ++ value &= ~0xf; ++ value |= 0x2; ++ nvkm_wr32(device, 0x100cd4, value); ++ ++ /* read base address */ ++ value = nvkm_rd32(device, 0x100cd4); ++ base = (u64)(value >> 4) << 12; ++ ++ /* read limit */ ++ value = nvkm_rd32(device, 0x100cd4); ++ limit = (u64)(value >> 4) << 12; ++ ++ /* ++ * The upper address of the WPR seems to be computed wrongly and is ++ * actually SZ_128K - 1 bytes lower than it should be. Adjust the ++ * value accordingly. ++ */ ++ limit += SZ_128K - 1; ++ ++ sb->wpr_size = limit - base + 1; ++ sb->wpr_addr = base; ++ ++ nvkm_info(&sb->subdev, "WPR: %016llx-%016llx\n", sb->wpr_addr, ++ sb->wpr_addr + sb->wpr_size - 1); + + /* Check that WPR settings are valid */ + if (sb->wpr_size == 0) { +@@ -63,11 +89,6 @@ gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) + return -EINVAL; + } + +- if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) { +- nvkm_error(&sb->subdev, "WPR region not locked\n"); +- return -EINVAL; +- } +- + return 0; + } + #else +@@ -85,7 +106,7 @@ gm20b_secboot_oneinit(struct nvkm_secboot *sb) + struct gm200_secboot *gsb = gm200_secboot(sb); + int ret; + +- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA210_MC_BASE); ++ ret = gm20b_secboot_tegra_read_wpr(gsb); + if (ret) + return ret; + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c +index 28ca29d0eeee..d84e85825995 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c +@@ -23,15 +23,13 @@ + #include "acr.h" + #include "gm200.h" + +-#define TEGRA186_MC_BASE 0x02c10000 +- + static int + gp10b_secboot_oneinit(struct nvkm_secboot *sb) + { + struct gm200_secboot *gsb = gm200_secboot(sb); + int ret; + +- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA186_MC_BASE); ++ ret = gm20b_secboot_tegra_read_wpr(gsb); + if (ret) + return ret; + + +From patchwork Mon Sep 16 15:04:05 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [04/11] drm/nouveau: gp10b: Add custom L2 cache implementation +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331049 +Message-Id: <20190916150412.10025-5-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:05 +0200 + +From: Thierry Reding <treding@nvidia.com> + +There are extra registers that need to be programmed to make the level 2 +cache work on GP10B, such as the stream ID register that is used when an +SMMU is used to translate memory 
addresses. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../gpu/drm/nouveau/include/nvkm/subdev/ltc.h | 1 + + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- + .../gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild | 1 + + .../gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c | 69 +++++++++++++++++++ + .../gpu/drm/nouveau/nvkm/subdev/ltc/priv.h | 2 + + 5 files changed, 74 insertions(+), 1 deletion(-) + create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c + +diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +index 644d527c3b96..d76f60d7d29a 100644 +--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h ++++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h +@@ -40,4 +40,5 @@ int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); ++int gp10b_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); + #endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +index c3c7159f3411..d2d6d5f4028a 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +@@ -2380,7 +2380,7 @@ nv13b_chipset = { + .fuse = gm107_fuse_new, + .ibus = gp10b_ibus_new, + .imem = gk20a_instmem_new, +- .ltc = gp102_ltc_new, ++ .ltc = gp10b_ltc_new, + .mc = gp10b_mc_new, + .mmu = gp10b_mmu_new, + .secboot = gp10b_secboot_new, +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +index 2b6d36ea7067..728d75010847 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild +@@ -6,3 +6,4 @@ nvkm-y += nvkm/subdev/ltc/gm107.o + nvkm-y += nvkm/subdev/ltc/gm200.o + nvkm-y += nvkm/subdev/ltc/gp100.o + nvkm-y += nvkm/subdev/ltc/gp102.o ++nvkm-y += nvkm/subdev/ltc/gp10b.o +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +new file mode 100644 +index 000000000000..4d27c6ea1552 +--- /dev/null ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2019 NVIDIA Corporation. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. 
++ * ++ * Authors: Thierry Reding ++ */ ++ ++#include "priv.h" ++ ++static void ++gp10b_ltc_init(struct nvkm_ltc *ltc) ++{ ++ struct nvkm_device *device = ltc->subdev.device; ++#ifdef CONFIG_IOMMU_API ++ struct iommu_fwspec *spec; ++#endif ++ ++ nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); ++ nvkm_wr32(device, 0x17e000, ltc->ltc_nr); ++ nvkm_wr32(device, 0x100800, ltc->ltc_nr); ++ ++#ifdef CONFIG_IOMMU_API ++ spec = dev_iommu_fwspec_get(device->dev); ++ if (spec) { ++ u32 sid = spec->ids[0] & 0xffff; ++ ++ /* stream ID */ ++ nvkm_wr32(device, 0x160000, sid << 2); ++ } ++#endif ++} ++ ++static const struct nvkm_ltc_func ++gp10b_ltc = { ++ .oneinit = gp100_ltc_oneinit, ++ .init = gp10b_ltc_init, ++ .intr = gp100_ltc_intr, ++ .cbc_clear = gm107_ltc_cbc_clear, ++ .cbc_wait = gm107_ltc_cbc_wait, ++ .zbc = 16, ++ .zbc_clear_color = gm107_ltc_zbc_clear_color, ++ .zbc_clear_depth = gm107_ltc_zbc_clear_depth, ++ .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil, ++ .invalidate = gf100_ltc_invalidate, ++ .flush = gf100_ltc_flush, ++}; ++ ++int ++gp10b_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc) ++{ ++ return nvkm_ltc_new_(&gp10b_ltc, device, index, pltc); ++} +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +index 2fcf18e46ce3..eca5a711b1b8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h +@@ -46,4 +46,6 @@ void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); + int gp100_ltc_oneinit(struct nvkm_ltc *); + void gp100_ltc_init(struct nvkm_ltc *); + void gp100_ltc_intr(struct nvkm_ltc *); ++ ++void gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *, int, const u32); + #endif + +From patchwork Mon Sep 16 15:04:06 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [05/11] drm/nouveau: gp10b: Use correct copy engine +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331052 +Message-Id: <20190916150412.10025-6-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:06 +0200 + +From: Thierry Reding <treding@nvidia.com> + +gp10b uses the new engine enumeration mechanism introduced in the Pascal +architecture. As a result, the copy engine, which used to be at index 2 +for prior Tegra GPU instantiations, has now moved to index 0. Fix up the +index and also use the gp100 variant of the copy engine class because on +gp10b the PASCAL_DMA_COPY_B class is not supported. 
+ +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +index d2d6d5f4028a..99d3fa3fad89 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +@@ -2387,7 +2387,7 @@ nv13b_chipset = { + .pmu = gm20b_pmu_new, + .timer = gk20a_timer_new, + .top = gk104_top_new, +- .ce[2] = gp102_ce_new, ++ .ce[0] = gp100_ce_new, + .dma = gf119_dma_new, + .fifo = gp10b_fifo_new, + .gr = gp10b_gr_new, + +From patchwork Mon Sep 16 15:04:07 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [06/11] drm/nouveau: gk20a: Set IOMMU bit for DMA API if appropriate +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331053 +Message-Id: <20190916150412.10025-7-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:07 +0200 + +From: Thierry Reding <treding@nvidia.com> + +Detect if the DMA API is backed by an IOMMU and set the IOMMU bit if so. +This is needed to make sure IOMMU addresses are properly translated even +the explicit IOMMU API is not used. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../drm/nouveau/nvkm/subdev/instmem/gk20a.c | 35 +++++++++++++------ + 1 file changed, 25 insertions(+), 10 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +index b0493f8df1fe..1120a2a7d5f1 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +@@ -100,12 +100,14 @@ struct gk20a_instmem { + unsigned int vaddr_max; + struct list_head vaddr_lru; + ++ /* IOMMU mapping */ ++ unsigned int page_shift; ++ u64 iommu_mask; ++ + /* Only used if IOMMU if present */ + struct mutex *mm_mutex; + struct nvkm_mm *mm; + struct iommu_domain *domain; +- unsigned long iommu_pgshift; +- u16 iommu_bit; + + /* Only used by DMA API */ + unsigned long attrs; +@@ -357,12 +359,12 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) + mutex_unlock(&imem->lock); + + /* clear IOMMU bit to unmap pages */ +- r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); ++ r->offset &= ~imem->iommu_mask; + + /* Unmap pages from GPU address space and free them */ + for (i = 0; i < node->base.mn->length; i++) { + iommu_unmap(imem->domain, +- (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE); ++ (r->offset + i) << imem->page_shift, PAGE_SIZE); + dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE, + DMA_BIDIRECTIONAL); + __free_page(node->pages[i]); +@@ -440,7 +442,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, + + /* present memory for being mapped using small pages */ + node->r.type = 12; +- node->r.offset = node->handle >> 12; ++ node->r.offset = imem->iommu_mask | node->handle >> 12; + node->r.length = (npages << PAGE_SHIFT) >> 12; + + node->base.mn = &node->r; +@@ -493,7 +495,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + mutex_lock(imem->mm_mutex); + /* Reserve area from GPU address space */ + ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages, +- align >> imem->iommu_pgshift, &r); ++ 
align >> imem->page_shift, &r); + mutex_unlock(imem->mm_mutex); + if (ret) { + nvkm_error(subdev, "IOMMU space is full!\n"); +@@ -502,7 +504,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + + /* Map into GPU address space */ + for (i = 0; i < npages; i++) { +- u32 offset = (r->offset + i) << imem->iommu_pgshift; ++ u32 offset = (r->offset + i) << imem->page_shift; + + ret = iommu_map(imem->domain, offset, node->dma_addrs[i], + PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); +@@ -518,7 +520,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, + } + + /* IOMMU bit tells that an address is to be resolved through the IOMMU */ +- r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); ++ r->offset |= imem->iommu_mask; + + node->base.mn = r; + return 0; +@@ -619,11 +621,12 @@ gk20a_instmem_new(struct nvkm_device *device, int index, + imem->mm_mutex = &tdev->iommu.mutex; + imem->mm = &tdev->iommu.mm; + imem->domain = tdev->iommu.domain; +- imem->iommu_pgshift = tdev->iommu.pgshift; +- imem->iommu_bit = tdev->func->iommu_bit; ++ imem->page_shift = tdev->iommu.pgshift; + + nvkm_info(&imem->base.subdev, "using IOMMU\n"); + } else { ++ imem->page_shift = PAGE_SHIFT; ++ + imem->attrs = DMA_ATTR_NON_CONSISTENT | + DMA_ATTR_WEAK_ORDERING | + DMA_ATTR_WRITE_COMBINE; +@@ -631,5 +634,17 @@ gk20a_instmem_new(struct nvkm_device *device, int index, + nvkm_info(&imem->base.subdev, "using DMA API\n"); + } + ++ /* ++ * The IOMMU mask needs to be set if an IOMMU is used explicitly (via ++ * direct IOMMU API usage) or implicitly (via the DMA API). In both ++ * cases the device will have been attached to an IOMMU domain. ++ */ ++ if (iommu_get_domain_for_dev(device->dev)) { ++ imem->iommu_mask = BIT_ULL(tdev->func->iommu_bit - ++ imem->page_shift); ++ nvkm_debug(&imem->base.subdev, "IOMMU mask: %016llx\n", ++ imem->iommu_mask); ++ } ++ + return 0; + } + +From patchwork Mon Sep 16 15:04:08 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [07/11] drm/nouveau: gk20a: Implement custom MMU class +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331057 +Message-Id: <20190916150412.10025-8-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:08 +0200 + +From: Thierry Reding <treding@nvidia.com> + +The GPU integrated in NVIDIA Tegra SoCs is connected to system memory +via two paths: one direct path to the memory controller and another path +that goes through a system MMU first. It's not typically necessary to go +through the system MMU because the GPU's MMU can already map buffers so +that they appear contiguous to the GPU. + +However, in order to support big pages, the system MMU has to be used to +combine multiple small pages into one virtually contiguous chunk so that +the GPU can then treat that as a single big page. + +In order to prepare for big page support, implement a custom MMU class +that takes care of setting the IOMMU bit when writing page tables and +when appropriate. + +This is also necessary to make sure that Nouveau works correctly on +Tegra devices where the GPU is connected to a system MMU and that IOMMU +is used to back the DMA API. Currently Nouveau assumes that the DMA API +is never backed by an IOMMU, so access to DMA-mapped buffers fault when +suddenly this assumption is no longer true. 
+ +One situation where this can happen is on 32-bit Tegra SoCs where the +ARM architecture code automatically attaches the GPU with a DMA/IOMMU +domain. This is currently worked around by detaching the GPU from the +IOMMU domain at probe time. However, with Tegra186 and later this can +now also happen, but unfortunately no mechanism exists to detach from +the domain in the 64-bit ARM architecture code. + +Using this Tegra-specific MMU class ensures that DMA-mapped buffers are +properly mapped (with the IOMMU bit set) if the DMA API is backed by an +IOMMU domain. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c | 50 ++++++++++++++++++- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h | 44 ++++++++++++++++ + .../gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c | 6 ++- + .../gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c | 4 +- + drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 1 + + .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 22 +++++++- + .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 4 +- + .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 20 +++++++- + 8 files changed, 142 insertions(+), 9 deletions(-) + create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h + +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c +index ac74965a60d4..d9a5e05b7dc7 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c +@@ -19,11 +19,59 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + ++#include <nvkm/core/tegra.h> + #include <nvif/class.h> + ++static void ++gk20a_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device, ++ int index, struct gk20a_mmu *mmu) ++{ ++ struct iommu_domain *domain = iommu_get_domain_for_dev(device->dev); ++ struct nvkm_device_tegra *tegra = device->func->tegra(device); ++ ++ nvkm_mmu_ctor(func, device, index, &mmu->base); ++ ++ /* ++ * If the DMA API is backed by an IOMMU, make sure the IOMMU bit is ++ * set for all buffer accesses. If the IOMMU is explicitly used, it ++ * is only used for instance blocks and the MMU doesn't care, since ++ * buffer objects are only mapped through the MMU, not through the ++ * IOMMU. ++ * ++ * Big page support could be implemented using explicit IOMMU usage, ++ * but the DMA API already provides that for free, so we don't worry ++ * about it for now. 
++ */ ++ if (domain && !tegra->iommu.domain) { ++ mmu->iommu_mask = BIT_ULL(tegra->func->iommu_bit); ++ nvkm_debug(&mmu->base.subdev, "IOMMU mask: %llx\n", ++ mmu->iommu_mask); ++ } ++} ++ ++int ++gk20a_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device, ++ int index, struct nvkm_mmu **pmmu) ++{ ++ struct gk20a_mmu *mmu; ++ ++ mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); ++ if (!mmu) ++ return -ENOMEM; ++ ++ gk20a_mmu_ctor(func, device, index, mmu); ++ ++ if (pmmu) ++ *pmmu = &mmu->base; ++ ++ return 0; ++} ++ + static const struct nvkm_mmu_func + gk20a_mmu = { + .dma_bits = 40, +@@ -37,5 +85,5 @@ gk20a_mmu = { + int + gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { +- return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gk20a_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h +new file mode 100644 +index 000000000000..bb81fc62509c +--- /dev/null ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2019 NVIDIA Corporation. ++ * ++ * Permission is hereby granted, free of charge, to any person obtaining a ++ * copy of this software and associated documentation files (the "Software"), ++ * to deal in the Software without restriction, including without limitation ++ * the rights to use, copy, modify, merge, publish, distribute, sublicense, ++ * and/or sell copies of the Software, and to permit persons to whom the ++ * Software is furnished to do so, subject to the following conditions: ++ * ++ * The above copyright notice and this permission notice shall be included in ++ * all copies or substantial portions of the Software. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL ++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR ++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR ++ * OTHER DEALINGS IN THE SOFTWARE. ++ */ ++ ++#ifndef __NVKM_MMU_GK20A_H__ ++#define __NVKM_MMU_GK20A_H__ ++ ++#include "priv.h" ++ ++struct gk20a_mmu { ++ struct nvkm_mmu base; ++ ++ /* ++ * If an IOMMU is used, indicates which address bit will trigger an ++ * IOMMU translation when set (when this bit is not set, the IOMMU is ++ * bypassed). A value of 0 means an IOMMU is never used. ++ */ ++ u64 iommu_mask; ++}; ++ ++#define gk20a_mmu(mmu) container_of(mmu, struct gk20a_mmu, base) ++ ++int gk20a_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, ++ int index, struct nvkm_mmu **); ++ ++#endif +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c +index 7353a94b4091..7fccd4df52a8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + +@@ -50,6 +52,6 @@ int + gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { + if (device->fb->page) +- return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); +- return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); ++ return gk20a_mmu_new_(&gm20b_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +index 0a50be9a785a..ae3cb47be3d8 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ ++ ++#include "gk20a.h" + #include "mem.h" + #include "vmm.h" + +@@ -41,5 +43,5 @@ gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) + { + if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) + return gm20b_mmu_new(device, index, pmmu); +- return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu); ++ return gk20a_mmu_new_(&gp10b_mmu, device, index, pmmu); + } +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +index 5e55ecbd8005..fb3a9e8bb9cd 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h +@@ -213,6 +213,7 @@ void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); + void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); + + int gk20a_vmm_aper(enum nvkm_memory_target); ++int gk20a_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); + + int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, + struct nvkm_mmu *, bool, u64, u64, void *, u32, +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +index 5a9582dce970..16d7bf727292 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c +@@ -19,6 +19,8 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++ ++#include "gk20a.h" + #include "vmm.h" + + #include <core/memory.h> +@@ -33,12 +35,28 @@ gk20a_vmm_aper(enum nvkm_memory_target target) + } + } + ++int ++gk20a_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, ++ struct nvkm_vmm_map *map) ++{ ++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); ++ int ret; ++ ++ ret = gf100_vmm_valid(vmm, argv, argc, map); ++ if (ret < 0) ++ return ret; ++ ++ map->type |= mmu->iommu_mask >> 8; ++ ++ return 0; ++} ++ + static const struct nvkm_vmm_func + gk20a_vmm_17 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +@@ -53,7 +71,7 @@ gk20a_vmm_16 = { + .join = gf100_vmm_join, + .part = gf100_vmm_part, + .aper = gf100_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +index 96b759695dd8..7a6066d886cd 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c +@@ -26,7 +26,7 @@ gm20b_vmm_17 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +@@ -42,7 +42,7 @@ gm20b_vmm_16 = { + .join = gm200_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gf100_vmm_valid, ++ .valid = gk20a_vmm_valid, + .flush = gf100_vmm_flush, + .invalidate_pdb = gf100_vmm_invalidate_pdb, + .page = { +diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +index e081239afe58..180c8f006e32 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c ++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c +@@ -19,14 +19,32 @@ + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ ++ ++#include "gk20a.h" + #include "vmm.h" + ++static int ++gp10b_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, ++ struct nvkm_vmm_map *map) ++{ ++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); ++ int ret; ++ ++ ret = gp100_vmm_valid(vmm, argv, argc, map); ++ if (ret < 0) ++ return ret; ++ ++ map->type |= mmu->iommu_mask >> 4; ++ ++ return 0; ++} ++ + static const struct nvkm_vmm_func + gp10b_vmm = { + .join = gp100_vmm_join, + .part = gf100_vmm_part, + .aper = gk20a_vmm_aper, +- .valid = gp100_vmm_valid, ++ .valid = gp10b_vmm_valid, + .flush = gp100_vmm_flush, + .mthd = gp100_vmm_mthd, + .invalidate_pdb = gp100_vmm_invalidate_pdb, + +From patchwork Mon Sep 16 15:04:09 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [08/11] drm/nouveau: tegra: Skip IOMMU initialization if already + attached +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331060 +Message-Id: <20190916150412.10025-9-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:09 +0200 + +From: Thierry Reding <treding@nvidia.com> + +If the GPU is already attached to an IOMMU, don't detach it and setup an +explicit IOMMU domain. Since Nouveau can now properly handle the case of +the DMA API being backed by an IOMMU, just continue using the DMA API. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../drm/nouveau/nvkm/engine/device/tegra.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index d0d52c1d4aee..fc652aaa41c7 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -23,10 +23,6 @@ + #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER + #include "priv.h" + +-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +-#include <asm/dma-iommu.h> +-#endif +- + static int + nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) + { +@@ -109,14 +105,13 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + unsigned long pgsize_bitmap; + int ret; + +-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +- if (dev->archdata.mapping) { +- struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); +- +- arm_iommu_detach_device(dev); +- arm_iommu_release_mapping(mapping); +- } +-#endif ++ /* ++ * Skip explicit IOMMU initialization if the GPU is already attached ++ * to an IOMMU domain. This can happen if the DMA API is backed by an ++ * IOMMU. ++ */ ++ if (iommu_get_domain_for_dev(dev)) ++ return; + + if (!tdev->func->iommu_bit) + return; + +From patchwork Mon Sep 16 15:04:10 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [09/11] drm/nouveau: tegra: Fall back to 32-bit DMA mask without IOMMU +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331061 +Message-Id: <20190916150412.10025-10-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:10 +0200 + +From: Thierry Reding <treding@nvidia.com> + +The GPU can usually address more than 32-bit, even without being +attached to an IOMMU. 
However, if the GPU is not attached to an IOMMU, +it's likely that there is no IOMMU in the system, in which case any +buffers allocated by Nouveau will likely end up in a region of memory +that cannot be accessed by host1x. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + .../drm/nouveau/nvkm/engine/device/tegra.c | 111 +++++++++++------- + 1 file changed, 70 insertions(+), 41 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +index fc652aaa41c7..221238a2cf53 100644 +--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c ++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c +@@ -97,7 +97,7 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev) + return 0; + } + +-static void ++static int + nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + { + #if IS_ENABLED(CONFIG_IOMMU_API) +@@ -111,47 +111,65 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + * IOMMU. + */ + if (iommu_get_domain_for_dev(dev)) +- return; ++ return -ENODEV; + + if (!tdev->func->iommu_bit) +- return; ++ return -ENODEV; ++ ++ if (!iommu_present(&platform_bus_type)) ++ return -ENODEV; + + mutex_init(&tdev->iommu.mutex); + +- if (iommu_present(&platform_bus_type)) { +- tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); +- if (!tdev->iommu.domain) +- goto error; ++ tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); ++ if (!tdev->iommu.domain) ++ return -ENOMEM; + +- /* +- * A IOMMU is only usable if it supports page sizes smaller +- * or equal to the system's PAGE_SIZE, with a preference if +- * both are equal. +- */ +- pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; +- if (pgsize_bitmap & PAGE_SIZE) { +- tdev->iommu.pgshift = PAGE_SHIFT; +- } else { +- tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); +- if (tdev->iommu.pgshift == 0) { +- dev_warn(dev, "unsupported IOMMU page size\n"); +- goto free_domain; +- } +- tdev->iommu.pgshift -= 1; ++ /* ++ * An IOMMU is only usable if it supports page sizes smaller or equal ++ * to the system's PAGE_SIZE, with a preference if both are equal. ++ */ ++ pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; ++ if (pgsize_bitmap & PAGE_SIZE) { ++ tdev->iommu.pgshift = PAGE_SHIFT; ++ } else { ++ tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); ++ if (tdev->iommu.pgshift == 0) { ++ dev_warn(dev, "unsupported IOMMU page size\n"); ++ ret = -ENOTSUPP; ++ goto free_domain; + } + +- ret = iommu_attach_device(tdev->iommu.domain, dev); +- if (ret) +- goto free_domain; ++ tdev->iommu.pgshift -= 1; ++ } + +- ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, +- (1ULL << tdev->func->iommu_bit) >> +- tdev->iommu.pgshift, 1); +- if (ret) +- goto detach_device; ++ ret = iommu_attach_device(tdev->iommu.domain, dev); ++ if (ret) { ++ dev_warn(dev, "failed to attach to IOMMU: %d\n", ret); ++ goto free_domain; ++ } ++ ++ ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, ++ (1ULL << tdev->func->iommu_bit) >> ++ tdev->iommu.pgshift, 1); ++ if (ret) { ++ dev_warn(dev, "failed to initialize IOVA space: %d\n", ret); ++ goto detach_device; ++ } ++ ++ /* ++ * The IOMMU bit defines the upper limit of the GPU-addressable space. 
++ */ ++ ret = dma_set_mask(dev, DMA_BIT_MASK(tdev->func->iommu_bit)); ++ if (ret) { ++ dev_warn(dev, "failed to set DMA mask: %d\n", ret); ++ goto fini_mm; + } + +- return; ++ return 0; ++ ++fini_mm: ++ nvkm_mm_fini(&tdev->iommu.mm); + + detach_device: + iommu_detach_device(tdev->iommu.domain, dev); +@@ -159,10 +177,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) + free_domain: + iommu_domain_free(tdev->iommu.domain); + +-error: ++ /* reset these so that the DMA API code paths are executed */ + tdev->iommu.domain = NULL; + tdev->iommu.pgshift = 0; +- dev_err(dev, "cannot initialize IOMMU MM\n"); ++ ++ dev_warn(dev, "cannot initialize IOMMU MM\n"); ++ ++ return ret; ++#else ++ return -ENOTSUPP; + #endif + } + +@@ -327,14 +350,20 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, + goto free; + } + +- /** +- * The IOMMU bit defines the upper limit of the GPU-addressable space. +- */ +- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit)); +- if (ret) +- goto free; +- +- nvkm_device_tegra_probe_iommu(tdev); ++ ret = nvkm_device_tegra_probe_iommu(tdev); ++ if (ret) { ++ /* ++ * If we fail to set up an IOMMU, fall back to a 32-bit DMA ++ * mask. This is not necessary for the GPU to work because it ++ * can usually address all of system memory. However, if the ++ * buffers allocated by Nouveau are meant to be shared with ++ * the display controller, we need to restrict where they can ++ * come from. ++ */ ++ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); ++ if (ret) ++ goto free; ++ } + + ret = nvkm_device_tegra_power_up(tdev); + if (ret) + +From patchwork Mon Sep 16 15:04:11 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [10/11] arm64: tegra: Enable GPU on Jetson TX2 +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331063 +Message-Id: <20190916150412.10025-11-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:11 +0200 + +From: Alexandre Courbot <acourbot@nvidia.com> + +Enable the GPU node for the Jetson TX2 board. 
+ +Signed-off-by: Alexandre Courbot <acourbot@nvidia.com> +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +index bdace01561ba..6f7c7c4c5c29 100644 +--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts ++++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts +@@ -276,6 +276,10 @@ + }; + }; + ++ gpu@17000000 { ++ status = "okay"; ++ }; ++ + gpio-keys { + compatible = "gpio-keys"; + + +From patchwork Mon Sep 16 15:04:12 2019 +Content-Type: text/plain; charset="utf-8" +MIME-Version: 1.0 +Content-Transfer-Encoding: 7bit +Subject: [11/11] arm64: tegra: Enable SMMU for GPU on Tegra186 +From: Thierry Reding <thierry.reding@gmail.com> +X-Patchwork-Id: 331062 +Message-Id: <20190916150412.10025-12-thierry.reding@gmail.com> +To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com> +Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, + dri-devel@lists.freedesktop.org +Date: Mon, 16 Sep 2019 17:04:12 +0200 + +From: Thierry Reding <treding@nvidia.com> + +The GPU has a connection to the ARM SMMU found on Tegra186, which can be +used to support large pages. Make sure the GPU is attached to the SMMU +to take advantage of its capabilities. + +Signed-off-by: Thierry Reding <treding@nvidia.com> +--- + arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi +index 47cd831fcf44..171fd4dfa58d 100644 +--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi ++++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi +@@ -1172,6 +1172,7 @@ + status = "disabled"; + + power-domains = <&bpmp TEGRA186_POWER_DOMAIN_GPU>; ++ iommus = <&smmu TEGRA186_SID_GPU>; + }; + + sysram@30000000 { |