Diffstat (limited to 'drm-nouveau-Enable-GP10B-by-default.patch')
-rw-r--r--  drm-nouveau-Enable-GP10B-by-default.patch  1275
1 file changed, 1275 insertions(+), 0 deletions(-)
diff --git a/drm-nouveau-Enable-GP10B-by-default.patch b/drm-nouveau-Enable-GP10B-by-default.patch
new file mode 100644
index 000000000..e44a5b339
--- /dev/null
+++ b/drm-nouveau-Enable-GP10B-by-default.patch
@@ -0,0 +1,1275 @@
+From patchwork Mon Sep 16 15:04:02 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [01/11] drm/nouveau: tegra: Avoid pulsing reset twice
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331044
+Message-Id: <20190916150412.10025-2-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:02 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+When the GPU powergate is controlled by a generic power domain provider,
+the reset will automatically be asserted and deasserted as part of the
+power-ungating procedure.
+
+On some Jetson TX2 boards, pulsing the GPU reset an additional time
+outside of the power-ungate procedure can put the GPU into a bad state
+in which its memory interface can no longer access system memory.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
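+
+A sketch of the ordering this patch relies on, assuming the usual genpd
+power-ungate sequence on Tegra (the callback and helper names below are
+hypothetical, for illustration only):
+
+    /* assumed provider-side power-ungate sequence */
+    static int gpu_pd_power_on(struct generic_pm_domain *genpd)
+    {
+            reset_control_assert(rst);      /* 1. hold the GPU in reset */
+            ungate_power_rail();            /* 2. ungate the power rail */
+            remove_io_clamps();             /* 3. remove the I/O clamps */
+            reset_control_deassert(rst);    /* 4. release the reset     */
+            return 0;
+    }
+
+With the provider already pulsing the reset as part of steps 1 and 4,
+nouveau only needs to do so itself in the non-genpd case, as the diff
+below implements.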
+ drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+index 0e372a190d3f..747a775121cf 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+@@ -52,18 +52,18 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
+ clk_set_rate(tdev->clk_pwr, 204000000);
+ udelay(10);
+
+- reset_control_assert(tdev->rst);
+- udelay(10);
+-
+ if (!tdev->pdev->dev.pm_domain) {
++ reset_control_assert(tdev->rst);
++ udelay(10);
++
+ ret = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D);
+ if (ret)
+ goto err_clamp;
+ udelay(10);
+- }
+
+- reset_control_deassert(tdev->rst);
+- udelay(10);
++ reset_control_deassert(tdev->rst);
++ udelay(10);
++ }
+
+ return 0;
+
+
+From patchwork Mon Sep 16 15:04:03 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [02/11] drm/nouveau: tegra: Set clock rate if not set
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331046
+Message-Id: <20190916150412.10025-3-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:03 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+If the GPU clock has not had a rate set, initialize it to the maximum
+clock rate to make sure it does run.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
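+
+A minimal sketch of the idea behind the fallback, assuming the common
+clock framework's behaviour of rounding a requested rate down to the
+nearest rate the clock tree supports, so that ULONG_MAX selects the
+maximum:
+
+    #include <linux/clk.h>
+
+    /* query what ULONG_MAX would resolve to, without changing the rate */
+    long max = clk_round_rate(clk, ULONG_MAX);
+
+    /* a rate of 0 means nothing has configured the clock yet, so pick
+     * the maximum supported rate */
+    if (clk_get_rate(clk) == 0)
+            clk_set_rate(clk, ULONG_MAX);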
+ drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+index 747a775121cf..d0d52c1d4aee 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+@@ -279,6 +279,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
+ struct nvkm_device **pdevice)
+ {
+ struct nvkm_device_tegra *tdev;
++ unsigned long rate;
+ int ret;
+
+ if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL)))
+@@ -307,6 +308,17 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
+ goto free;
+ }
+
++ rate = clk_get_rate(tdev->clk);
++ if (rate == 0) {
++ ret = clk_set_rate(tdev->clk, ULONG_MAX);
++ if (ret < 0)
++ goto free;
++
++ rate = clk_get_rate(tdev->clk);
++
++ dev_dbg(&pdev->dev, "GPU clock set to %lu\n", rate);
++ }
++
+ if (func->require_ref_clk)
+ tdev->clk_ref = devm_clk_get(&pdev->dev, "ref");
+ if (IS_ERR(tdev->clk_ref)) {
+
+From patchwork Mon Sep 16 15:04:04 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [03/11] drm/nouveau: secboot: Read WPR configuration from GPU
+ registers
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331048
+Message-Id: <20190916150412.10025-4-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:04 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+The GPUs found on Tegra SoCs have registers that can be used to read the
+WPR configuration. Use these registers instead of reaching into the
+memory controller's register space to read the same information.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
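+
+A sketch of the FIFO access pattern described in the new comment below;
+the register offset and field layout come from the patch itself, while
+the helper name is made up for illustration:
+
+    static u64 wpr_read_entry(struct nvkm_device *device, u32 index)
+    {
+            u32 value;
+
+            /* the lower 4 bits of 0x100cd4 select the read index */
+            value = nvkm_rd32(device, 0x100cd4) & ~0xf;
+            nvkm_wr32(device, 0x100cd4, value | index);
+
+            /* each read returns one entry and advances the index */
+            value = nvkm_rd32(device, 0x100cd4);
+            return (u64)(value >> 4) << 12;
+    }
+
+Reading indices 2 and 3 this way yields the WPR base and limit.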
+ .../drm/nouveau/nvkm/subdev/secboot/gm200.h | 2 +-
+ .../drm/nouveau/nvkm/subdev/secboot/gm20b.c | 81 ++++++++++++-------
+ .../drm/nouveau/nvkm/subdev/secboot/gp10b.c | 4 +-
+ 3 files changed, 53 insertions(+), 34 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
+index 62c5e162099a..280b1448df88 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
+@@ -41,6 +41,6 @@ int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *,
+ struct nvkm_falcon *);
+
+ /* Tegra-only */
+-int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *, u32);
++int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *);
+
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+index df8b919dcf09..f8a543122219 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+@@ -23,39 +23,65 @@
+ #include "acr.h"
+ #include "gm200.h"
+
+-#define TEGRA210_MC_BASE 0x70019000
+-
+ #ifdef CONFIG_ARCH_TEGRA
+-#define MC_SECURITY_CARVEOUT2_CFG0 0xc58
+-#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c
+-#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60
+-#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64
+-#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED (1 << 1)
+ /**
+ * gm20b_secboot_tegra_read_wpr() - read the WPR registers on Tegra
+ *
+- * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region
+- * is reserved from system memory by the bootloader and irreversibly locked.
+- * This function reads the address and size of the pre-configured WPR region.
++ * On dGPU, we can manage the WPR region ourselves, but on Tegra this region
++ * is allocated from system memory by the secure firmware. The region is then
++ * marked as a "secure carveout" and irreversibly locked. Furthermore, the WPR
++ * secure carveout is also configured to be sent to the GPU via a dedicated
++ * serial bus between the memory controller and the GPU. The GPU requests this
++ * information upon leaving reset and exposes it through a FIFO register at
++ * offset 0x100cd4.
++ *
++ * The FIFO register's lower 4 bits can be used to set the read index into the
++ * FIFO. After each read of the FIFO register, the read index is incremented.
++ *
++ * Indices 2 and 3 contain the lower and upper addresses of the WPR. These are
++ * stored in units of 256 B. The WPR is inclusive of both addresses.
++ *
++ * Unfortunately, for some reason the WPR info register doesn't contain the
++ * correct values for the secure carveout. It seems like the upper address is
++ * always too small by 128 KiB - 1. Given that the secure carveout size in the
++ * memory controller configuration is specified in units of 128 KiB, it's
++ * possible that the computation of the upper address of the WPR is wrong and
++ * causes this difference.
+ */
+ int
+-gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base)
++gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb)
+ {
++ struct nvkm_device *device = gsb->base.subdev.device;
+ struct nvkm_secboot *sb = &gsb->base;
+- void __iomem *mc;
+- u32 cfg;
++ u64 base, limit;
++ u32 value;
+
+- mc = ioremap(mc_base, 0xd00);
+- if (!mc) {
+- nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
+- return -ENOMEM;
+- }
+- sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+- ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
+- sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+- << 17;
+- cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
+- iounmap(mc);
++ /* set WPR info register to point at WPR base address register */
++ value = nvkm_rd32(device, 0x100cd4);
++ value &= ~0xf;
++ value |= 0x2;
++ nvkm_wr32(device, 0x100cd4, value);
++
++ /* read base address */
++ value = nvkm_rd32(device, 0x100cd4);
++ base = (u64)(value >> 4) << 12;
++
++ /* read limit */
++ value = nvkm_rd32(device, 0x100cd4);
++ limit = (u64)(value >> 4) << 12;
++
++ /*
++ * The upper address of the WPR seems to be computed wrongly and is
++ * actually SZ_128K - 1 bytes lower than it should be. Adjust the
++ * value accordingly.
++ */
++ limit += SZ_128K - 1;
++
++ sb->wpr_size = limit - base + 1;
++ sb->wpr_addr = base;
++
++ nvkm_info(&sb->subdev, "WPR: %016llx-%016llx\n", sb->wpr_addr,
++ sb->wpr_addr + sb->wpr_size - 1);
+
+ /* Check that WPR settings are valid */
+ if (sb->wpr_size == 0) {
+@@ -63,11 +89,6 @@ gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base)
+ return -EINVAL;
+ }
+
+- if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) {
+- nvkm_error(&sb->subdev, "WPR region not locked\n");
+- return -EINVAL;
+- }
+-
+ return 0;
+ }
+ #else
+@@ -85,7 +106,7 @@ gm20b_secboot_oneinit(struct nvkm_secboot *sb)
+ struct gm200_secboot *gsb = gm200_secboot(sb);
+ int ret;
+
+- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA210_MC_BASE);
++ ret = gm20b_secboot_tegra_read_wpr(gsb);
+ if (ret)
+ return ret;
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c
+index 28ca29d0eeee..d84e85825995 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c
+@@ -23,15 +23,13 @@
+ #include "acr.h"
+ #include "gm200.h"
+
+-#define TEGRA186_MC_BASE 0x02c10000
+-
+ static int
+ gp10b_secboot_oneinit(struct nvkm_secboot *sb)
+ {
+ struct gm200_secboot *gsb = gm200_secboot(sb);
+ int ret;
+
+- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA186_MC_BASE);
++ ret = gm20b_secboot_tegra_read_wpr(gsb);
+ if (ret)
+ return ret;
+
+
+From patchwork Mon Sep 16 15:04:05 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [04/11] drm/nouveau: gp10b: Add custom L2 cache implementation
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331049
+Message-Id: <20190916150412.10025-5-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:05 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+There are extra registers that need to be programmed to make the level 2
+cache work on GP10B, such as the stream ID register that is used when an
+SMMU is used to translate memory addresses.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
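+
+The stream ID written to register 0x160000 below is the one the device
+tree assigns to the GPU via its "iommus" property (see patch 11 in this
+series); dev_iommu_fwspec_get() simply hands those IDs back. A hedged
+sketch of that relationship:
+
+    /* DT:  iommus = <&smmu TEGRA186_SID_GPU>;                     */
+    struct iommu_fwspec *spec = dev_iommu_fwspec_get(device->dev);
+    if (spec) {
+            /* spec->ids[0] carries the SID (plus SMMU mask bits,
+             * hence the & 0xffff) */
+            u32 sid = spec->ids[0] & 0xffff;
+            nvkm_wr32(device, 0x160000, sid << 2);
+    }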
+ .../gpu/drm/nouveau/include/nvkm/subdev/ltc.h | 1 +
+ .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +-
+ .../gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild | 1 +
+ .../gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c | 69 +++++++++++++++++++
+ .../gpu/drm/nouveau/nvkm/subdev/ltc/priv.h | 2 +
+ 5 files changed, 74 insertions(+), 1 deletion(-)
+ create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
+
+diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+index 644d527c3b96..d76f60d7d29a 100644
+--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
++++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+@@ -40,4 +40,5 @@ int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+ int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+ int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+ int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
++int gp10b_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+ #endif
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+index c3c7159f3411..d2d6d5f4028a 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+@@ -2380,7 +2380,7 @@ nv13b_chipset = {
+ .fuse = gm107_fuse_new,
+ .ibus = gp10b_ibus_new,
+ .imem = gk20a_instmem_new,
+- .ltc = gp102_ltc_new,
++ .ltc = gp10b_ltc_new,
+ .mc = gp10b_mc_new,
+ .mmu = gp10b_mmu_new,
+ .secboot = gp10b_secboot_new,
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+index 2b6d36ea7067..728d75010847 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+@@ -6,3 +6,4 @@ nvkm-y += nvkm/subdev/ltc/gm107.o
+ nvkm-y += nvkm/subdev/ltc/gm200.o
+ nvkm-y += nvkm/subdev/ltc/gp100.o
+ nvkm-y += nvkm/subdev/ltc/gp102.o
++nvkm-y += nvkm/subdev/ltc/gp10b.o
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
+new file mode 100644
+index 000000000000..4d27c6ea1552
+--- /dev/null
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
+@@ -0,0 +1,69 @@
++/*
++ * Copyright (c) 2019 NVIDIA Corporation.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: Thierry Reding
++ */
++
++#include "priv.h"
++
++static void
++gp10b_ltc_init(struct nvkm_ltc *ltc)
++{
++ struct nvkm_device *device = ltc->subdev.device;
++#ifdef CONFIG_IOMMU_API
++ struct iommu_fwspec *spec;
++#endif
++
++ nvkm_wr32(device, 0x17e27c, ltc->ltc_nr);
++ nvkm_wr32(device, 0x17e000, ltc->ltc_nr);
++ nvkm_wr32(device, 0x100800, ltc->ltc_nr);
++
++#ifdef CONFIG_IOMMU_API
++ spec = dev_iommu_fwspec_get(device->dev);
++ if (spec) {
++ u32 sid = spec->ids[0] & 0xffff;
++
++ /* stream ID */
++ nvkm_wr32(device, 0x160000, sid << 2);
++ }
++#endif
++}
++
++static const struct nvkm_ltc_func
++gp10b_ltc = {
++ .oneinit = gp100_ltc_oneinit,
++ .init = gp10b_ltc_init,
++ .intr = gp100_ltc_intr,
++ .cbc_clear = gm107_ltc_cbc_clear,
++ .cbc_wait = gm107_ltc_cbc_wait,
++ .zbc = 16,
++ .zbc_clear_color = gm107_ltc_zbc_clear_color,
++ .zbc_clear_depth = gm107_ltc_zbc_clear_depth,
++ .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil,
++ .invalidate = gf100_ltc_invalidate,
++ .flush = gf100_ltc_flush,
++};
++
++int
++gp10b_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
++{
++ return nvkm_ltc_new_(&gp10b_ltc, device, index, pltc);
++}
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+index 2fcf18e46ce3..eca5a711b1b8 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+@@ -46,4 +46,6 @@ void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
+ int gp100_ltc_oneinit(struct nvkm_ltc *);
+ void gp100_ltc_init(struct nvkm_ltc *);
+ void gp100_ltc_intr(struct nvkm_ltc *);
++
++void gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *, int, const u32);
+ #endif
+
+From patchwork Mon Sep 16 15:04:06 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [05/11] drm/nouveau: gp10b: Use correct copy engine
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331052
+Message-Id: <20190916150412.10025-6-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:06 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+gp10b uses the new engine enumeration mechanism introduced in the Pascal
+architecture. As a result, the copy engine, which used to be at index 2
+for prior Tegra GPU instantiations, has now moved to index 0. Fix up the
+index and also use the gp100 variant of the copy engine class because on
+gp10b the PASCAL_DMA_COPY_B class is not supported.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
+ drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+index d2d6d5f4028a..99d3fa3fad89 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+@@ -2387,7 +2387,7 @@ nv13b_chipset = {
+ .pmu = gm20b_pmu_new,
+ .timer = gk20a_timer_new,
+ .top = gk104_top_new,
+- .ce[2] = gp102_ce_new,
++ .ce[0] = gp100_ce_new,
+ .dma = gf119_dma_new,
+ .fifo = gp10b_fifo_new,
+ .gr = gp10b_gr_new,
+
+From patchwork Mon Sep 16 15:04:07 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [06/11] drm/nouveau: gk20a: Set IOMMU bit for DMA API if appropriate
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331053
+Message-Id: <20190916150412.10025-7-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:07 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+Detect if the DMA API is backed by an IOMMU and set the IOMMU bit if so.
+This is needed to make sure IOMMU addresses are properly translated even
+when the explicit IOMMU API is not used.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
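+
+A worked example of the mask computation below, assuming an iommu_bit
+of 34 (the value used on gk20a) and a 4 KiB IOMMU page, i.e. a
+page_shift of 12:
+
+    imem->iommu_mask = BIT_ULL(34 - 12);    /* == BIT_ULL(22) */
+
+    /* r->offset counts IOMMU pages, so setting bit 22 here sets
+     * bit 34 of the bus address once the offset is shifted left by
+     * page_shift; addresses with that bit set are translated by
+     * the IOMMU */
+    r->offset |= imem->iommu_mask;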
+ .../drm/nouveau/nvkm/subdev/instmem/gk20a.c | 35 +++++++++++++------
+ 1 file changed, 25 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+index b0493f8df1fe..1120a2a7d5f1 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+@@ -100,12 +100,14 @@ struct gk20a_instmem {
+ unsigned int vaddr_max;
+ struct list_head vaddr_lru;
+
++ /* IOMMU mapping */
++ unsigned int page_shift;
++ u64 iommu_mask;
++
+ /* Only used if IOMMU if present */
+ struct mutex *mm_mutex;
+ struct nvkm_mm *mm;
+ struct iommu_domain *domain;
+- unsigned long iommu_pgshift;
+- u16 iommu_bit;
+
+ /* Only used by DMA API */
+ unsigned long attrs;
+@@ -357,12 +359,12 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
+ mutex_unlock(&imem->lock);
+
+ /* clear IOMMU bit to unmap pages */
+- r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
++ r->offset &= ~imem->iommu_mask;
+
+ /* Unmap pages from GPU address space and free them */
+ for (i = 0; i < node->base.mn->length; i++) {
+ iommu_unmap(imem->domain,
+- (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE);
++ (r->offset + i) << imem->page_shift, PAGE_SIZE);
+ dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ __free_page(node->pages[i]);
+@@ -440,7 +442,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
+
+ /* present memory for being mapped using small pages */
+ node->r.type = 12;
+- node->r.offset = node->handle >> 12;
++ node->r.offset = imem->iommu_mask | node->handle >> 12;
+ node->r.length = (npages << PAGE_SHIFT) >> 12;
+
+ node->base.mn = &node->r;
+@@ -493,7 +495,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
+ mutex_lock(imem->mm_mutex);
+ /* Reserve area from GPU address space */
+ ret = nvkm_mm_head(imem->mm, 0, 1, npages, npages,
+- align >> imem->iommu_pgshift, &r);
++ align >> imem->page_shift, &r);
+ mutex_unlock(imem->mm_mutex);
+ if (ret) {
+ nvkm_error(subdev, "IOMMU space is full!\n");
+@@ -502,7 +504,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
+
+ /* Map into GPU address space */
+ for (i = 0; i < npages; i++) {
+- u32 offset = (r->offset + i) << imem->iommu_pgshift;
++ u32 offset = (r->offset + i) << imem->page_shift;
+
+ ret = iommu_map(imem->domain, offset, node->dma_addrs[i],
+ PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
+@@ -518,7 +520,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
+ }
+
+ /* IOMMU bit tells that an address is to be resolved through the IOMMU */
+- r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
++ r->offset |= imem->iommu_mask;
+
+ node->base.mn = r;
+ return 0;
+@@ -619,11 +621,12 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
+ imem->mm_mutex = &tdev->iommu.mutex;
+ imem->mm = &tdev->iommu.mm;
+ imem->domain = tdev->iommu.domain;
+- imem->iommu_pgshift = tdev->iommu.pgshift;
+- imem->iommu_bit = tdev->func->iommu_bit;
++ imem->page_shift = tdev->iommu.pgshift;
+
+ nvkm_info(&imem->base.subdev, "using IOMMU\n");
+ } else {
++ imem->page_shift = PAGE_SHIFT;
++
+ imem->attrs = DMA_ATTR_NON_CONSISTENT |
+ DMA_ATTR_WEAK_ORDERING |
+ DMA_ATTR_WRITE_COMBINE;
+@@ -631,5 +634,17 @@ gk20a_instmem_new(struct nvkm_device *device, int index,
+ nvkm_info(&imem->base.subdev, "using DMA API\n");
+ }
+
++ /*
++ * The IOMMU mask needs to be set if an IOMMU is used explicitly (via
++ * direct IOMMU API usage) or implicitly (via the DMA API). In both
++ * cases the device will have been attached to an IOMMU domain.
++ */
++ if (iommu_get_domain_for_dev(device->dev)) {
++ imem->iommu_mask = BIT_ULL(tdev->func->iommu_bit -
++ imem->page_shift);
++ nvkm_debug(&imem->base.subdev, "IOMMU mask: %016llx\n",
++ imem->iommu_mask);
++ }
++
+ return 0;
+ }
+
+From patchwork Mon Sep 16 15:04:08 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [07/11] drm/nouveau: gk20a: Implement custom MMU class
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331057
+Message-Id: <20190916150412.10025-8-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:08 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+The GPU integrated in NVIDIA Tegra SoCs is connected to system memory
+via two paths: one direct path to the memory controller and another path
+that goes through a system MMU first. It's not typically necessary to go
+through the system MMU because the GPU's MMU can already map buffers so
+that they appear contiguous to the GPU.
+
+However, in order to support big pages, the system MMU has to be used to
+combine multiple small pages into one virtually contiguous chunk so that
+the GPU can then treat that as a single big page.
+
+In order to prepare for big page support, implement a custom MMU class
+that takes care of setting the IOMMU bit when writing page tables and
+when appropriate.
+
+This is also necessary to make sure that Nouveau works correctly on
+Tegra devices where the GPU is connected to a system MMU and that IOMMU
+is used to back the DMA API. Currently Nouveau assumes that the DMA API
+is never backed by an IOMMU, so accesses to DMA-mapped buffers fault when
+this assumption suddenly no longer holds.
+
+One situation where this can happen is on 32-bit Tegra SoCs where the
+ARM architecture code automatically attaches the GPU with a DMA/IOMMU
+domain. This is currently worked around by detaching the GPU from the
+IOMMU domain at probe time. However, with Tegra186 and later this can
+now also happen, but unfortunately no mechanism exists to detach from
+the domain in the 64-bit ARM architecture code.
+
+Using this Tegra-specific MMU class ensures that DMA-mapped buffers are
+properly mapped (with the IOMMU bit set) if the DMA API is backed by an
+IOMMU domain.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
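+
+The pattern this patch uses is a thin wrapper around the generic
+->valid() implementation: run it first, then fold the (pre-shifted)
+IOMMU bit into the PTE type/address word. A sketch mirroring the new
+gk20a_vmm_valid() below; the right shift differs between the gf100-style
+(8) and gp100-style (4) PTE layouts:
+
+    ret = gf100_vmm_valid(vmm, argv, argc, map);
+    if (ret < 0)
+            return ret;
+
+    /* OR the IOMMU address bit into the PTE fields */
+    map->type |= mmu->iommu_mask >> 8;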
+ .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c | 50 ++++++++++++++++++-
+ .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h | 44 ++++++++++++++++
+ .../gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c | 6 ++-
+ .../gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c | 4 +-
+ drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 1 +
+ .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 22 +++++++-
+ .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 4 +-
+ .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 20 +++++++-
+ 8 files changed, 142 insertions(+), 9 deletions(-)
+ create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c
+index ac74965a60d4..d9a5e05b7dc7 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c
+@@ -19,11 +19,59 @@
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
++
++#include "gk20a.h"
+ #include "mem.h"
+ #include "vmm.h"
+
++#include <nvkm/core/tegra.h>
+ #include <nvif/class.h>
+
++static void
++gk20a_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device,
++ int index, struct gk20a_mmu *mmu)
++{
++ struct iommu_domain *domain = iommu_get_domain_for_dev(device->dev);
++ struct nvkm_device_tegra *tegra = device->func->tegra(device);
++
++ nvkm_mmu_ctor(func, device, index, &mmu->base);
++
++ /*
++ * If the DMA API is backed by an IOMMU, make sure the IOMMU bit is
++ * set for all buffer accesses. If the IOMMU is explicitly used, it
++ * is only used for instance blocks and the MMU doesn't care, since
++ * buffer objects are only mapped through the MMU, not through the
++ * IOMMU.
++ *
++ * Big page support could be implemented using explicit IOMMU usage,
++ * but the DMA API already provides that for free, so we don't worry
++ * about it for now.
++ */
++ if (domain && !tegra->iommu.domain) {
++ mmu->iommu_mask = BIT_ULL(tegra->func->iommu_bit);
++ nvkm_debug(&mmu->base.subdev, "IOMMU mask: %llx\n",
++ mmu->iommu_mask);
++ }
++}
++
++int
++gk20a_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device,
++ int index, struct nvkm_mmu **pmmu)
++{
++ struct gk20a_mmu *mmu;
++
++ mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
++ if (!mmu)
++ return -ENOMEM;
++
++ gk20a_mmu_ctor(func, device, index, mmu);
++
++ if (pmmu)
++ *pmmu = &mmu->base;
++
++ return 0;
++}
++
+ static const struct nvkm_mmu_func
+ gk20a_mmu = {
+ .dma_bits = 40,
+@@ -37,5 +85,5 @@ gk20a_mmu = {
+ int
+ gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+ {
+- return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu);
++ return gk20a_mmu_new_(&gk20a_mmu, device, index, pmmu);
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h
+new file mode 100644
+index 000000000000..bb81fc62509c
+--- /dev/null
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h
+@@ -0,0 +1,44 @@
++/*
++ * Copyright (c) 2019 NVIDIA Corporation.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef __NVKM_MMU_GK20A_H__
++#define __NVKM_MMU_GK20A_H__
++
++#include "priv.h"
++
++struct gk20a_mmu {
++ struct nvkm_mmu base;
++
++ /*
++ * If an IOMMU is used, indicates which address bit will trigger an
++ * IOMMU translation when set (when this bit is not set, the IOMMU is
++ * bypassed). A value of 0 means an IOMMU is never used.
++ */
++ u64 iommu_mask;
++};
++
++#define gk20a_mmu(mmu) container_of(mmu, struct gk20a_mmu, base)
++
++int gk20a_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *,
++ int index, struct nvkm_mmu **);
++
++#endif
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c
+index 7353a94b4091..7fccd4df52a8 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c
+@@ -19,6 +19,8 @@
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
++
++#include "gk20a.h"
+ #include "mem.h"
+ #include "vmm.h"
+
+@@ -50,6 +52,6 @@ int
+ gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+ {
+ if (device->fb->page)
+- return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu);
+- return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu);
++ return gk20a_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu);
++ return gk20a_mmu_new_(&gm20b_mmu, device, index, pmmu);
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
+index 0a50be9a785a..ae3cb47be3d8 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c
+@@ -19,6 +19,8 @@
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
++
++#include "gk20a.h"
+ #include "mem.h"
+ #include "vmm.h"
+
+@@ -41,5 +43,5 @@ gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+ {
+ if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true))
+ return gm20b_mmu_new(device, index, pmmu);
+- return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu);
++ return gk20a_mmu_new_(&gp10b_mmu, device, index, pmmu);
+ }
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+index 5e55ecbd8005..fb3a9e8bb9cd 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+@@ -213,6 +213,7 @@ void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type);
+ void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr);
+
+ int gk20a_vmm_aper(enum nvkm_memory_target);
++int gk20a_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
+
+ int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *,
+ struct nvkm_mmu *, bool, u64, u64, void *, u32,
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c
+index 5a9582dce970..16d7bf727292 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c
+@@ -19,6 +19,8 @@
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
++
++#include "gk20a.h"
+ #include "vmm.h"
+
+ #include <core/memory.h>
+@@ -33,12 +35,28 @@ gk20a_vmm_aper(enum nvkm_memory_target target)
+ }
+ }
+
++int
++gk20a_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
++ struct nvkm_vmm_map *map)
++{
++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu);
++ int ret;
++
++ ret = gf100_vmm_valid(vmm, argv, argc, map);
++ if (ret < 0)
++ return ret;
++
++ map->type |= mmu->iommu_mask >> 8;
++
++ return 0;
++}
++
+ static const struct nvkm_vmm_func
+ gk20a_vmm_17 = {
+ .join = gf100_vmm_join,
+ .part = gf100_vmm_part,
+ .aper = gf100_vmm_aper,
+- .valid = gf100_vmm_valid,
++ .valid = gk20a_vmm_valid,
+ .flush = gf100_vmm_flush,
+ .invalidate_pdb = gf100_vmm_invalidate_pdb,
+ .page = {
+@@ -53,7 +71,7 @@ gk20a_vmm_16 = {
+ .join = gf100_vmm_join,
+ .part = gf100_vmm_part,
+ .aper = gf100_vmm_aper,
+- .valid = gf100_vmm_valid,
++ .valid = gk20a_vmm_valid,
+ .flush = gf100_vmm_flush,
+ .invalidate_pdb = gf100_vmm_invalidate_pdb,
+ .page = {
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c
+index 96b759695dd8..7a6066d886cd 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c
+@@ -26,7 +26,7 @@ gm20b_vmm_17 = {
+ .join = gm200_vmm_join,
+ .part = gf100_vmm_part,
+ .aper = gk20a_vmm_aper,
+- .valid = gf100_vmm_valid,
++ .valid = gk20a_vmm_valid,
+ .flush = gf100_vmm_flush,
+ .invalidate_pdb = gf100_vmm_invalidate_pdb,
+ .page = {
+@@ -42,7 +42,7 @@ gm20b_vmm_16 = {
+ .join = gm200_vmm_join,
+ .part = gf100_vmm_part,
+ .aper = gk20a_vmm_aper,
+- .valid = gf100_vmm_valid,
++ .valid = gk20a_vmm_valid,
+ .flush = gf100_vmm_flush,
+ .invalidate_pdb = gf100_vmm_invalidate_pdb,
+ .page = {
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
+index e081239afe58..180c8f006e32 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c
+@@ -19,14 +19,32 @@
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
++
++#include "gk20a.h"
+ #include "vmm.h"
+
++static int
++gp10b_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
++ struct nvkm_vmm_map *map)
++{
++ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu);
++ int ret;
++
++ ret = gp100_vmm_valid(vmm, argv, argc, map);
++ if (ret < 0)
++ return ret;
++
++ map->type |= mmu->iommu_mask >> 4;
++
++ return 0;
++}
++
+ static const struct nvkm_vmm_func
+ gp10b_vmm = {
+ .join = gp100_vmm_join,
+ .part = gf100_vmm_part,
+ .aper = gk20a_vmm_aper,
+- .valid = gp100_vmm_valid,
++ .valid = gp10b_vmm_valid,
+ .flush = gp100_vmm_flush,
+ .mthd = gp100_vmm_mthd,
+ .invalidate_pdb = gp100_vmm_invalidate_pdb,
+
+From patchwork Mon Sep 16 15:04:09 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [08/11] drm/nouveau: tegra: Skip IOMMU initialization if already
+ attached
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331060
+Message-Id: <20190916150412.10025-9-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:09 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+If the GPU is already attached to an IOMMU, don't detach it and set up an
+explicit IOMMU domain. Since Nouveau can now properly handle the case of
+the DMA API being backed by an IOMMU, just continue using the DMA API.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
+ .../drm/nouveau/nvkm/engine/device/tegra.c | 19 +++++++------------
+ 1 file changed, 7 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+index d0d52c1d4aee..fc652aaa41c7 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+@@ -23,10 +23,6 @@
+ #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
+ #include "priv.h"
+
+-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+-#include <asm/dma-iommu.h>
+-#endif
+-
+ static int
+ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev)
+ {
+@@ -109,14 +105,13 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
+ unsigned long pgsize_bitmap;
+ int ret;
+
+-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+- if (dev->archdata.mapping) {
+- struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+-
+- arm_iommu_detach_device(dev);
+- arm_iommu_release_mapping(mapping);
+- }
+-#endif
++ /*
++ * Skip explicit IOMMU initialization if the GPU is already attached
++ * to an IOMMU domain. This can happen if the DMA API is backed by an
++ * IOMMU.
++ */
++ if (iommu_get_domain_for_dev(dev))
++ return;
+
+ if (!tdev->func->iommu_bit)
+ return;
+
+From patchwork Mon Sep 16 15:04:10 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [09/11] drm/nouveau: tegra: Fall back to 32-bit DMA mask without IOMMU
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331061
+Message-Id: <20190916150412.10025-10-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:10 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+The GPU can usually address more than 32 bits, even without being
+attached to an IOMMU. However, if the GPU is not attached to an IOMMU,
+there is likely no IOMMU in the system at all, in which case any
+buffers allocated by Nouveau may end up in a region of memory
+that cannot be accessed by host1x.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
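+
+For reference, DMA_BIT_MASK(n) builds an n-bit address mask, so the
+fallback below confines allocations to the low 4 GiB:
+
+    #include <linux/dma-mapping.h>
+
+    /* DMA_BIT_MASK(32) == 0x00000000ffffffff, the low 4 GiB     */
+    /* DMA_BIT_MASK(34) == 0x00000003ffffffff, for iommu_bit 34  */
+    ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));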
+ .../drm/nouveau/nvkm/engine/device/tegra.c | 111 +++++++++++-------
+ 1 file changed, 70 insertions(+), 41 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+index fc652aaa41c7..221238a2cf53 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
++++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+@@ -97,7 +97,7 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev)
+ return 0;
+ }
+
+-static void
++static int
+ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
+ {
+ #if IS_ENABLED(CONFIG_IOMMU_API)
+@@ -111,47 +111,65 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
+ * IOMMU.
+ */
+ if (iommu_get_domain_for_dev(dev))
+- return;
++ return -ENODEV;
+
+ if (!tdev->func->iommu_bit)
+- return;
++ return -ENODEV;
++
++ if (!iommu_present(&platform_bus_type))
++ return -ENODEV;
+
+ mutex_init(&tdev->iommu.mutex);
+
+- if (iommu_present(&platform_bus_type)) {
+- tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
+- if (!tdev->iommu.domain)
+- goto error;
++ tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
++ if (!tdev->iommu.domain)
++ return -ENOMEM;
+
+- /*
+- * A IOMMU is only usable if it supports page sizes smaller
+- * or equal to the system's PAGE_SIZE, with a preference if
+- * both are equal.
+- */
+- pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap;
+- if (pgsize_bitmap & PAGE_SIZE) {
+- tdev->iommu.pgshift = PAGE_SHIFT;
+- } else {
+- tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK);
+- if (tdev->iommu.pgshift == 0) {
+- dev_warn(dev, "unsupported IOMMU page size\n");
+- goto free_domain;
+- }
+- tdev->iommu.pgshift -= 1;
++ /*
++ * An IOMMU is only usable if it supports page sizes smaller or equal
++ * to the system's PAGE_SIZE, with a preference if both are equal.
++ */
++ pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap;
++ if (pgsize_bitmap & PAGE_SIZE) {
++ tdev->iommu.pgshift = PAGE_SHIFT;
++ } else {
++ tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK);
++ if (tdev->iommu.pgshift == 0) {
++ dev_warn(dev, "unsupported IOMMU page size\n");
++ ret = -ENOTSUPP;
++ goto free_domain;
+ }
+
+- ret = iommu_attach_device(tdev->iommu.domain, dev);
+- if (ret)
+- goto free_domain;
++ tdev->iommu.pgshift -= 1;
++ }
+
+- ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0,
+- (1ULL << tdev->func->iommu_bit) >>
+- tdev->iommu.pgshift, 1);
+- if (ret)
+- goto detach_device;
++ ret = iommu_attach_device(tdev->iommu.domain, dev);
++ if (ret) {
++ dev_warn(dev, "failed to attach to IOMMU: %d\n", ret);
++ goto free_domain;
++ }
++
++ ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0,
++ (1ULL << tdev->func->iommu_bit) >>
++ tdev->iommu.pgshift, 1);
++ if (ret) {
++ dev_warn(dev, "failed to initialize IOVA space: %d\n", ret);
++ goto detach_device;
++ }
++
++ /*
++ * The IOMMU bit defines the upper limit of the GPU-addressable space.
++ */
++ ret = dma_set_mask(dev, DMA_BIT_MASK(tdev->func->iommu_bit));
++ if (ret) {
++ dev_warn(dev, "failed to set DMA mask: %d\n", ret);
++ goto fini_mm;
+ }
+
+- return;
++ return 0;
++
++fini_mm:
++ nvkm_mm_fini(&tdev->iommu.mm);
+
+ detach_device:
+ iommu_detach_device(tdev->iommu.domain, dev);
+@@ -159,10 +177,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
+ free_domain:
+ iommu_domain_free(tdev->iommu.domain);
+
+-error:
++ /* reset these so that the DMA API code paths are executed */
+ tdev->iommu.domain = NULL;
+ tdev->iommu.pgshift = 0;
+- dev_err(dev, "cannot initialize IOMMU MM\n");
++
++ dev_warn(dev, "cannot initialize IOMMU MM\n");
++
++ return ret;
++#else
++ return -ENOTSUPP;
+ #endif
+ }
+
+@@ -327,14 +350,20 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
+ goto free;
+ }
+
+- /**
+- * The IOMMU bit defines the upper limit of the GPU-addressable space.
+- */
+- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit));
+- if (ret)
+- goto free;
+-
+- nvkm_device_tegra_probe_iommu(tdev);
++ ret = nvkm_device_tegra_probe_iommu(tdev);
++ if (ret) {
++ /*
++ * If we fail to set up an IOMMU, fall back to a 32-bit DMA
++ * mask. This is not necessary for the GPU to work because it
++ * can usually address all of system memory. However, if the
++ * buffers allocated by Nouveau are meant to be shared with
++ * the display controller, we need to restrict where they can
++ * come from.
++ */
++ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
++ if (ret)
++ goto free;
++ }
+
+ ret = nvkm_device_tegra_power_up(tdev);
+ if (ret)
+
+From patchwork Mon Sep 16 15:04:11 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [10/11] arm64: tegra: Enable GPU on Jetson TX2
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331063
+Message-Id: <20190916150412.10025-11-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:11 +0200
+
+From: Alexandre Courbot <acourbot@nvidia.com>
+
+Enable the GPU node for the Jetson TX2 board.
+
+Signed-off-by: Alexandre Courbot <acourbot@nvidia.com>
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
+ arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+index bdace01561ba..6f7c7c4c5c29 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
++++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts
+@@ -276,6 +276,10 @@
+ };
+ };
+
++ gpu@17000000 {
++ status = "okay";
++ };
++
+ gpio-keys {
+ compatible = "gpio-keys";
+
+
+From patchwork Mon Sep 16 15:04:12 2019
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [11/11] arm64: tegra: Enable SMMU for GPU on Tegra186
+From: Thierry Reding <thierry.reding@gmail.com>
+X-Patchwork-Id: 331062
+Message-Id: <20190916150412.10025-12-thierry.reding@gmail.com>
+To: Ben Skeggs <bskeggs@redhat.com>, Thierry Reding <thierry.reding@gmail.com>
+Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org,
+ dri-devel@lists.freedesktop.org
+Date: Mon, 16 Sep 2019 17:04:12 +0200
+
+From: Thierry Reding <treding@nvidia.com>
+
+The GPU has a connection to the ARM SMMU found on Tegra186, which can be
+used to support large pages. Make sure the GPU is attached to the SMMU
+to take advantage of its capabilities.
+
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+---
+ arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+index 47cd831fcf44..171fd4dfa58d 100644
+--- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi
++++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi
+@@ -1172,6 +1172,7 @@
+ status = "disabled";
+
+ power-domains = <&bpmp TEGRA186_POWER_DOMAIN_GPU>;
++ iommus = <&smmu TEGRA186_SID_GPU>;
+ };
+
+ sysram@30000000 {