summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThorsten Leemhuis <fedora@leemhuis.info>2017-06-20 21:47:18 +0200
committerThorsten Leemhuis <fedora@leemhuis.info>2017-06-20 21:47:18 +0200
commitcf203cd574020a47cd460d0eb22bccf40172e5de (patch)
tree38731f9e6b3ab1dcec07c65e3a4b2bd064d77457
parente0260a5ef682636e2d0269968881729f0294838f (diff)
parent96cfd83feb4d478b06e69537927e14d5f9264a14 (diff)
downloadkernel-4.11.6-301.vanilla.knurd.1.fc26.tar.gz
kernel-4.11.6-301.vanilla.knurd.1.fc26.tar.xz
kernel-4.11.6-301.vanilla.knurd.1.fc26.zip
Merge remote-tracking branch 'origin/f26' into f26-user-thl-vanilla-fedorakernel-4.11.6-301.vanilla.knurd.1.fc26
-rw-r--r--0001-efi-Fix-boot-panic-because-of-invalid-BGRT-image-add.patch114
-rw-r--r--RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch156
-rw-r--r--arm64-cavium-fixes.patch1789
-rw-r--r--baseconfig/CONFIG_B43LEGACY_DEBUG2
-rw-r--r--baseconfig/CONFIG_B43_DEBUG2
-rw-r--r--drm-i915-Do-not-drop-pagetables-when-empty.patch95
-rw-r--r--kernel-aarch64.config4
-rw-r--r--kernel-armv7hl-lpae.config4
-rw-r--r--kernel-armv7hl.config4
-rw-r--r--kernel-i686-PAE.config4
-rw-r--r--kernel-i686.config4
-rw-r--r--kernel-ppc64.config4
-rw-r--r--kernel-ppc64le.config4
-rw-r--r--kernel-ppc64p7.config4
-rw-r--r--kernel-s390x.config4
-rw-r--r--kernel-x86_64.config4
-rw-r--r--kernel.spec42
-rw-r--r--mm-fix-new-crash-in-unmapped_area_topdown.patch53
-rw-r--r--mm-larger-stack-guard-gap-between-vmas.patch889
-rw-r--r--sources2
-rw-r--r--vc4-tformat-fixes.patch428
21 files changed, 3370 insertions, 242 deletions
diff --git a/0001-efi-Fix-boot-panic-because-of-invalid-BGRT-image-add.patch b/0001-efi-Fix-boot-panic-because-of-invalid-BGRT-image-add.patch
deleted file mode 100644
index 4a714e36d..000000000
--- a/0001-efi-Fix-boot-panic-because-of-invalid-BGRT-image-add.patch
+++ /dev/null
@@ -1,114 +0,0 @@
-From 87c19e8de4f56d803d133c3e38bbd7b069e06df3 Mon Sep 17 00:00:00 2001
-From: Dave Young <dyoung@redhat.com>
-Date: Fri, 9 Jun 2017 08:45:58 +0000
-Subject: [PATCH] efi: Fix boot panic because of invalid BGRT image address
-
-Maniaxx reported a kernel boot crash in the EFI code, which I emulated
-by using same invalid phys addr in code:
-
- BUG: unable to handle kernel paging request at ffffffffff280001
- IP: efi_bgrt_init+0xfb/0x153
- ...
- Call Trace:
- ? bgrt_init+0xbc/0xbc
- acpi_parse_bgrt+0xe/0x12
- acpi_table_parse+0x89/0xb8
- acpi_boot_init+0x445/0x4e2
- ? acpi_parse_x2apic+0x79/0x79
- ? dmi_ignore_irq0_timer_override+0x33/0x33
- setup_arch+0xb63/0xc82
- ? early_idt_handler_array+0x120/0x120
- start_kernel+0xb7/0x443
- ? early_idt_handler_array+0x120/0x120
- x86_64_start_reservations+0x29/0x2b
- x86_64_start_kernel+0x154/0x177
- secondary_startup_64+0x9f/0x9f
-
-There is also a similar bug filed in bugzilla.kernel.org:
-
- https://bugzilla.kernel.org/show_bug.cgi?id=195633
-
-The crash is caused by this commit:
-
- 7b0a911478c7 efi/x86: Move the EFI BGRT init code to early init code
-
-The root cause is the firmware on those machines provides invalid BGRT
-image addresses.
-
-In a kernel before above commit BGRT initializes late and uses ioremap()
-to map the image address. Ioremap validates the address, if it is not a
-valid physical address ioremap() just fails and returns. However in current
-kernel EFI BGRT initializes early and uses early_memremap() which does not
-validate the image address, and kernel panic happens.
-
-According to ACPI spec the BGRT image address should fall into
-EFI_BOOT_SERVICES_DATA, see the section 5.2.22.4 of below document:
-
- http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
-
-Fix this issue by validating the image address in efi_bgrt_init(). If the
-image address does not fall into any EFI_BOOT_SERVICES_DATA areas we just
-bail out with a warning message.
-
-Reported-by: Maniaxx <tripleshiftone@gmail.com>
-Signed-off-by: Dave Young <dyoung@redhat.com>
-Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
-Cc: Linus Torvalds <torvalds@linux-foundation.org>
-Cc: Matt Fleming <matt@codeblueprint.co.uk>
-Cc: Peter Zijlstra <peterz@infradead.org>
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: linux-efi@vger.kernel.org
-Fixes: 7b0a911478c7 ("efi/x86: Move the EFI BGRT init code to early init code")
-Link: http://lkml.kernel.org/r/20170609084558.26766-2-ard.biesheuvel@linaro.org
-Signed-off-by: Ingo Molnar <mingo@kernel.org>
-[labbott@redhat.com: Backport to 4.11]
-Signed-off-by: Laura Abbott <labbott@redhat.com>
----
- arch/x86/platform/efi/efi-bgrt.c | 24 ++++++++++++++++++++++++
- 1 file changed, 24 insertions(+)
-
-diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
-index 04ca876..08ee795 100644
---- a/arch/x86/platform/efi/efi-bgrt.c
-+++ b/arch/x86/platform/efi/efi-bgrt.c
-@@ -27,6 +27,26 @@ struct bmp_header {
- u32 size;
- } __packed;
-
-+static bool efi_bgrt_addr_valid(u64 addr)
-+{
-+ efi_memory_desc_t *md;
-+
-+ for_each_efi_memory_desc(md) {
-+ u64 size;
-+ u64 end;
-+
-+ if (md->type != EFI_BOOT_SERVICES_DATA)
-+ continue;
-+
-+ size = md->num_pages << EFI_PAGE_SHIFT;
-+ end = md->phys_addr + size;
-+ if (addr >= md->phys_addr && addr < end)
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
- void __init efi_bgrt_init(struct acpi_table_header *table)
- {
- void *image;
-@@ -62,6 +82,10 @@ void __init efi_bgrt_init(struct acpi_table_header *table)
- goto out;
- }
-
-+ if (!efi_bgrt_addr_valid(bgrt->image_address)) {
-+ pr_notice("Ignoring BGRT: invalid image address\n");
-+ goto out;
-+ }
- image = early_memremap(bgrt->image_address, sizeof(bmp_header));
- if (!image) {
- pr_notice("Ignoring BGRT: failed to map image header memory\n");
---
-2.7.5
-
diff --git a/RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch b/RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch
new file mode 100644
index 000000000..d79fd256f
--- /dev/null
+++ b/RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch
@@ -0,0 +1,156 @@
+From patchwork Thu Jun 15 15:28:58 2017
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [RFC] audit: fix a race condition with the auditd tracking code
+From: Paul Moore <pmoore@redhat.com>
+X-Patchwork-Id: 9789009
+Message-Id: <149754053819.11365.5047864735077505545.stgit@sifl>
+To: linux-audit@redhat.com
+Cc: Dusty Mabe <dustymabe@redhat.com>
+Date: Thu, 15 Jun 2017 11:28:58 -0400
+
+From: Paul Moore <paul@paul-moore.com>
+
+Originally reported by Adam and Dusty, it appears we have a small
+race window in kauditd_thread(), as documented in the Fedora BZ:
+
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1459326#c35
+
+ "This issue is partly due to the read-copy nature of RCU, and
+ partly due to how we sync the auditd_connection state across
+ kauditd_thread and the audit control channel. The kauditd_thread
+ thread is always running so it can service the record queues and
+ emit the multicast messages, if it happens to be just past the
+ "main_queue" label, but before the "if (sk == NULL || ...)"
+ if-statement which calls auditd_reset() when the new auditd
+ connection is registered it could end up resetting the auditd
+ connection, regardless of if it is valid or not. This is a rather
+ small window and the variable nature of multi-core scheduling
+ explains why this is proving rather difficult to reproduce."
+
+The fix is to have functions only call auditd_reset() when they
+believe that the kernel/auditd connection is still valid, e.g.
+non-NULL, and to have these callers pass their local copy of the
+auditd_connection pointer to auditd_reset() where it can be compared
+with the current connection state before resetting. If the caller
+has a stale state tracking pointer then the reset is ignored.
+
+We also make a small change to kauditd_thread() so that if the
+kernel/auditd connection is dead we skip the retry queue and send the
+records straight to the hold queue. This is necessary as we used to
+rely on auditd_reset() to occasionally purge the retry queue but we
+are going to be calling the reset function much less now and we want
+to make sure the retry queue doesn't grow unbounded.
+
+Reported-by: Adam Williamson <awilliam@redhat.com>
+Reported-by: Dusty Mabe <dustymabe@redhat.com>
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Reviewed-by: Richard Guy Briggs <rgb@redhat.com>
+---
+ kernel/audit.c | 36 +++++++++++++++++++++++-------------
+ 1 file changed, 23 insertions(+), 13 deletions(-)
+
+
+--
+Linux-audit mailing list
+Linux-audit@redhat.com
+https://www.redhat.com/mailman/listinfo/linux-audit
+
+diff --git a/kernel/audit.c b/kernel/audit.c
+index b2e877100242..e1e2b3abfb93 100644
+--- a/kernel/audit.c
++++ b/kernel/audit.c
+@@ -575,12 +575,16 @@ static void kauditd_retry_skb(struct sk_buff *skb)
+
+ /**
+ * auditd_reset - Disconnect the auditd connection
++ * @ac: auditd connection state
+ *
+ * Description:
+ * Break the auditd/kauditd connection and move all the queued records into the
+- * hold queue in case auditd reconnects.
++ * hold queue in case auditd reconnects. It is important to note that the @ac
++ * pointer should never be dereferenced inside this function as it may be NULL
++ * or invalid, you can only compare the memory address! If @ac is NULL then
++ * the connection will always be reset.
+ */
+-static void auditd_reset(void)
++static void auditd_reset(const struct auditd_connection *ac)
+ {
+ unsigned long flags;
+ struct sk_buff *skb;
+@@ -590,6 +594,11 @@ static void auditd_reset(void)
+ spin_lock_irqsave(&auditd_conn_lock, flags);
+ ac_old = rcu_dereference_protected(auditd_conn,
+ lockdep_is_held(&auditd_conn_lock));
++ if (ac && ac != ac_old) {
++ /* someone already registered a new auditd connection */
++ spin_unlock_irqrestore(&auditd_conn_lock, flags);
++ return;
++ }
+ rcu_assign_pointer(auditd_conn, NULL);
+ spin_unlock_irqrestore(&auditd_conn_lock, flags);
+
+@@ -649,8 +658,8 @@ static int auditd_send_unicast_skb(struct sk_buff *skb)
+ return rc;
+
+ err:
+- if (rc == -ECONNREFUSED)
+- auditd_reset();
++ if (ac && rc == -ECONNREFUSED)
++ auditd_reset(ac);
+ return rc;
+ }
+
+@@ -795,9 +804,9 @@ static int kauditd_thread(void *dummy)
+ rc = kauditd_send_queue(sk, portid,
+ &audit_hold_queue, UNICAST_RETRIES,
+ NULL, kauditd_rehold_skb);
+- if (rc < 0) {
++ if (ac && rc < 0) {
+ sk = NULL;
+- auditd_reset();
++ auditd_reset(ac);
+ goto main_queue;
+ }
+
+@@ -805,9 +814,9 @@ static int kauditd_thread(void *dummy)
+ rc = kauditd_send_queue(sk, portid,
+ &audit_retry_queue, UNICAST_RETRIES,
+ NULL, kauditd_hold_skb);
+- if (rc < 0) {
++ if (ac && rc < 0) {
+ sk = NULL;
+- auditd_reset();
++ auditd_reset(ac);
+ goto main_queue;
+ }
+
+@@ -815,12 +824,13 @@ static int kauditd_thread(void *dummy)
+ /* process the main queue - do the multicast send and attempt
+ * unicast, dump failed record sends to the retry queue; if
+ * sk == NULL due to previous failures we will just do the
+- * multicast send and move the record to the retry queue */
++ * multicast send and move the record to the hold queue */
+ rc = kauditd_send_queue(sk, portid, &audit_queue, 1,
+ kauditd_send_multicast_skb,
+- kauditd_retry_skb);
+- if (sk == NULL || rc < 0)
+- auditd_reset();
++ (sk ?
++ kauditd_retry_skb : kauditd_hold_skb));
++ if (ac && rc < 0)
++ auditd_reset(ac);
+ sk = NULL;
+
+ /* drop our netns reference, no auditd sends past this line */
+@@ -1230,7 +1240,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+ auditd_pid, 1);
+
+ /* unregister the auditd connection */
+- auditd_reset();
++ auditd_reset(NULL);
+ }
+ }
+ if (s.mask & AUDIT_STATUS_RATE_LIMIT) {
diff --git a/arm64-cavium-fixes.patch b/arm64-cavium-fixes.patch
new file mode 100644
index 000000000..7d4ce66d4
--- /dev/null
+++ b/arm64-cavium-fixes.patch
@@ -0,0 +1,1789 @@
+From 35412fc5240640825faa81068e5269069f90d86f Mon Sep 17 00:00:00 2001
+From: Jayachandran C <jnair@caviumnetworks.com>
+Date: Thu, 13 Apr 2017 20:30:44 +0000
+Subject: [PATCH 01/41] PCI: Add device flag PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT
+
+Add a new quirk flag PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT to limit the DMA alias
+search to go no further than the bridge where the IOMMU unit is attached.
+
+The flag will be used to indicate a bridge device which forwards the
+address translation requests to the IOMMU, i.e., where the interrupt and
+DMA requests leave the PCIe hierarchy and go into the system blocks.
+
+Usually this happens at the PCI RC, so this flag is not needed. But on
+systems where there are bridges that introduce aliases above the IOMMU,
+this flag prevents pci_for_each_dma_alias() from generating aliases that
+the IOMMU will never see.
+
+The function pci_for_each_dma_alias() is updated to stop when it see a
+bridge with this flag set.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=195447
+Signed-off-by: Jayachandran C <jnair@caviumnetworks.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Acked-by: David Daney <david.daney@cavium.com>
+(cherry picked from commit ffff885832101543c002cef7abcab0fd27a9aee1)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/pci/search.c | 4 ++++
+ include/linux/pci.h | 2 ++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/pci/search.c b/drivers/pci/search.c
+index 33e0f033a48e..4c6044ad7368 100644
+--- a/drivers/pci/search.c
++++ b/drivers/pci/search.c
+@@ -60,6 +60,10 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
+
+ tmp = bus->self;
+
++ /* stop at bridge where translation unit is associated */
++ if (tmp->dev_flags & PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT)
++ return ret;
++
+ /*
+ * PCIe-to-PCI/X bridges alias transactions from downstream
+ * devices using the subordinate bus number (PCI Express to
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index eb3da1a04e6c..3f596acc05be 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -178,6 +178,8 @@ enum pci_dev_flags {
+ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
+ /* Get VPD from function 0 VPD */
+ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
++ /* a non-root bridge where translation occurs, stop alias search here */
++ PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
+ /*
+ * Resume before calling the driver's system suspend hooks, disabling
+ * the direct_complete optimization.
+--
+2.11.0
+
+From 2fe0ffd43a57035207a124421e9164401aabb9d4 Mon Sep 17 00:00:00 2001
+From: Jayachandran C <jnair@caviumnetworks.com>
+Date: Thu, 13 Apr 2017 20:30:45 +0000
+Subject: [PATCH 02/41] PCI: Avoid generating invalid ThunderX2 DMA aliases
+
+On Cavium ThunderX2 arm64 SoCs (formerly known as Broadcom Vulcan), the PCI
+topology is slightly unusual. For a multi-node system, it looks like:
+
+ 00:00.0 PCI bridge to [bus 01-1e]
+ 01:0a.0 PCI-to-PCIe bridge to [bus 02-04]
+ 02:00.0 PCIe Root Port bridge to [bus 03-04] (XLATE_ROOT)
+ 03:00.0 PCIe Endpoint
+
+pci_for_each_dma_alias() assumes IOMMU translation is done at the root of
+the PCI hierarchy. It generates 03:00.0, 01:0a.0, and 00:00.0 as DMA
+aliases for 03:00.0 because buses 01 and 00 are non-PCIe buses that don't
+carry the Requester ID.
+
+Because the ThunderX2 IOMMU is at 02:00.0, the Requester IDs 01:0a.0 and
+00:00.0 are never valid for the endpoint. This quirk stops alias
+generation at the XLATE_ROOT bridge so we won't generate 01:0a.0 or
+00:00.0.
+
+The current IOMMU code only maps the last alias (this is a separate bug in
+itself). Prior to this quirk, we only created IOMMU mappings for the
+invalid Requester ID 00:00:0, which never matched any DMA transactions.
+
+With this quirk, we create IOMMU mappings for a valid Requester ID, which
+fixes devices with no aliases but leaves devices with aliases still broken.
+
+The last alias for the endpoint is also used by the ARM GICv3 MSI-X code.
+Without this quirk, the GIC Interrupt Translation Tables are setup with the
+invalid Requester ID, and the MSI-X generated by the device fails to be
+translated and routed.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=195447
+Signed-off-by: Jayachandran C <jnair@caviumnetworks.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Acked-by: David Daney <david.daney@cavium.com>
+(cherry picked from commit 45a2329367386342d41ea9414c88b023f5a79055)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/pci/quirks.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 673683660b5c..96d332978719 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -3958,6 +3958,20 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2260, quirk_mic_x200_dma_alias);
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2264, quirk_mic_x200_dma_alias);
+
+ /*
++ * The IOMMU and interrupt controller on Broadcom Vulcan/Cavium ThunderX2 are
++ * associated not at the root bus, but at a bridge below. This quirk avoids
++ * generating invalid DMA aliases.
++ */
++static void quirk_bridge_cavm_thrx2_pcie_root(struct pci_dev *pdev)
++{
++ pdev->dev_flags |= PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT;
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000,
++ quirk_bridge_cavm_thrx2_pcie_root);
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084,
++ quirk_bridge_cavm_thrx2_pcie_root);
++
++/*
+ * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
+ * class code. Fix it.
+ */
+--
+2.11.0
+
+From 153c6a5945340315126a7ec31cf52129ad782398 Mon Sep 17 00:00:00 2001
+From: Ashok Kumar Sekar <asekar@redhat.com>
+Date: Fri, 23 Sep 2016 04:16:19 -0700
+Subject: [PATCH 03/41] PCI: Vulcan: AHCI PCI bar fix for Broadcom Vulcan early
+ silicon
+
+PCI BAR 5 is not setup correctly for the on-board AHCI
+controller on Broadcom's Vulcan processor. Added a quirk to fix BAR 5
+by using BAR 4's resources which are populated correctly but NOT used
+by the AHCI controller actually.
+
+Signed-off-by: Ashok Kumar Sekar <asekar@redhat.com>
+Signed-off-by: Jayachandran C <jchandra@broadcom.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/pci/quirks.c | 24 ++++++++++++++++++++++++
+ 1 file changed, 24 insertions(+)
+
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 96d332978719..0966a22eae13 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -3972,6 +3972,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084,
+ quirk_bridge_cavm_thrx2_pcie_root);
+
+ /*
++ * PCI BAR 5 is not setup correctly for the on-board AHCI controller
++ * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by
++ * using BAR 4's resources which are populated correctly and NOT
++ * actually used by the AHCI controller.
++ */
++static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev)
++{
++ struct resource *r = &dev->resource[4];
++
++ if (!(r->flags & IORESOURCE_MEM) || (r->start == 0))
++ return;
++
++ /* Set BAR5 resource to BAR4 */
++ dev->resource[5] = *r;
++
++ /* Update BAR5 in pci config space */
++ pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start);
++
++ /* Clear BAR4's resource */
++ memset(r, 0, sizeof(*r));
++}
++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars);
++
++/*
+ * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero)
+ * class code. Fix it.
+ */
+--
+2.11.0
+
+From bfe8e2f6bb2922b76650e3d43782132b0c388508 Mon Sep 17 00:00:00 2001
+From: Jayachandran C <jnair@caviumnetworks.com>
+Date: Fri, 10 Mar 2017 10:04:52 +0000
+Subject: [PATCH 04/41] ahci: thunderx2: Fix for errata that affects stop
+ engine
+
+Apply workaround for this errata:
+ Synopsis: Resetting PxCMD.ST may hang the SATA device
+
+ Description: An internal ping-pong buffer state is not reset
+ correctly for an PxCMD.ST=0 command for a SATA channel. This
+ may cause the SATA interface to hang when a PxCMD.ST=0 command
+ is received.
+
+ Workaround: A SATA_BIU_CORE_ENABLE.sw_init_bsi must be asserted
+ by the driver whenever the PxCMD.ST needs to be de-asserted. This
+ will reset both the ports. So, it may not always work in a 2
+ channel SATA system.
+
+ Resolution: Fix in B0.
+
+Add the code to ahci_stop_engine() to do this. It is not easy to
+stop the other "port" since it is associated with a different AHCI
+interface. Please note that with this fix, SATA reset does not
+hang any more, but it can cause failures on the other interface
+if that is in active use.
+
+Unfortunately, we have nothing other the the CPU ID to check if the
+SATA block has this issue.
+
+Signed-off-by: Jayachandran C <jnair@caviumnetworks.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/ata/libahci.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c
+index 3159f9e66d8f..5f64275ef387 100644
+--- a/drivers/ata/libahci.c
++++ b/drivers/ata/libahci.c
+@@ -664,6 +664,20 @@ int ahci_stop_engine(struct ata_port *ap)
+ tmp &= ~PORT_CMD_START;
+ writel(tmp, port_mmio + PORT_CMD);
+
++#ifdef CONFIG_ARM64
++ /* Rev Ax of Cavium CN99XX needs a hack for port stop */
++ if (MIDR_IS_CPU_MODEL_RANGE(read_cpuid_id(),
++ MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN),
++ MIDR_CPU_VAR_REV(0, 0),
++ MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) {
++ tmp = readl(hpriv->mmio + 0x8000);
++ writel(tmp | (1 << 26), hpriv->mmio + 0x8000);
++ udelay(1);
++ writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000);
++ dev_warn(ap->host->dev, "CN99XX stop engine fix applied!\n");
++ }
++#endif
++
+ /* wait for engine to stop. This could be as long as 500 msec */
+ tmp = ata_wait_register(ap, port_mmio + PORT_CMD,
+ PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500);
+--
+2.11.0
+
+From 4bd9ffa0dc3a064f1191b117cd3b43e2fa310c81 Mon Sep 17 00:00:00 2001
+From: Sunil Goutham <sgoutham@cavium.com>
+Date: Tue, 28 Mar 2017 16:11:12 +0530
+Subject: [PATCH 14/41] iommu/arm-smmu: Fix 16-bit ASID configuration
+
+16-bit ASID should be enabled before initializing TTBR0/1,
+otherwise only LSB 8-bit ASID will be considered. Hence
+moving configuration of TTBCR register ahead of TTBR0/1
+while initializing context bank.
+
+Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
+[will: rewrote comment]
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+
+(cherry picked from commit 125458ab3aefe9cf2f72dcfe7338dc9ad967da0b)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu.c | 42 +++++++++++++++++++++++-------------------
+ 1 file changed, 23 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index b493c99e17f7..9905f08058b6 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -758,6 +758,29 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
+ }
+ writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));
+
++ /*
++ * TTBCR
++ * We must write this before the TTBRs, since it determines the
++ * access behaviour of some fields (in particular, ASID[15:8]).
++ */
++ if (stage1) {
++ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
++ reg = pgtbl_cfg->arm_v7s_cfg.tcr;
++ reg2 = 0;
++ } else {
++ reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
++ reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
++ reg2 |= TTBCR2_SEP_UPSTREAM;
++ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
++ reg2 |= TTBCR2_AS;
++ }
++ if (smmu->version > ARM_SMMU_V1)
++ writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
++ } else {
++ reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
++ }
++ writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
++
+ /* TTBRs */
+ if (stage1) {
+ u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);
+@@ -781,25 +804,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
+ writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
+ }
+
+- /* TTBCR */
+- if (stage1) {
+- if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+- reg = pgtbl_cfg->arm_v7s_cfg.tcr;
+- reg2 = 0;
+- } else {
+- reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
+- reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
+- reg2 |= TTBCR2_SEP_UPSTREAM;
+- if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
+- reg2 |= TTBCR2_AS;
+- }
+- if (smmu->version > ARM_SMMU_V1)
+- writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
+- } else {
+- reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
+- }
+- writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);
+-
+ /* MAIRs (stage-1 only) */
+ if (stage1) {
+ if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
+--
+2.11.0
+
+From a2eca90cb82c389bbe1da93a08355210e7c5c393 Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@cavium.com>
+Date: Mon, 13 Mar 2017 11:39:01 +0100
+Subject: [PATCH 15/41] iommu/arm-smmu: Print message when Cavium erratum 27704
+ was detected
+
+Firmware is responsible for properly enabling smmu workarounds. Print
+a message for better diagnostics when Cavium erratum 27704 was
+detected.
+
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit 53c35dce45713d2a554109c21a8cd617d09eba50)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index 9905f08058b6..e021b360e315 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -1891,6 +1891,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
+ atomic_add_return(smmu->num_context_banks,
+ &cavium_smmu_context_count);
+ smmu->cavium_id_base -= smmu->num_context_banks;
++ dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
+ }
+
+ /* ID2 */
+--
+2.11.0
+
+From e319b8d378a4701d36030e140d17fb48aea1ff32 Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@cavium.com>
+Date: Thu, 16 Mar 2017 18:01:59 +0100
+Subject: [PATCH 16/41] iommu/arm-smmu, ACPI: Enable Cavium SMMU-v2
+
+In next IORT spec release there will be a definition of a Cavium
+specific model. Until then, enable the Cavium SMMU using cpu id
+registers. All versions of Cavium's SMMUv2 implementation must be
+enabled.
+
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu.c | 22 +++++++++++++++++++++-
+ 1 file changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index e021b360e315..24a1df09eaac 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -53,6 +53,8 @@
+
+ #include <linux/amba/bus.h>
+
++#include <asm/cputype.h>
++
+ #include "io-pgtable.h"
+
+ /* Maximum number of context banks per SMMU */
+@@ -1986,6 +1988,24 @@ static const struct of_device_id arm_smmu_of_match[] = {
+ MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
+
+ #ifdef CONFIG_ACPI
++
++static int acpi_smmu_enable_cavium(struct arm_smmu_device *smmu, int ret)
++{
++ u32 cpu_model;
++
++ if (!IS_ENABLED(CONFIG_ARM64))
++ return ret;
++
++ cpu_model = read_cpuid_id() & MIDR_CPU_MODEL_MASK;
++ if (cpu_model != MIDR_THUNDERX)
++ return ret;
++
++ smmu->version = ARM_SMMU_V2;
++ smmu->model = CAVIUM_SMMUV2;
++
++ return 0;
++}
++
+ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
+ {
+ int ret = 0;
+@@ -2008,7 +2028,7 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
+ ret = -ENODEV;
+ }
+
+- return ret;
++ return acpi_smmu_enable_cavium(smmu, ret);
+ }
+
+ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+--
+2.11.0
+
+From 7d2abb4fa3e9dcebd3081cb91d84bd7339e29431 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 6 Jan 2017 16:28:17 +0000
+Subject: [PATCH 21/41] iommu/arm-smmu: Restrict domain attributes to UNMANAGED
+ domains
+
+The ARM SMMU drivers provide a DOMAIN_ATTR_NESTING domain attribute,
+which allows callers of the IOMMU API to request that the page table
+for a domain is installed at stage-2, if supported by the hardware.
+
+Since setting this attribute only makes sense for UNMANAGED domains,
+this patch returns -ENODEV if the domain_{get,set}_attr operations are
+called on other domain types.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit 0834cc28fa56c65887c614b6c045be2ba06fdcb0)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu-v3.c | 6 ++++++
+ drivers/iommu/arm-smmu.c | 6 ++++++
+ 2 files changed, 12 insertions(+)
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 591bb96047c9..b47a88757c18 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -1837,6 +1837,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+ {
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
++ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
++ return -EINVAL;
++
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+@@ -1852,6 +1855,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+ int ret = 0;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
++ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
++ return -EINVAL;
++
+ mutex_lock(&smmu_domain->init_mutex);
+
+ switch (attr) {
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index 24a1df09eaac..cdedb0933d48 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -1555,6 +1555,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
+ {
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
++ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
++ return -EINVAL;
++
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+@@ -1570,6 +1573,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
+ int ret = 0;
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
++ if (domain->type != IOMMU_DOMAIN_UNMANAGED)
++ return -EINVAL;
++
+ mutex_lock(&smmu_domain->init_mutex);
+
+ switch (attr) {
+--
+2.11.0
+
+From 2142445d92c24592e57646087340dbd425fdfb6b Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 6 Jan 2017 16:56:03 +0000
+Subject: [PATCH 22/41] iommu/arm-smmu: Install bypass S2CRs for
+ IOMMU_DOMAIN_IDENTITY domains
+
+In preparation for allowing the default domain type to be overridden,
+this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the
+ARM SMMU driver.
+
+An identity domain is created by placing the corresponding S2CR
+registers into "bypass" mode, which allows transactions to flow through
+the SMMU without any translation.
+
+Reviewed-by: Robin Murphy <robin.murphy@arm.com>
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit 61bc671179f19060be883068b6d3d82ae0b24bc0)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index cdedb0933d48..88e9131b6900 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -418,6 +418,7 @@ enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
++ ARM_SMMU_DOMAIN_BYPASS,
+ };
+
+ struct arm_smmu_domain {
+@@ -844,6 +845,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
+ if (smmu_domain->smmu)
+ goto out_unlock;
+
++ if (domain->type == IOMMU_DOMAIN_IDENTITY) {
++ smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
++ smmu_domain->smmu = smmu;
++ goto out_unlock;
++ }
++
+ /*
+ * Mapping the requested stage onto what we support is surprisingly
+ * complicated, mainly because the spec allows S1+S2 SMMUs without
+@@ -1004,7 +1011,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
+ void __iomem *cb_base;
+ int irq;
+
+- if (!smmu)
++ if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
+ return;
+
+ /*
+@@ -1027,7 +1034,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
+ {
+ struct arm_smmu_domain *smmu_domain;
+
+- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
++ if (type != IOMMU_DOMAIN_UNMANAGED &&
++ type != IOMMU_DOMAIN_DMA &&
++ type != IOMMU_DOMAIN_IDENTITY)
+ return NULL;
+ /*
+ * Allocate the domain and initialise some of its data structures.
+@@ -1256,10 +1265,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
+ {
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_s2cr *s2cr = smmu->s2crs;
+- enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
+ u8 cbndx = smmu_domain->cfg.cbndx;
++ enum arm_smmu_s2cr_type type;
+ int i, idx;
+
++ if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
++ type = S2CR_TYPE_BYPASS;
++ else
++ type = S2CR_TYPE_TRANS;
++
+ for_each_cfg_sme(fwspec, i, idx) {
+ if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
+ continue;
+--
+2.11.0
+
+From 5f43f8eb48ae0c6c6c74b4a299e5ba1d6d1fe0b3 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Wed, 1 Mar 2017 21:11:29 +0000
+Subject: [PATCH 23/41] iommu/arm-smmu-v3: Make arm_smmu_install_ste_for_dev
+ return void
+
+arm_smmu_install_ste_for_dev cannot fail and always returns 0, however
+the fact that it returns int means that callers end up implementing
+redundant error handling code which complicates STE tracking and is
+never executed.
+
+This patch changes the return type of arm_smmu_install_ste_for_dev
+to void, to make it explicit that it cannot fail.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit 67560edcd8e5c57eccec4df562abbfc21c17ad75)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu-v3.c | 12 +++---------
+ 1 file changed, 3 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index b47a88757c18..97be8de3e834 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -1579,7 +1579,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
+ return step;
+ }
+
+-static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
++static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
+ {
+ int i;
+ struct arm_smmu_master_data *master = fwspec->iommu_priv;
+@@ -1591,8 +1591,6 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
+
+ arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
+ }
+-
+- return 0;
+ }
+
+ static void arm_smmu_detach_dev(struct device *dev)
+@@ -1600,8 +1598,7 @@ static void arm_smmu_detach_dev(struct device *dev)
+ struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
+
+ master->ste.bypass = true;
+- if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0)
+- dev_warn(dev, "failed to install bypass STE\n");
++ arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ }
+
+ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
+@@ -1653,10 +1650,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
+ ste->s2_cfg = &smmu_domain->s2_cfg;
+ }
+
+- ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+- if (ret < 0)
+- ste->valid = false;
+-
++ arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ out_unlock:
+ mutex_unlock(&smmu_domain->init_mutex);
+ return ret;
+--
+2.11.0
+
+From 2e7b81f22936290d872bc624599f7c25f7513829 Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Fri, 6 Jan 2017 16:27:30 +0000
+Subject: [PATCH 24/41] iommu/arm-smmu-v3: Install bypass STEs for
+ IOMMU_DOMAIN_IDENTITY domains
+
+In preparation for allowing the default domain type to be overridden,
+this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the
+ARM SMMUv3 driver.
+
+An identity domain is created by placing the corresponding stream table
+entries into "bypass" mode, which allows transactions to flow through
+the SMMU without any translation.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit beb3c6a066bff1ba412f983cb9d1a42f4cd8f76a)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu-v3.c | 58 +++++++++++++++++++++++++++++----------------
+ 1 file changed, 37 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 97be8de3e834..803352d78d43 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -554,9 +554,14 @@ struct arm_smmu_s2_cfg {
+ };
+
+ struct arm_smmu_strtab_ent {
+- bool valid;
+-
+- bool bypass; /* Overrides s1/s2 config */
++ /*
++ * An STE is "assigned" if the master emitting the corresponding SID
++ * is attached to a domain. The behaviour of an unassigned STE is
++ * determined by the disable_bypass parameter, whereas an assigned
++ * STE behaves according to s1_cfg/s2_cfg, which themselves are
++ * configured according to the domain type.
++ */
++ bool assigned;
+ struct arm_smmu_s1_cfg *s1_cfg;
+ struct arm_smmu_s2_cfg *s2_cfg;
+ };
+@@ -632,6 +637,7 @@ enum arm_smmu_domain_stage {
+ ARM_SMMU_DOMAIN_S1 = 0,
+ ARM_SMMU_DOMAIN_S2,
+ ARM_SMMU_DOMAIN_NESTED,
++ ARM_SMMU_DOMAIN_BYPASS,
+ };
+
+ struct arm_smmu_domain {
+@@ -1005,9 +1011,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
+ * This is hideously complicated, but we only really care about
+ * three cases at the moment:
+ *
+- * 1. Invalid (all zero) -> bypass (init)
+- * 2. Bypass -> translation (attach)
+- * 3. Translation -> bypass (detach)
++ * 1. Invalid (all zero) -> bypass/fault (init)
++ * 2. Bypass/fault -> translation/bypass (attach)
++ * 3. Translation/bypass -> bypass/fault (detach)
+ *
+ * Given that we can't update the STE atomically and the SMMU
+ * doesn't read the thing in a defined order, that leaves us
+@@ -1046,11 +1052,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
+ }
+
+ /* Nuke the existing STE_0 value, as we're going to rewrite it */
+- val = ste->valid ? STRTAB_STE_0_V : 0;
++ val = STRTAB_STE_0_V;
++
++ /* Bypass/fault */
++ if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) {
++ if (!ste->assigned && disable_bypass)
++ val |= STRTAB_STE_0_CFG_ABORT;
++ else
++ val |= STRTAB_STE_0_CFG_BYPASS;
+
+- if (ste->bypass) {
+- val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT
+- : STRTAB_STE_0_CFG_BYPASS;
+ dst[0] = cpu_to_le64(val);
+ dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING
+ << STRTAB_STE_1_SHCFG_SHIFT);
+@@ -1111,10 +1121,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid,
+ static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent)
+ {
+ unsigned int i;
+- struct arm_smmu_strtab_ent ste = {
+- .valid = true,
+- .bypass = true,
+- };
++ struct arm_smmu_strtab_ent ste = { .assigned = false };
+
+ for (i = 0; i < nent; ++i) {
+ arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste);
+@@ -1378,7 +1385,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
+ {
+ struct arm_smmu_domain *smmu_domain;
+
+- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
++ if (type != IOMMU_DOMAIN_UNMANAGED &&
++ type != IOMMU_DOMAIN_DMA &&
++ type != IOMMU_DOMAIN_IDENTITY)
+ return NULL;
+
+ /*
+@@ -1509,6 +1518,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct arm_smmu_device *smmu = smmu_domain->smmu;
+
++ if (domain->type == IOMMU_DOMAIN_IDENTITY) {
++ smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
++ return 0;
++ }
++
+ /* Restrict the stage to what we can actually support */
+ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
+@@ -1597,7 +1611,7 @@ static void arm_smmu_detach_dev(struct device *dev)
+ {
+ struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv;
+
+- master->ste.bypass = true;
++ master->ste.assigned = false;
+ arm_smmu_install_ste_for_dev(dev->iommu_fwspec);
+ }
+
+@@ -1617,7 +1631,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
+ ste = &master->ste;
+
+ /* Already attached to a different domain? */
+- if (!ste->bypass)
++ if (ste->assigned)
+ arm_smmu_detach_dev(dev);
+
+ mutex_lock(&smmu_domain->init_mutex);
+@@ -1638,10 +1652,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
+ goto out_unlock;
+ }
+
+- ste->bypass = false;
+- ste->valid = true;
++ ste->assigned = true;
+
+- if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
++ if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) {
++ ste->s1_cfg = NULL;
++ ste->s2_cfg = NULL;
++ } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ ste->s1_cfg = &smmu_domain->s1_cfg;
+ ste->s2_cfg = NULL;
+ arm_smmu_write_ctx_desc(smmu, ste->s1_cfg);
+@@ -1801,7 +1817,7 @@ static void arm_smmu_remove_device(struct device *dev)
+
+ master = fwspec->iommu_priv;
+ smmu = master->smmu;
+- if (master && master->ste.valid)
++ if (master && master->ste.assigned)
+ arm_smmu_detach_dev(dev);
+ iommu_group_remove_device(dev);
+ iommu_device_unlink(&smmu->iommu, dev);
+--
+2.11.0
+
+From ed65b7b197ed312ee0aa1d347240061dbc8fd4cf Mon Sep 17 00:00:00 2001
+From: Will Deacon <will.deacon@arm.com>
+Date: Thu, 5 Jan 2017 18:38:26 +0000
+Subject: [PATCH 25/41] iommu: Allow default domain type to be set on the
+ kernel command line
+
+The IOMMU core currently initialises the default domain for each group
+to IOMMU_DOMAIN_DMA, under the assumption that devices will use
+IOMMU-backed DMA ops by default. However, in some cases it is desirable
+for the DMA ops to bypass the IOMMU for performance reasons, reserving
+use of translation for subsystems such as VFIO that require it for
+enforcing device isolation.
+
+Rather than modify each IOMMU driver to provide different semantics for
+DMA domains, instead we introduce a command line parameter that can be
+used to change the type of the default domain. Passthrough can then be
+specified using "iommu.passthrough=1" on the kernel command line.
+
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+(cherry picked from commit fccb4e3b8ab0957628abec82675691c72f67003e)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 6 ++++++
+ drivers/iommu/iommu.c | 28 ++++++++++++++++++++++---
+ 2 files changed, 31 insertions(+), 3 deletions(-)
+
+diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
+index facc20a3f962..cb91f26cc8bc 100644
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -1644,6 +1644,12 @@
+ nobypass [PPC/POWERNV]
+ Disable IOMMU bypass, using IOMMU for PCI devices.
+
++ iommu.passthrough=
++ [ARM64] Configure DMA to bypass the IOMMU by default.
++ Format: { "0" | "1" }
++ 0 - Use IOMMU translation for DMA.
++ 1 - Bypass the IOMMU for DMA.
++ unset - Use IOMMU translation for DMA.
+
+ io7= [HW] IO7 for Marvel based alpha systems
+ See comment before marvel_specify_io7 in
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index 3b67144dead2..770ba7e7ef4d 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -36,6 +36,7 @@
+
+ static struct kset *iommu_group_kset;
+ static DEFINE_IDA(iommu_group_ida);
++static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+
+ struct iommu_callback_data {
+ const struct iommu_ops *ops;
+@@ -112,6 +113,18 @@ static int __iommu_attach_group(struct iommu_domain *domain,
+ static void __iommu_detach_group(struct iommu_domain *domain,
+ struct iommu_group *group);
+
++static int __init iommu_set_def_domain_type(char *str)
++{
++ bool pt;
++
++ if (!str || strtobool(str, &pt))
++ return -EINVAL;
++
++ iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA;
++ return 0;
++}
++early_param("iommu.passthrough", iommu_set_def_domain_type);
++
+ static ssize_t iommu_group_attr_show(struct kobject *kobj,
+ struct attribute *__attr, char *buf)
+ {
+@@ -1015,10 +1028,19 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev)
+ * IOMMU driver.
+ */
+ if (!group->default_domain) {
+- group->default_domain = __iommu_domain_alloc(dev->bus,
+- IOMMU_DOMAIN_DMA);
++ struct iommu_domain *dom;
++
++ dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type);
++ if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) {
++ dev_warn(dev,
++ "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA",
++ iommu_def_domain_type);
++ dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA);
++ }
++
++ group->default_domain = dom;
+ if (!group->domain)
+- group->domain = group->default_domain;
++ group->domain = dom;
+ }
+
+ ret = iommu_group_add_device(group, dev);
+--
+2.11.0
+
+From 11eb465df795bea2c26cb3877ceb606336406a32 Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@cavium.com>
+Date: Wed, 12 Apr 2017 15:06:03 +0200
+Subject: [PATCH 26/41] iommu: Print a message with the default domain type
+ created
+
+There are several ways the bypass mode can be enabled. With commit
+
+ fccb4e3b8ab0 iommu: Allow default domain type to be set on the kernel command line
+
+there is the option to switch into bypass mode. And, depending on
+devicetree options, bypass mode can be also enabled. This makes it
+hard to determine if direct mapping is enabled. Print message with the
+default domain type case.
+
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/iommu.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index 770ba7e7ef4d..b698732c6f91 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -599,7 +599,9 @@ int iommu_group_add_device(struct iommu_group *group, struct device *dev)
+
+ trace_add_device_to_group(group->id, dev);
+
+- pr_info("Adding device %s to group %d\n", dev_name(dev), group->id);
++ pr_info("Adding device %s to group %d, default domain type %d\n",
++ dev_name(dev), group->id,
++ group->default_domain ? group->default_domain->type : -1);
+
+ return 0;
+
+--
+2.11.0
+
+From 8045df4924b41d303cd0599f1ed1ff9b23bed036 Mon Sep 17 00:00:00 2001
+From: Sunil Goutham <sgoutham@cavium.com>
+Date: Tue, 25 Apr 2017 15:27:52 +0530
+Subject: [PATCH 27/41] iommu/arm-smmu: Return IOVA in iova_to_phys when SMMU
+ is bypassed
+
+For software initiated address translation, when domain type is
+IOMMU_DOMAIN_IDENTITY i.e SMMU is bypassed, mimic HW behavior
+i.e return the same IOVA as translated address.
+
+This patch is an extension to Will Deacon's patchset
+"Implement SMMU passthrough using the default domain".
+
+Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
+Acked-by: Will Deacon <will.deacon@arm.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+(cherry picked from commit bdf95923086fb359ccb44c815724c3ace1611c90)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/arm-smmu-v3.c | 3 +++
+ drivers/iommu/arm-smmu.c | 3 +++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 803352d78d43..6ef9c3ed4344 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -1714,6 +1714,9 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova)
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+
++ if (domain->type == IOMMU_DOMAIN_IDENTITY)
++ return iova;
++
+ if (!ops)
+ return 0;
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index 88e9131b6900..e731d8ead6cc 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -1411,6 +1411,9 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+ struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops;
+
++ if (domain->type == IOMMU_DOMAIN_IDENTITY)
++ return iova;
++
+ if (!ops)
+ return 0;
+
+--
+2.11.0
+
+From fecd86ffe12e5c49f22325faf732eaf5cfe8c62b Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@cavium.com>
+Date: Thu, 4 May 2017 17:48:48 +0200
+Subject: [PATCH 28/41] iommu, aarch64: Set bypass mode per default
+
+We see a performance degradation if smmu is enabled in non-bypass mode.
+This is a problem in the kernel's implementation. Until that is solved,
+enable smmu in bypass mode per default.
+
+We have tested that SMMU passthrough mode doesn't affect VFIO on both
+CN88xx and CN99xx and haven't found any issues.
+
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/iommu/iommu.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
+index b698732c6f91..41125b19832b 100644
+--- a/drivers/iommu/iommu.c
++++ b/drivers/iommu/iommu.c
+@@ -36,7 +36,12 @@
+
+ static struct kset *iommu_group_kset;
+ static DEFINE_IDA(iommu_group_ida);
++
++#ifdef CONFIG_ARM64
++static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
++#else
+ static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
++#endif
+
+ struct iommu_callback_data {
+ const struct iommu_ops *ops;
+--
+2.11.0
+
+From patchwork Mon May 22 15:06:37 2017
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [v2,1/2] ACPICA: IORT: Update SMMU models for IORT rev. C
+From: Robin Murphy <robin.murphy@arm.com>
+X-Patchwork-Id: 9740941
+Message-Id: <11ef7d28c535c01d42b7b3c8e632934f0e0f1048.1495459319.git.robin.murphy@arm.com>
+To: will.deacon@arm.com,
+ joro@8bytes.org
+Cc: lorenzo.pieralisi@arm.com, gabriele.paoloni@huawei.com,
+ gakula@caviumnetworks.com, rjw@rjwysocki.net, robert.moore@intel.com,
+ shameerali.kolothum.thodi@huawei.com, rrichter@cavium.com,
+ linux-acpi@vger.kernel.org, iommu@lists.linux-foundation.org,
+ hanjun.guo@linaro.org, linu.cherian@cavium.com, john.garry@huawei.com,
+ linux-arm-kernel@lists.infradead.org, lv.zheng@intel.com
+Date: Mon, 22 May 2017 16:06:37 +0100
+
+IORT revision C has been published with a number of new SMMU
+implementation identifiers. Since IORT doesn't have any way of falling
+back to a more generic model code, we really need Linux to know about
+these before vendors start updating their firmware tables to use them.
+
+CC: Rafael J. Wysocki <rjw@rjwysocki.net>
+CC: Robert Moore <robert.moore@intel.com>
+CC: Lv Zheng <lv.zheng@intel.com>
+Acked-by: Robert Richter <rrichter@cavium.com>
+Tested-by: Robert Richter <rrichter@cavium.com>
+Signed-off-by: Robin Murphy <robin.murphy@arm.com>
+Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org>
+---
+
+v2: Update more comments, add Robert's tags.
+
+I'm including this here as a kernel patch just for context - once I've
+figured out how we actually submit patches to ACPICA directly, I'll do
+that per the preferred process.
+
+Robin.
+
+ include/acpi/actbl2.h | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h
+index faa9f2c0d5de..f469ea41f2fd 100644
+--- a/include/acpi/actbl2.h
++++ b/include/acpi/actbl2.h
+@@ -663,7 +663,7 @@ struct acpi_ibft_target {
+ * IORT - IO Remapping Table
+ *
+ * Conforms to "IO Remapping Table System Software on ARM Platforms",
+- * Document number: ARM DEN 0049B, October 2015
++ * Document number: ARM DEN 0049C, May 2017
+ *
+ ******************************************************************************/
+
+@@ -778,6 +778,8 @@ struct acpi_iort_smmu {
+ #define ACPI_IORT_SMMU_V2 0x00000001 /* Generic SMMUv2 */
+ #define ACPI_IORT_SMMU_CORELINK_MMU400 0x00000002 /* ARM Corelink MMU-400 */
+ #define ACPI_IORT_SMMU_CORELINK_MMU500 0x00000003 /* ARM Corelink MMU-500 */
++#define ACPI_IORT_SMMU_CORELINK_MMU401 0x00000004 /* ARM Corelink MMU-401 */
++#define ACPI_IORT_SMMU_CAVIUM_SMMUV2 0x00000005 /* Cavium ThunderX SMMUv2 */
+
+ /* Masks for Flags field above */
+
+@@ -798,13 +800,19 @@ struct acpi_iort_smmu_v3 {
+ u32 flags;
+ u32 reserved;
+ u64 vatos_address;
+- u32 model; /* O: generic SMMUv3 */
++ u32 model;
+ u32 event_gsiv;
+ u32 pri_gsiv;
+ u32 gerr_gsiv;
+ u32 sync_gsiv;
+ };
+
++/* Values for Model field above */
++
++#define ACPI_IORT_SMMU_V3 0x00000000 /* Generic SMMUv3 */
++#define ACPI_IORT_SMMU_HISILICON_HI161X 0x00000001 /* HiSilicon Hi161x SMMUv3 */
++#define ACPI_IORT_SMMU_CAVIUM_CN99XX 0x00000002 /* Cavium CN99xx SMMUv3 */
++
+ /* Masks for Flags field above */
+
+ #define ACPI_IORT_SMMU_V3_COHACC_OVERRIDE (1)
+From patchwork Mon May 22 15:06:38 2017
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: [v2,2/2] iommu/arm-smmu: Plumb in new ACPI identifiers
+From: Robin Murphy <robin.murphy@arm.com>
+X-Patchwork-Id: 9740939
+Message-Id: <ac4e7adc7ca636fc71e6a10f0d8ab273b3dfd5dc.1495459319.git.robin.murphy@arm.com>
+To: will.deacon@arm.com,
+ joro@8bytes.org
+Cc: lorenzo.pieralisi@arm.com, gabriele.paoloni@huawei.com,
+ gakula@caviumnetworks.com, shameerali.kolothum.thodi@huawei.com,
+ rrichter@cavium.com, linux-acpi@vger.kernel.org,
+ iommu@lists.linux-foundation.org, hanjun.guo@linaro.org,
+ linu.cherian@cavium.com, stable@vger.kernel.org, john.garry@huawei.com,
+ linux-arm-kernel@lists.infradead.org
+Date: Mon, 22 May 2017 16:06:38 +0100
+
+Revision C of IORT now allows us to identify ARM MMU-401 and the Cavium
+ThunderX implementation. Wire them up so that we can probe these models
+once firmware starts using the new codes, and so that the appropriate
+features and quirks get enabled when we do.
+
+For the sake of backports and mitigating synchronisation problems with
+the ACPICA headers, we'll carry a backup copy of the new definitions
+locally for the short term to make life simpler.
+
+CC: stable@vger.kernel.org # 4.10
+Acked-by: Robert Richter <rrichter@cavium.com>
+Tested-by: Robert Richter <rrichter@cavium.com>
+Signed-off-by: Robin Murphy <robin.murphy@arm.com>
+---
+
+v2: Add local backup definitions
+
+This is ready to go regardless of patch 1. The stable backport is in likely
+anticipation of machines with updated firmware paired with stable distro
+kernels, which would be unable to recognise and probe the SMMU otherwise.
+
+Robin.
+
+ drivers/iommu/arm-smmu.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
+index 7ec30b08b3bd..79b9bb7d4783 100644
+--- a/drivers/iommu/arm-smmu.c
++++ b/drivers/iommu/arm-smmu.c
+@@ -312,6 +312,14 @@ enum arm_smmu_implementation {
+ CAVIUM_SMMUV2,
+ };
+
++/* Until ACPICA headers cover IORT rev. C */
++#ifndef ACPI_IORT_SMMU_CORELINK_MMU401
++#define ACPI_IORT_SMMU_CORELINK_MMU401 0x4
++#endif
++#ifndef ACPI_IORT_SMMU_CAVIUM_SMMUV2
++#define ACPI_IORT_SMMU_CAVIUM_SMMUV2 0x5
++#endif
++
+ struct arm_smmu_s2cr {
+ struct iommu_group *group;
+ int count;
+@@ -2073,6 +2081,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
+ smmu->version = ARM_SMMU_V1;
+ smmu->model = GENERIC_SMMU;
+ break;
++ case ACPI_IORT_SMMU_CORELINK_MMU401:
++ smmu->version = ARM_SMMU_V1_64K;
++ smmu->model = GENERIC_SMMU;
++ break;
+ case ACPI_IORT_SMMU_V2:
+ smmu->version = ARM_SMMU_V2;
+ smmu->model = GENERIC_SMMU;
+@@ -2081,6 +2093,10 @@ static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
+ smmu->version = ARM_SMMU_V2;
+ smmu->model = ARM_MMU500;
+ break;
++ case ACPI_IORT_SMMU_CAVIUM_SMMUV2:
++ smmu->version = ARM_SMMU_V2;
++ smmu->model = CAVIUM_SMMUV2;
++ break;
+ default:
+ ret = -ENODEV;
+ }
+From 980ec2906ad4e92a89e8f635a79eba90318b22d5 Mon Sep 17 00:00:00 2001
+From: Linu Cherian <linu.cherian@cavium.com>
+Date: Fri, 12 May 2017 18:11:04 +0530
+Subject: [PATCH 31/41] ACPI/IORT: Fixup SMMUv3 resource size for Cavium
+ ThunderX2 SMMUv3 model
+
+Cavium ThunderX2 implementation doesn't support second page in SMMU
+register space. Hence, resource size is set as 64k for this model.
+
+Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
+Signed-off-by: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
+Message-Id: <1494592866-14076-2-git-send-email-gakula@caviumnetworks.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/acpi/arm64/iort.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
+index 4a5bb967250b..9fd2f1d31a27 100644
+--- a/drivers/acpi/arm64/iort.c
++++ b/drivers/acpi/arm64/iort.c
+@@ -669,12 +669,20 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
+ {
+ struct acpi_iort_smmu_v3 *smmu;
+ int num_res = 0;
++ unsigned long size = SZ_128K;
+
+ /* Retrieve SMMUv3 specific data */
+ smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+
++ /*
++ * Override the size, for Cavium ThunderX2 implementation
++ * which doesn't support the page 1 SMMU register space.
++ */
++ if (smmu->model == ACPI_IORT_SMMU_CAVIUM_CN99XX)
++ size = SZ_64K;
++
+ res[num_res].start = smmu->base_address;
+- res[num_res].end = smmu->base_address + SZ_128K - 1;
++ res[num_res].end = smmu->base_address + size - 1;
+ res[num_res].flags = IORESOURCE_MEM;
+
+ num_res++;
+--
+2.11.0
+
+From d082f66524ad8653793fc753dbff2b369b3cafe8 Mon Sep 17 00:00:00 2001
+From: Linu Cherian <linu.cherian@cavium.com>
+Date: Fri, 12 May 2017 18:11:05 +0530
+Subject: [PATCH 32/41] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2
+ erratum #74
+
+Cavium ThunderX2 SMMU implementation doesn't support page 1 register space
+and PAGE0_REGS_ONLY option is enabled as an errata workaround.
+This option when turned on, replaces all page 1 offsets used for
+EVTQ_PROD/CONS, PRIQ_PROD/CONS register access with page 0 offsets.
+
+SMMU resource size checks are now based on SMMU option PAGE0_REGS_ONLY,
+since resource size can be either 64k/128k.
+For this, arm_smmu_device_dt_probe/acpi_probe has been moved before
+platform_get_resource call, so that SMMU options are set beforehand.
+
+Signed-off-by: Linu Cherian <linu.cherian@cavium.com>
+Signed-off-by: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
+Message-Id: <1494592866-14076-3-git-send-email-gakula@caviumnetworks.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ Documentation/arm64/silicon-errata.txt | 1 +
+ .../devicetree/bindings/iommu/arm,smmu-v3.txt | 6 ++
+ drivers/iommu/arm-smmu-v3.c | 64 +++++++++++++++++-----
+ 3 files changed, 56 insertions(+), 15 deletions(-)
+
+diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
+index 2f66683500b8..629e2ce1f0d2 100644
+--- a/Documentation/arm64/silicon-errata.txt
++++ b/Documentation/arm64/silicon-errata.txt
+@@ -61,6 +61,7 @@ stable kernels.
+ | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
+ | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
+ | Cavium | ThunderX SMMUv2 | #27704 | N/A |
++| Cavium | ThunderX2 SMMUv3| #74 | N/A |
+ | | | | |
+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
+ | | | | |
+diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+index be57550e14e4..e6da62b3a3ff 100644
+--- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
++++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt
+@@ -49,6 +49,12 @@ the PCIe specification.
+ - hisilicon,broken-prefetch-cmd
+ : Avoid sending CMD_PREFETCH_* commands to the SMMU.
+
++- cavium-cn99xx,broken-page1-regspace
++ : Replaces all page 1 offsets used for EVTQ_PROD/CONS,
++ PRIQ_PROD/CONS register access with page 0 offsets.
++ Set for Cavium ThunderX2 silicon that doesn't support
++ SMMU page1 register space.
++
+ ** Example
+
+ smmu@2b400000 {
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 6ef9c3ed4344..913805429f80 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -412,6 +412,9 @@
+ #define MSI_IOVA_BASE 0x8000000
+ #define MSI_IOVA_LENGTH 0x100000
+
++#define ARM_SMMU_PAGE0_REGS_ONLY(smmu) \
++ ((smmu)->options & ARM_SMMU_OPT_PAGE0_REGS_ONLY)
++
+ static bool disable_bypass;
+ module_param_named(disable_bypass, disable_bypass, bool, S_IRUGO);
+ MODULE_PARM_DESC(disable_bypass,
+@@ -597,6 +600,7 @@ struct arm_smmu_device {
+ u32 features;
+
+ #define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)
++#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
+ u32 options;
+
+ struct arm_smmu_cmdq cmdq;
+@@ -663,9 +667,19 @@ struct arm_smmu_option_prop {
+
+ static struct arm_smmu_option_prop arm_smmu_options[] = {
+ { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" },
++ { ARM_SMMU_OPT_PAGE0_REGS_ONLY, "cavium-cn99xx,broken-page1-regspace"},
+ { 0, NULL},
+ };
+
++static inline void __iomem *arm_smmu_page1_fixup(unsigned long offset,
++ struct arm_smmu_device *smmu)
++{
++ if (offset > SZ_64K && ARM_SMMU_PAGE0_REGS_ONLY(smmu))
++ offset -= SZ_64K;
++
++ return smmu->base + offset;
++}
++
+ static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
+ {
+ return container_of(dom, struct arm_smmu_domain, domain);
+@@ -1959,8 +1973,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
+ return -ENOMEM;
+ }
+
+- q->prod_reg = smmu->base + prod_off;
+- q->cons_reg = smmu->base + cons_off;
++ q->prod_reg = arm_smmu_page1_fixup(prod_off, smmu);
++ q->cons_reg = arm_smmu_page1_fixup(cons_off, smmu);
+ q->ent_dwords = dwords;
+
+ q->q_base = Q_BASE_RWA;
+@@ -2361,8 +2375,10 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
+
+ /* Event queue */
+ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
+- writel_relaxed(smmu->evtq.q.prod, smmu->base + ARM_SMMU_EVTQ_PROD);
+- writel_relaxed(smmu->evtq.q.cons, smmu->base + ARM_SMMU_EVTQ_CONS);
++ writel_relaxed(smmu->evtq.q.prod,
++ arm_smmu_page1_fixup(ARM_SMMU_EVTQ_PROD, smmu));
++ writel_relaxed(smmu->evtq.q.cons,
++ arm_smmu_page1_fixup(ARM_SMMU_EVTQ_CONS, smmu));
+
+ enables |= CR0_EVTQEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+@@ -2377,9 +2393,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
+ writeq_relaxed(smmu->priq.q.q_base,
+ smmu->base + ARM_SMMU_PRIQ_BASE);
+ writel_relaxed(smmu->priq.q.prod,
+- smmu->base + ARM_SMMU_PRIQ_PROD);
++ arm_smmu_page1_fixup(ARM_SMMU_PRIQ_PROD, smmu));
+ writel_relaxed(smmu->priq.q.cons,
+- smmu->base + ARM_SMMU_PRIQ_CONS);
++ arm_smmu_page1_fixup(ARM_SMMU_PRIQ_CONS, smmu));
+
+ enables |= CR0_PRIQEN;
+ ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
+@@ -2603,6 +2619,14 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
+ }
+
+ #ifdef CONFIG_ACPI
++static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
++{
++ if (model == ACPI_IORT_SMMU_CAVIUM_CN99XX)
++ smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
++
++ dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
++}
++
+ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ struct arm_smmu_device *smmu)
+ {
+@@ -2615,6 +2639,8 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
+ /* Retrieve SMMUv3 specific data */
+ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
+
++ acpi_smmu_get_options(iort_smmu->model, smmu);
++
+ if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
+ smmu->features |= ARM_SMMU_FEAT_COHERENCY;
+
+@@ -2650,6 +2676,14 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
+ return ret;
+ }
+
++static unsigned long arm_smmu_resource_size(struct arm_smmu_device *smmu)
++{
++ if (ARM_SMMU_PAGE0_REGS_ONLY(smmu))
++ return SZ_64K;
++ else
++ return SZ_128K;
++}
++
+ static int arm_smmu_device_probe(struct platform_device *pdev)
+ {
+ int irq, ret;
+@@ -2666,9 +2700,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
+ }
+ smmu->dev = dev;
+
++ if (dev->of_node) {
++ ret = arm_smmu_device_dt_probe(pdev, smmu);
++ } else {
++ ret = arm_smmu_device_acpi_probe(pdev, smmu);
++ if (ret == -ENODEV)
++ return ret;
++ }
++
+ /* Base address */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+- if (resource_size(res) + 1 < SZ_128K) {
++ if (resource_size(res) + 1 < arm_smmu_resource_size(smmu)) {
+ dev_err(dev, "MMIO region too small (%pr)\n", res);
+ return -EINVAL;
+ }
+@@ -2695,14 +2737,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
+ if (irq > 0)
+ smmu->gerr_irq = irq;
+
+- if (dev->of_node) {
+- ret = arm_smmu_device_dt_probe(pdev, smmu);
+- } else {
+- ret = arm_smmu_device_acpi_probe(pdev, smmu);
+- if (ret == -ENODEV)
+- return ret;
+- }
+-
+ /* Set bypass mode according to firmware probing result */
+ bypass = !!ret;
+
+--
+2.11.0
+
+From 8b0e69d0a8d5c11ee433c2a110a7d056ad190e1a Mon Sep 17 00:00:00 2001
+From: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
+Date: Fri, 12 May 2017 18:11:06 +0530
+Subject: [PATCH 33/41] iommu/arm-smmu-v3: Add workaround for Cavium ThunderX2
+ erratum #126
+
+Cavium ThunderX2 SMMU doesn't support MSI and also doesn't have unique irq
+lines for gerror, eventq and cmdq-sync.
+
+This patch addresses the issue by checking if any interrupt sources are
+using same irq number, then they are registered as shared irqs.
+
+Signed-off-by: Geetha Sowjanya <geethasowjanya.akula@cavium.com>
+Message-Id: <1494592866-14076-4-git-send-email-gakula@caviumnetworks.com>
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ Documentation/arm64/silicon-errata.txt | 1 +
+ drivers/iommu/arm-smmu-v3.c | 29 +++++++++++++++++++++++++----
+ 2 files changed, 26 insertions(+), 4 deletions(-)
+
+diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
+index 629e2ce1f0d2..cc15f251ce42 100644
+--- a/Documentation/arm64/silicon-errata.txt
++++ b/Documentation/arm64/silicon-errata.txt
+@@ -62,6 +62,7 @@ stable kernels.
+ | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 |
+ | Cavium | ThunderX SMMUv2 | #27704 | N/A |
+ | Cavium | ThunderX2 SMMUv3| #74 | N/A |
++| Cavium | ThunderX2 SMMUv3| #126 | N/A |
+ | | | | |
+ | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 |
+ | | | | |
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 913805429f80..2fc067f3e199 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -2230,6 +2230,25 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
+ devm_add_action(dev, arm_smmu_free_msis, dev);
+ }
+
++static int get_irq_flags(struct arm_smmu_device *smmu, int irq)
++{
++ int match_count = 0;
++
++ if (irq == smmu->evtq.q.irq)
++ match_count++;
++ if (irq == smmu->cmdq.q.irq)
++ match_count++;
++ if (irq == smmu->gerr_irq)
++ match_count++;
++ if (irq == smmu->priq.q.irq)
++ match_count++;
++
++ if (match_count > 1)
++ return IRQF_SHARED | IRQF_ONESHOT;
++
++ return IRQF_ONESHOT;
++}
++
+ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+ {
+ int ret, irq;
+@@ -2250,7 +2269,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+ if (irq) {
+ ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
+ arm_smmu_evtq_thread,
+- IRQF_ONESHOT,
++ get_irq_flags(smmu, irq),
+ "arm-smmu-v3-evtq", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable evtq irq\n");
+@@ -2259,7 +2278,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+ irq = smmu->cmdq.q.irq;
+ if (irq) {
+ ret = devm_request_irq(smmu->dev, irq,
+- arm_smmu_cmdq_sync_handler, 0,
++ arm_smmu_cmdq_sync_handler,
++ get_irq_flags(smmu, irq),
+ "arm-smmu-v3-cmdq-sync", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable cmdq-sync irq\n");
+@@ -2268,7 +2288,8 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+ irq = smmu->gerr_irq;
+ if (irq) {
+ ret = devm_request_irq(smmu->dev, irq, arm_smmu_gerror_handler,
+- 0, "arm-smmu-v3-gerror", smmu);
++ get_irq_flags(smmu, irq),
++ "arm-smmu-v3-gerror", smmu);
+ if (ret < 0)
+ dev_warn(smmu->dev, "failed to enable gerror irq\n");
+ }
+@@ -2278,7 +2299,7 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
+ if (irq) {
+ ret = devm_request_threaded_irq(smmu->dev, irq, NULL,
+ arm_smmu_priq_thread,
+- IRQF_ONESHOT,
++ get_irq_flags(smmu, irq),
+ "arm-smmu-v3-priq",
+ smmu);
+ if (ret < 0)
+--
+2.11.0
+
+From 929f539998cfb83834e890fd7781ddcfc327c109 Mon Sep 17 00:00:00 2001
+From: Robert Richter <rrichter@cavium.com>
+Date: Wed, 12 Apr 2017 10:31:15 +0200
+Subject: [PATCH 34/41] iommu/arm-smmu, ACPI: Enable Cavium SMMU-v3
+
+In next IORT spec release there will be a definition of a Cavium
+specific model. Until then, enable the Cavium SMMU using cpu id
+registers. Early silicon versions (A1) of Cavium's CN99xx SMMUv3
+implementation must be enabled. For later silicon versions (B0) the
+iort change will be in place.
+
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/acpi/arm64/iort.c | 13 ++++++++++++-
+ drivers/iommu/arm-smmu-v3.c | 19 +++++++++++++++++++
+ 2 files changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
+index 9fd2f1d31a27..3f5f892037eb 100644
+--- a/drivers/acpi/arm64/iort.c
++++ b/drivers/acpi/arm64/iort.c
+@@ -26,6 +26,8 @@
+ #include <linux/platform_device.h>
+ #include <linux/slab.h>
+
++#include <asm/cputype.h>
++
+ #define IORT_TYPE_MASK(type) (1 << (type))
+ #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP)
+ #define IORT_IOMMU_TYPE ((1 << ACPI_IORT_NODE_SMMU) | \
+@@ -664,6 +666,14 @@ static int __init arm_smmu_v3_count_resources(struct acpi_iort_node *node)
+ return num_res;
+ }
+
++static bool is_cavium_cn99xx_smmu_v3(void)
++{
++ u32 cpu_model = read_cpuid_id() & MIDR_CPU_MODEL_MASK;
++
++ return cpu_model == MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM,
++ BRCM_CPU_PART_VULCAN);
++}
++
+ static void __init arm_smmu_v3_init_resources(struct resource *res,
+ struct acpi_iort_node *node)
+ {
+@@ -678,7 +688,8 @@ static void __init arm_smmu_v3_init_resources(struct resource *res,
+ * Override the size, for Cavium ThunderX2 implementation
+ * which doesn't support the page 1 SMMU register space.
+ */
+- if (smmu->model == ACPI_IORT_SMMU_CAVIUM_CN99XX)
++ if (smmu->model == ACPI_IORT_SMMU_CAVIUM_CN99XX ||
++ is_cavium_cn99xx_smmu_v3())
+ size = SZ_64K;
+
+ res[num_res].start = smmu->base_address;
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 2fc067f3e199..de9774073a00 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -39,6 +39,8 @@
+
+ #include <linux/amba/bus.h>
+
++#include <asm/cputype.h>
++
+ #include "io-pgtable.h"
+
+ /* MMIO registers */
+@@ -2640,11 +2642,28 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
+ }
+
+ #ifdef CONFIG_ACPI
++
++static void acpi_smmu_enable_cavium(struct arm_smmu_device *smmu)
++{
++ u32 cpu_model;
++
++ if (!IS_ENABLED(CONFIG_ARM64))
++ return;
++
++ cpu_model = read_cpuid_id() & MIDR_CPU_MODEL_MASK;
++ if (cpu_model != MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN))
++ return;
++
++ smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
++}
++
+ static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
+ {
+ if (model == ACPI_IORT_SMMU_CAVIUM_CN99XX)
+ smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
+
++ acpi_smmu_enable_cavium(smmu);
++
+ dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
+ }
+
+--
+2.11.0
+
+From 8e59a6a91bf2988fb9cbc21d481f5e2b88af8140 Mon Sep 17 00:00:00 2001
+From: Sunil Goutham <sgoutham@cavium.com>
+Date: Fri, 5 May 2017 16:47:46 +0530
+Subject: iommu/arm-smmu-v3: Increase CMDQ drain timeout value
+
+Waiting for a CMD_SYNC to be processed involves waiting for the command
+queue to drain, which can take an awful lot longer than waiting for a
+single entry to become available. Consequently, the common timeout value
+of 100us has been observed to be too short on some platforms when a
+CMD_SYNC is issued into a queued full of TLBI commands.
+
+This patch resolves the issue by using a different (1s) timeout when
+waiting for the CMDQ to drain and using a simple back-off mechanism
+when polling the cons pointer in the absence of WFE support.
+
+Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
+[will: rewrote commit message and cosmetic changes]
+Signed-off-by: Will Deacon <will.deacon@arm.com>
+---
+ drivers/iommu/arm-smmu-v3.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
+index 380969a..6a06be7 100644
+--- a/drivers/iommu/arm-smmu-v3.c
++++ b/drivers/iommu/arm-smmu-v3.c
+@@ -408,6 +408,7 @@
+
+ /* High-level queue structures */
+ #define ARM_SMMU_POLL_TIMEOUT_US 100
++#define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US 1000000 /* 1s! */
+
+ #define MSI_IOVA_BASE 0x8000000
+ #define MSI_IOVA_LENGTH 0x100000
+@@ -737,7 +738,13 @@ static void queue_inc_prod(struct arm_smmu_queue *q)
+ */
+ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+ {
+- ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_POLL_TIMEOUT_US);
++ ktime_t timeout;
++ unsigned int delay = 1;
++
++ /* Wait longer if it's queue drain */
++ timeout = ktime_add_us(ktime_get(), drain ?
++ ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US :
++ ARM_SMMU_POLL_TIMEOUT_US);
+
+ while (queue_sync_cons(q), (drain ? !queue_empty(q) : queue_full(q))) {
+ if (ktime_compare(ktime_get(), timeout) > 0)
+@@ -747,7 +754,8 @@ static int queue_poll_cons(struct arm_smmu_queue *q, bool drain, bool wfe)
+ wfe();
+ } else {
+ cpu_relax();
+- udelay(1);
++ udelay(delay);
++ delay *= 2;
+ }
+ }
+
+--
+cgit v1.1
+
+From 2e92581ecd713010e9c65ba9a90f1f7557afbe95 Mon Sep 17 00:00:00 2001
+From: Manish Jaggi <mjaggi@caviumnetworks.com>
+Date: Thu, 30 Mar 2017 18:47:14 -0500
+Subject: [PATCH 36/41] PCI: Apply Cavium ACS quirk only to
+ CN81xx/CN83xx/CN88xx devices
+
+Only apply the Cavium ACS quirk to devices with ID in the range
+0xa000-0xa0ff. These are the on-chip PCI devices for CN81xx/CN83xx/CN88xx.
+
+Fixes: b404bcfbf035 ("PCI: Add ACS quirk for all Cavium devices")
+Reported-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Manish Jaggi <mjaggi@cavium.com>
+Acked-by: David Daney <david.daney@cavium.com>
+Acked-by: Alex Williamson <alex.williamson@redhat.com>
+(cherry picked from commit b77d537d00d08fcf0bf641cd3491dd7df0ad1475)
+Signed-off-by: Robert Richter <rrichter@cavium.com>
+---
+ drivers/pci/quirks.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index 0966a22eae13..f606916bc685 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -4133,6 +4133,9 @@ static int pci_quirk_cavium_acs(struct pci_dev *dev, u16 acs_flags)
+ acs_flags &= ~(PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
+ PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT);
+
++ if (!((dev->device >= 0xa000) && (dev->device <= 0xa0ff)))
++ return -ENOTTY;
++
+ return acs_flags ? 0 : 1;
+ }
+
+--
+2.11.0
+
diff --git a/baseconfig/CONFIG_B43LEGACY_DEBUG b/baseconfig/CONFIG_B43LEGACY_DEBUG
index 02f67a471..494982463 100644
--- a/baseconfig/CONFIG_B43LEGACY_DEBUG
+++ b/baseconfig/CONFIG_B43LEGACY_DEBUG
@@ -1 +1 @@
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43LEGACY_DEBUG is not set
diff --git a/baseconfig/CONFIG_B43_DEBUG b/baseconfig/CONFIG_B43_DEBUG
index 9346a4511..a2bf9bb1f 100644
--- a/baseconfig/CONFIG_B43_DEBUG
+++ b/baseconfig/CONFIG_B43_DEBUG
@@ -1 +1 @@
-CONFIG_B43_DEBUG=y
+# CONFIG_B43_DEBUG is not set
diff --git a/drm-i915-Do-not-drop-pagetables-when-empty.patch b/drm-i915-Do-not-drop-pagetables-when-empty.patch
deleted file mode 100644
index 8dcbc81bb..000000000
--- a/drm-i915-Do-not-drop-pagetables-when-empty.patch
+++ /dev/null
@@ -1,95 +0,0 @@
-From patchwork Fri May 26 08:29:06 2017
-Content-Type: text/plain; charset="utf-8"
-MIME-Version: 1.0
-Content-Transfer-Encoding: 8bit
-Subject: drm/i915: Do not drop pagetables when empty
-From: Daniel Vetter <daniel.vetter@ffwll.ch>
-X-Patchwork-Id: 158340
-Message-Id: <20170526082906.8982-1-daniel.vetter@ffwll.ch>
-To: Intel Graphics Development <intel-gfx@lists.freedesktop.org>
-Cc: "# v4 . 10+" <stable@vger.kernel.org>,
- Daniel Vetter <daniel.vetter@intel.com>,
- Mika Kuoppala <mika.kuoppala@intel.com>
-Date: Fri, 26 May 2017 10:29:06 +0200
-
-From: Chris Wilson <chris@chris-wilson.co.uk>
-
-This is the minimal backport for stable of the upstream commit:
-
-commit dd19674bacba227ae5d3ce680cbc5668198894dc
-Author: Chris Wilson <chris@chris-wilson.co.uk>
-Date: Wed Feb 15 08:43:46 2017 +0000
-
- drm/i915: Remove bitmap tracking for used-ptes
-
-Due to a race with the shrinker, when we try to allocate a pagetable, we
-may end up shrinking it instead. This comes as a nasty surprise as we
-try to dereference it to fill in the pagetable entries for the object.
-
-In linus/master this is fixed by pinning the pagetables prior to
-allocation, but that backport is roughly
- drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
- drivers/gpu/drm/i915/i915_gem_evict.c | 12 +-
- drivers/gpu/drm/i915/i915_gem_gtt.c | 2017 ++++++++++++++-------------------
- drivers/gpu/drm/i915/i915_gem_gtt.h | 123 +-
- drivers/gpu/drm/i915/i915_trace.h | 104 --
- drivers/gpu/drm/i915/i915_vgpu.c | 9 +-
- drivers/gpu/drm/i915/i915_vma.c | 9 -
- drivers/gpu/drm/i915/intel_lrc.c | 4 +-
- 8 files changed, 946 insertions(+), 1334 deletions(-)
-i.e. unsuitable for stable. Instead we neuter the code that tried to
-free the pagetables.
-
-Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99295
-Fixes: 2ce5179fe826 ("drm/i915/gtt: Free unused lower-level page tables")
-Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-Cc: Michel Thierry <michel.thierry@intel.com>
-Cc: Mika Kuoppala <mika.kuoppala@intel.com>
-Cc: Chris Wilson <chris@chris-wilson.co.uk>
-Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
-Cc: Michał Winiarski <michal.winiarski@intel.com>
-Cc: Daniel Vetter <daniel.vetter@intel.com>
-Cc: Jani Nikula <jani.nikula@linux.intel.com>
-Cc: intel-gfx@lists.freedesktop.org
-Cc: <stable@vger.kernel.org> # v4.10+
-Tested-by: Maël Lavault <mael.lavault@protonmail.com>
-Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
----
- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 ----------
- 1 file changed, 10 deletions(-)
-
-diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
-index 96e45a4d5441..4f581adf2fcf 100644
---- a/drivers/gpu/drm/i915/i915_gem_gtt.c
-+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
-@@ -755,10 +755,6 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
- GEM_BUG_ON(pte_end > GEN8_PTES);
-
- bitmap_clear(pt->used_ptes, pte, num_entries);
-- if (USES_FULL_PPGTT(vm->i915)) {
-- if (bitmap_empty(pt->used_ptes, GEN8_PTES))
-- return true;
-- }
-
- pt_vaddr = kmap_px(pt);
-
-@@ -798,9 +794,6 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
- }
- }
-
-- if (bitmap_empty(pd->used_pdes, I915_PDES))
-- return true;
--
- return false;
- }
-
-@@ -829,9 +822,6 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
-
- mark_tlbs_dirty(ppgtt);
-
-- if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
-- return true;
--
- return false;
- }
-
diff --git a/kernel-aarch64.config b/kernel-aarch64.config
index 505d4810a..c1f7b1514 100644
--- a/kernel-aarch64.config
+++ b/kernel-aarch64.config
@@ -401,8 +401,8 @@ CONFIG_AXP288_CHARGER=m
CONFIG_AXP288_FUEL_GAUGE=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-armv7hl-lpae.config b/kernel-armv7hl-lpae.config
index 2f48a6c6a..8096d970e 100644
--- a/kernel-armv7hl-lpae.config
+++ b/kernel-armv7hl-lpae.config
@@ -424,8 +424,8 @@ CONFIG_AXP288_CHARGER=m
CONFIG_AXP288_FUEL_GAUGE=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-armv7hl.config b/kernel-armv7hl.config
index d6dc918c2..ab01e101d 100644
--- a/kernel-armv7hl.config
+++ b/kernel-armv7hl.config
@@ -439,8 +439,8 @@ CONFIG_AXP288_CHARGER=m
CONFIG_AXP288_FUEL_GAUGE=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-i686-PAE.config b/kernel-i686-PAE.config
index 7ef62432b..efb5f9633 100644
--- a/kernel-i686-PAE.config
+++ b/kernel-i686-PAE.config
@@ -325,8 +325,8 @@ CONFIG_AX25_DAMA_SLAVE=y
CONFIG_AX25=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-i686.config b/kernel-i686.config
index 9ad06b119..e8ed393ae 100644
--- a/kernel-i686.config
+++ b/kernel-i686.config
@@ -325,8 +325,8 @@ CONFIG_AX25_DAMA_SLAVE=y
CONFIG_AX25=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-ppc64.config b/kernel-ppc64.config
index f925b6679..4c35bd0ff 100644
--- a/kernel-ppc64.config
+++ b/kernel-ppc64.config
@@ -275,8 +275,8 @@ CONFIG_AX25=m
# CONFIG_AXON_RAM is not set
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-ppc64le.config b/kernel-ppc64le.config
index be6feb99d..46452a04f 100644
--- a/kernel-ppc64le.config
+++ b/kernel-ppc64le.config
@@ -269,8 +269,8 @@ CONFIG_AX25=m
# CONFIG_AXON_RAM is not set
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-ppc64p7.config b/kernel-ppc64p7.config
index 5fb09fbfe..4cc12067e 100644
--- a/kernel-ppc64p7.config
+++ b/kernel-ppc64p7.config
@@ -269,8 +269,8 @@ CONFIG_AX25=m
# CONFIG_AXON_RAM is not set
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-s390x.config b/kernel-s390x.config
index b37428ba1..20126a7f9 100644
--- a/kernel-s390x.config
+++ b/kernel-s390x.config
@@ -269,8 +269,8 @@ CONFIG_AX25_DAMA_SLAVE=y
CONFIG_AX25=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel-x86_64.config b/kernel-x86_64.config
index c5726d821..a6c191e99 100644
--- a/kernel-x86_64.config
+++ b/kernel-x86_64.config
@@ -324,8 +324,8 @@ CONFIG_AX25_DAMA_SLAVE=y
CONFIG_AX25=m
CONFIG_B43_BCMA_PIO=y
CONFIG_B43_BCMA=y
-CONFIG_B43_DEBUG=y
-CONFIG_B43LEGACY_DEBUG=y
+# CONFIG_B43_DEBUG is not set
+# CONFIG_B43LEGACY_DEBUG is not set
CONFIG_B43LEGACY_DMA_AND_PIO_MODE=y
# CONFIG_B43LEGACY_DMA_MODE is not set
CONFIG_B43LEGACY_DMA=y
diff --git a/kernel.spec b/kernel.spec
index 962d263e0..976bf56de 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -44,7 +44,7 @@ Summary: The Linux kernel
# For non-released -rc kernels, this will be appended after the rcX and
# gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
#
-%global baserelease 300
+%global baserelease 301
%global fedora_build %{baserelease}
# base_sublevel is the kernel version we're starting with and patching
@@ -58,7 +58,7 @@ Summary: The Linux kernel
%define stable_rc 0
# Do we have a -stable update to apply?
-%define stable_update 5
+%define stable_update 6
# Set rpm version accordingly
%if 0%{?stable_update}
%define stablerev %{stable_update}
@@ -568,9 +568,6 @@ Patch430: bcm2837-initial-support.patch
Patch431: arm-rk3288-tinker.patch
-# http://www.spinics.net/lists/dri-devel/msg132235.html
-Patch433: drm-vc4-Fix-OOPSes-from-trying-to-cache-a-partially-constructed-BO..patch
-
# bcm283x mmc for wifi http://www.spinics.net/lists/arm-kernel/msg567077.html
Patch434: bcm283x-mmc-bcm2835.patch
@@ -585,10 +582,18 @@ Patch437: bcm283x-hdmi-audio.patch
# https://www.spinics.net/lists/arm-kernel/msg554183.html
Patch438: arm-imx6-hummingboard2.patch
+# Cavium fixes
+Patch439: arm64-cavium-fixes.patch
+
Patch440: arm64-Add-option-of-13-for-FORCE_MAX_ZONEORDER.patch
Patch441: bcm2835-clk-audio-jitter-issues.patch
+# http://www.spinics.net/lists/dri-devel/msg132235.html
+Patch443: drm-vc4-Fix-OOPSes-from-trying-to-cache-a-partially-constructed-BO..patch
+# https://lists.freedesktop.org/archives/dri-devel/2017-June/143688.html
+Patch444: vc4-tformat-fixes.patch
+
Patch460: lib-cpumask-Make-CPUMASK_OFFSTACK-usable-without-deb.patch
Patch466: input-kill-stupid-messages.patch
@@ -659,9 +664,6 @@ Patch668: CVE-2017-7477.patch
Patch669: 0001-SUNRPC-Refactor-svc_set_num_threads.patch
Patch670: 0002-NFSv4-Fix-callback-server-shutdown.patch
-#Fix broadwell issues
-Patch675: drm-i915-Do-not-drop-pagetables-when-empty.patch
-
# rhbz 1455780
Patch676: 2-2-nvme-Quirk-APST-on-Intel-600P-P3100-devices.patch
@@ -673,8 +675,12 @@ Patch679: actual_udpencap_fix.patch
Patch680: 0001-platform-x86-thinkpad_acpi-guard-generic-hotkey-case.patch
Patch681: 0002-platform-x86-thinkpad_acpi-add-mapping-for-new-hotke.patch
-# rhbz 1461337
-Patch682: 0001-efi-Fix-boot-panic-because-of-invalid-BGRT-image-add.patch
+# rhbz 1459326
+Patch683: RFC-audit-fix-a-race-condition-with-the-auditd-tracking-code.patch
+
+# CVE-2017-1000364 rhbz 1462819 1461333
+Patch684: mm-larger-stack-guard-gap-between-vmas.patch
+Patch685: mm-fix-new-crash-in-unmapped_area_topdown.patch
# END OF PATCH DEFINITIONS
@@ -2244,6 +2250,22 @@ fi
#
#
%changelog
+* Tue Jun 20 2017 Laura Abbott <labbott@fedoraproject.org> - 4.11.6-301
+- bump and build
+
+* Mon Jun 19 2017 Laura Abbott <labbott@fedoraproject.org> - 4.11.6-300
+- Linux v4.11.6
+- Fix CVE-2017-1000364 (rhbz 1462819 1461333)
+
+* Mon Jun 19 2017 Peter Robinson <pbrobinson@fedoraproject.org>
+- Add vc4 T-Format support to improve performance
+
+* Fri Jun 16 2017 Laura Abbott <labbott@fedoraproject.org>
+- Fix an auditd race condition (rhbz 1459326)
+
+* Thu Jun 15 2017 Peter Robinson <pbrobinson@fedoraproject.org>
+- Upstream fixes for Cavium platforms
+
* Wed Jun 14 2017 Laura Abbott <labbott@fedoraproject.org> - 4.11.5-300
- Linux v4.11.5
diff --git a/mm-fix-new-crash-in-unmapped_area_topdown.patch b/mm-fix-new-crash-in-unmapped_area_topdown.patch
new file mode 100644
index 000000000..20da9556f
--- /dev/null
+++ b/mm-fix-new-crash-in-unmapped_area_topdown.patch
@@ -0,0 +1,53 @@
+From patchwork Tue Jun 20 09:10:44 2017
+Content-Type: text/plain; charset="utf-8"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Subject: mm: fix new crash in unmapped_area_topdown()
+From: Hugh Dickins <hughd@google.com>
+X-Patchwork-Id: 9798991
+Message-Id: <alpine.LSU.2.11.1706200206210.10925@eggly.anvils>
+To: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Dave Jones <davej@codemonkey.org.uk>, Oleg Nesterov <oleg@redhat.com>,
+ Michal Hocko <mhocko@suse.com>, linux-kernel@vger.kernel.org,
+ linux-mm@kvack.org
+Date: Tue, 20 Jun 2017 02:10:44 -0700 (PDT)
+
+Trinity gets kernel BUG at mm/mmap.c:1963! in about 3 minutes of
+mmap testing. That's the VM_BUG_ON(gap_end < gap_start) at the
+end of unmapped_area_topdown(). Linus points out how MAP_FIXED
+(which does not have to respect our stack guard gap intentions)
+could result in gap_end below gap_start there. Fix that, and
+the similar case in its alternative, unmapped_area().
+
+Cc: stable@vger.kernel.org
+Fixes: 1be7107fbe18 ("mm: larger stack guard gap, between vmas")
+Reported-by: Dave Jones <davej@codemonkey.org.uk>
+Debugged-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+---
+
+ mm/mmap.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- 4.12-rc6/mm/mmap.c 2017-06-19 09:06:10.035407505 -0700
++++ linux/mm/mmap.c 2017-06-19 21:09:28.616707311 -0700
+@@ -1817,7 +1817,8 @@ unsigned long unmapped_area(struct vm_un
+ /* Check if current node has a suitable gap */
+ if (gap_start > high_limit)
+ return -ENOMEM;
+- if (gap_end >= low_limit && gap_end - gap_start >= length)
++ if (gap_end >= low_limit &&
++ gap_end > gap_start && gap_end - gap_start >= length)
+ goto found;
+
+ /* Visit right subtree if it looks promising */
+@@ -1920,7 +1921,8 @@ unsigned long unmapped_area_topdown(stru
+ gap_end = vm_start_gap(vma);
+ if (gap_end < low_limit)
+ return -ENOMEM;
+- if (gap_start <= high_limit && gap_end - gap_start >= length)
++ if (gap_start <= high_limit &&
++ gap_end > gap_start && gap_end - gap_start >= length)
+ goto found;
+
+ /* Visit left subtree if it looks promising */
diff --git a/mm-larger-stack-guard-gap-between-vmas.patch b/mm-larger-stack-guard-gap-between-vmas.patch
new file mode 100644
index 000000000..45d7987cc
--- /dev/null
+++ b/mm-larger-stack-guard-gap-between-vmas.patch
@@ -0,0 +1,889 @@
+From 1be7107fbe18eed3e319a6c3e83c78254b693acb Mon Sep 17 00:00:00 2001
+From: Hugh Dickins <hughd@google.com>
+Date: Mon, 19 Jun 2017 04:03:24 -0700
+Subject: mm: larger stack guard gap, between vmas
+
+From: Hugh Dickins <hughd@google.com>
+
+commit 1be7107fbe18eed3e319a6c3e83c78254b693acb upstream.
+
+Stack guard page is a useful feature to reduce a risk of stack smashing
+into a different mapping. We have been using a single page gap which
+is sufficient to prevent having stack adjacent to a different mapping.
+But this seems to be insufficient in the light of the stack usage in
+userspace. E.g. glibc uses as large as 64kB alloca() in many commonly
+used functions. Others use constructs liks gid_t buffer[NGROUPS_MAX]
+which is 256kB or stack strings with MAX_ARG_STRLEN.
+
+This will become especially dangerous for suid binaries and the default
+no limit for the stack size limit because those applications can be
+tricked to consume a large portion of the stack and a single glibc call
+could jump over the guard page. These attacks are not theoretical,
+unfortunatelly.
+
+Make those attacks less probable by increasing the stack guard gap
+to 1MB (on systems with 4k pages; but make it depend on the page size
+because systems with larger base pages might cap stack allocations in
+the PAGE_SIZE units) which should cover larger alloca() and VLA stack
+allocations. It is obviously not a full fix because the problem is
+somehow inherent, but it should reduce attack space a lot.
+
+One could argue that the gap size should be configurable from userspace,
+but that can be done later when somebody finds that the new 1MB is wrong
+for some special case applications. For now, add a kernel command line
+option (stack_guard_gap) to specify the stack gap size (in page units).
+
+Implementation wise, first delete all the old code for stack guard page:
+because although we could get away with accounting one extra page in a
+stack vma, accounting a larger gap can break userspace - case in point,
+a program run with "ulimit -S -v 20000" failed when the 1MB gap was
+counted for RLIMIT_AS; similar problems could come with RLIMIT_MLOCK
+and strict non-overcommit mode.
+
+Instead of keeping gap inside the stack vma, maintain the stack guard
+gap as a gap between vmas: using vm_start_gap() in place of vm_start
+(or vm_end_gap() in place of vm_end if VM_GROWSUP) in just those few
+places which need to respect the gap - mainly arch_get_unmapped_area(),
+and and the vma tree's subtree_gap support for that.
+
+Original-patch-by: Oleg Nesterov <oleg@redhat.com>
+Original-patch-by: Michal Hocko <mhocko@suse.com>
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Acked-by: Michal Hocko <mhocko@suse.com>
+Tested-by: Helge Deller <deller@gmx.de> # parisc
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+[wt: backport to 4.11: adjust context]
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/admin-guide/kernel-parameters.txt | 7 +
+ arch/arc/mm/mmap.c | 2
+ arch/arm/mm/mmap.c | 4
+ arch/frv/mm/elf-fdpic.c | 2
+ arch/mips/mm/mmap.c | 2
+ arch/parisc/kernel/sys_parisc.c | 15 +-
+ arch/powerpc/mm/hugetlbpage-radix.c | 2
+ arch/powerpc/mm/mmap.c | 4
+ arch/powerpc/mm/slice.c | 2
+ arch/s390/mm/mmap.c | 4
+ arch/sh/mm/mmap.c | 4
+ arch/sparc/kernel/sys_sparc_64.c | 4
+ arch/sparc/mm/hugetlbpage.c | 2
+ arch/tile/mm/hugetlbpage.c | 2
+ arch/x86/kernel/sys_x86_64.c | 4
+ arch/x86/mm/hugetlbpage.c | 2
+ arch/xtensa/kernel/syscall.c | 2
+ fs/hugetlbfs/inode.c | 2
+ fs/proc/task_mmu.c | 4
+ include/linux/mm.h | 53 ++++----
+ mm/gup.c | 5
+ mm/memory.c | 38 ------
+ mm/mmap.c | 149 ++++++++++++++----------
+ 23 files changed, 152 insertions(+), 163 deletions(-)
+
+--- a/Documentation/admin-guide/kernel-parameters.txt
++++ b/Documentation/admin-guide/kernel-parameters.txt
+@@ -3779,6 +3779,13 @@
+ spia_pedr=
+ spia_peddr=
+
++ stack_guard_gap= [MM]
++ override the default stack gap protection. The value
++ is in page units and it defines how many pages prior
++ to (for stacks growing down) resp. after (for stacks
++ growing up) the main stack are reserved for no other
++ mapping. Default value is 256 pages.
++
+ stacktrace [FTRACE]
+ Enabled the stack tracer on boot up.
+
+--- a/arch/arc/mm/mmap.c
++++ b/arch/arc/mm/mmap.c
+@@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/arm/mm/mmap.c
++++ b/arch/arm/mm/mmap.c
+@@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct fi
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/frv/mm/elf-fdpic.c
++++ b/arch/frv/mm/elf-fdpic.c
+@@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(str
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(current->mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ goto success;
+ }
+
+--- a/arch/mips/mm/mmap.c
++++ b/arch/mips/mm/mmap.c
+@@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_a
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(str
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ struct mm_struct *mm = current->mm;
+- struct vm_area_struct *vma;
++ struct vm_area_struct *vma, *prev;
+ unsigned long task_size = TASK_SIZE;
+ int do_color_align, last_mmap;
+ struct vm_unmapped_area_info info;
+@@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(str
+ else
+ addr = PAGE_ALIGN(addr);
+
+- vma = find_vma(mm, addr);
++ vma = find_vma_prev(mm, addr, &prev);
+ if (task_size - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)) &&
++ (!prev || addr >= vm_end_gap(prev)))
+ goto found_addr;
+ }
+
+@@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct fi
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+ {
+- struct vm_area_struct *vma;
++ struct vm_area_struct *vma, *prev;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int do_color_align, last_mmap;
+@@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct fi
+ addr = COLOR_ALIGN(addr, last_mmap, pgoff);
+ else
+ addr = PAGE_ALIGN(addr);
+- vma = find_vma(mm, addr);
++
++ vma = find_vma_prev(mm, addr, &prev);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)) &&
++ (!prev || addr >= vm_end_gap(prev)))
+ goto found_addr;
+ }
+
+--- a/arch/powerpc/mm/hugetlbpage-radix.c
++++ b/arch/powerpc/mm/hugetlbpage-radix.c
+@@ -65,7 +65,7 @@ radix__hugetlb_get_unmapped_area(struct
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+ /*
+--- a/arch/powerpc/mm/mmap.c
++++ b/arch/powerpc/mm/mmap.c
+@@ -107,7 +107,7 @@ radix__arch_get_unmapped_area(struct fil
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -143,7 +143,7 @@ radix__arch_get_unmapped_area_topdown(st
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/powerpc/mm/slice.c
++++ b/arch/powerpc/mm/slice.c
+@@ -105,7 +105,7 @@ static int slice_area_is_free(struct mm_
+ if ((mm->task_size - len) < addr)
+ return 0;
+ vma = find_vma(mm, addr);
+- return (!vma || (addr + len) <= vma->vm_start);
++ return (!vma || (addr + len) <= vm_start_gap(vma));
+ }
+
+ static int slice_low_has_vma(struct mm_struct *mm, unsigned long slice)
+--- a/arch/s390/mm/mmap.c
++++ b/arch/s390/mm/mmap.c
+@@ -100,7 +100,7 @@ arch_get_unmapped_area(struct file *filp
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -138,7 +138,7 @@ arch_get_unmapped_area_topdown(struct fi
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/sh/mm/mmap.c
++++ b/arch/sh/mm/mmap.c
+@@ -64,7 +64,7 @@ unsigned long arch_get_unmapped_area(str
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -114,7 +114,7 @@ arch_get_unmapped_area_topdown(struct fi
+
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/sparc/kernel/sys_sparc_64.c
++++ b/arch/sparc/kernel/sys_sparc_64.c
+@@ -120,7 +120,7 @@ unsigned long arch_get_unmapped_area(str
+
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -183,7 +183,7 @@ arch_get_unmapped_area_topdown(struct fi
+
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/sparc/mm/hugetlbpage.c
++++ b/arch/sparc/mm/hugetlbpage.c
+@@ -120,7 +120,7 @@ hugetlb_get_unmapped_area(struct file *f
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (task_size - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/tile/mm/hugetlbpage.c
++++ b/arch/tile/mm/hugetlbpage.c
+@@ -233,7 +233,7 @@ unsigned long hugetlb_get_unmapped_area(
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+ if (current->mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/x86/kernel/sys_x86_64.c
++++ b/arch/x86/kernel/sys_x86_64.c
+@@ -141,7 +141,7 @@ arch_get_unmapped_area(struct file *filp
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (end - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+@@ -184,7 +184,7 @@ arch_get_unmapped_area_topdown(struct fi
+ addr = PAGE_ALIGN(addr);
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/arch/x86/mm/hugetlbpage.c
++++ b/arch/x86/mm/hugetlbpage.c
+@@ -145,7 +145,7 @@ hugetlb_get_unmapped_area(struct file *f
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+ if (mm->get_unmapped_area == arch_get_unmapped_area)
+--- a/arch/xtensa/kernel/syscall.c
++++ b/arch/xtensa/kernel/syscall.c
+@@ -88,7 +88,7 @@ unsigned long arch_get_unmapped_area(str
+ /* At this point: (!vmm || addr < vmm->vm_end). */
+ if (TASK_SIZE - len < addr)
+ return -ENOMEM;
+- if (!vmm || addr + len <= vmm->vm_start)
++ if (!vmm || addr + len <= vm_start_gap(vmm))
+ return addr;
+ addr = vmm->vm_end;
+ if (flags & MAP_SHARED)
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -200,7 +200,7 @@ hugetlb_get_unmapped_area(struct file *f
+ addr = ALIGN(addr, huge_page_size(h));
+ vma = find_vma(mm, addr);
+ if (TASK_SIZE - len >= addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)))
+ return addr;
+ }
+
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -300,11 +300,7 @@ show_map_vma(struct seq_file *m, struct
+
+ /* We don't show the stack guard page in /proc/maps */
+ start = vma->vm_start;
+- if (stack_guard_page_start(vma, start))
+- start += PAGE_SIZE;
+ end = vma->vm_end;
+- if (stack_guard_page_end(vma, end))
+- end -= PAGE_SIZE;
+
+ seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
+ seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1381,12 +1381,6 @@ int clear_page_dirty_for_io(struct page
+
+ int get_cmdline(struct task_struct *task, char *buffer, int buflen);
+
+-/* Is the vma a continuation of the stack vma above it? */
+-static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr)
+-{
+- return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+-}
+-
+ static inline bool vma_is_anonymous(struct vm_area_struct *vma)
+ {
+ return !vma->vm_ops;
+@@ -1402,28 +1396,6 @@ bool vma_is_shmem(struct vm_area_struct
+ static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+ #endif
+
+-static inline int stack_guard_page_start(struct vm_area_struct *vma,
+- unsigned long addr)
+-{
+- return (vma->vm_flags & VM_GROWSDOWN) &&
+- (vma->vm_start == addr) &&
+- !vma_growsdown(vma->vm_prev, addr);
+-}
+-
+-/* Is the vma a continuation of the stack vma below it? */
+-static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr)
+-{
+- return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP);
+-}
+-
+-static inline int stack_guard_page_end(struct vm_area_struct *vma,
+- unsigned long addr)
+-{
+- return (vma->vm_flags & VM_GROWSUP) &&
+- (vma->vm_end == addr) &&
+- !vma_growsup(vma->vm_next, addr);
+-}
+-
+ int vma_is_stack_for_current(struct vm_area_struct *vma);
+
+ extern unsigned long move_page_tables(struct vm_area_struct *vma,
+@@ -2210,6 +2182,7 @@ void page_cache_async_readahead(struct a
+ pgoff_t offset,
+ unsigned long size);
+
++extern unsigned long stack_guard_gap;
+ /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
+ extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+
+@@ -2238,6 +2211,30 @@ static inline struct vm_area_struct * fi
+ return vma;
+ }
+
++static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
++{
++ unsigned long vm_start = vma->vm_start;
++
++ if (vma->vm_flags & VM_GROWSDOWN) {
++ vm_start -= stack_guard_gap;
++ if (vm_start > vma->vm_start)
++ vm_start = 0;
++ }
++ return vm_start;
++}
++
++static inline unsigned long vm_end_gap(struct vm_area_struct *vma)
++{
++ unsigned long vm_end = vma->vm_end;
++
++ if (vma->vm_flags & VM_GROWSUP) {
++ vm_end += stack_guard_gap;
++ if (vm_end < vma->vm_end)
++ vm_end = -PAGE_SIZE;
++ }
++ return vm_end;
++}
++
+ static inline unsigned long vma_pages(struct vm_area_struct *vma)
+ {
+ return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -387,11 +387,6 @@ static int faultin_page(struct task_stru
+ /* mlock all present pages, but do not fault in new pages */
+ if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
+ return -ENOENT;
+- /* For mm_populate(), just skip the stack guard page. */
+- if ((*flags & FOLL_POPULATE) &&
+- (stack_guard_page_start(vma, address) ||
+- stack_guard_page_end(vma, address + PAGE_SIZE)))
+- return -ENOENT;
+ if (*flags & FOLL_WRITE)
+ fault_flags |= FAULT_FLAG_WRITE;
+ if (*flags & FOLL_REMOTE)
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -2855,40 +2855,6 @@ out_release:
+ }
+
+ /*
+- * This is like a special single-page "expand_{down|up}wards()",
+- * except we must first make sure that 'address{-|+}PAGE_SIZE'
+- * doesn't hit another vma.
+- */
+-static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned long address)
+-{
+- address &= PAGE_MASK;
+- if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
+- struct vm_area_struct *prev = vma->vm_prev;
+-
+- /*
+- * Is there a mapping abutting this one below?
+- *
+- * That's only ok if it's the same stack mapping
+- * that has gotten split..
+- */
+- if (prev && prev->vm_end == address)
+- return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
+-
+- return expand_downwards(vma, address - PAGE_SIZE);
+- }
+- if ((vma->vm_flags & VM_GROWSUP) && address + PAGE_SIZE == vma->vm_end) {
+- struct vm_area_struct *next = vma->vm_next;
+-
+- /* As VM_GROWSDOWN but s/below/above/ */
+- if (next && next->vm_start == address + PAGE_SIZE)
+- return next->vm_flags & VM_GROWSUP ? 0 : -ENOMEM;
+-
+- return expand_upwards(vma, address + PAGE_SIZE);
+- }
+- return 0;
+-}
+-
+-/*
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
+@@ -2904,10 +2870,6 @@ static int do_anonymous_page(struct vm_f
+ if (vma->vm_flags & VM_SHARED)
+ return VM_FAULT_SIGBUS;
+
+- /* Check if we need to add a guard page to the stack */
+- if (check_stack_guard_page(vma, vmf->address) < 0)
+- return VM_FAULT_SIGSEGV;
+-
+ /*
+ * Use pte_alloc() instead of pte_alloc_map(). We can't run
+ * pte_offset_map() on pmds where a huge pmd might be created
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -183,6 +183,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ unsigned long retval;
+ unsigned long newbrk, oldbrk;
+ struct mm_struct *mm = current->mm;
++ struct vm_area_struct *next;
+ unsigned long min_brk;
+ bool populate;
+ LIST_HEAD(uf);
+@@ -229,7 +230,8 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
+ }
+
+ /* Check against existing mmap mappings. */
+- if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
++ next = find_vma(mm, oldbrk);
++ if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
+ goto out;
+
+ /* Ok, looks good - let it rip. */
+@@ -253,10 +255,22 @@ out:
+
+ static long vma_compute_subtree_gap(struct vm_area_struct *vma)
+ {
+- unsigned long max, subtree_gap;
+- max = vma->vm_start;
+- if (vma->vm_prev)
+- max -= vma->vm_prev->vm_end;
++ unsigned long max, prev_end, subtree_gap;
++
++ /*
++ * Note: in the rare case of a VM_GROWSDOWN above a VM_GROWSUP, we
++ * allow two stack_guard_gaps between them here, and when choosing
++ * an unmapped area; whereas when expanding we only require one.
++ * That's a little inconsistent, but keeps the code here simpler.
++ */
++ max = vm_start_gap(vma);
++ if (vma->vm_prev) {
++ prev_end = vm_end_gap(vma->vm_prev);
++ if (max > prev_end)
++ max -= prev_end;
++ else
++ max = 0;
++ }
+ if (vma->vm_rb.rb_left) {
+ subtree_gap = rb_entry(vma->vm_rb.rb_left,
+ struct vm_area_struct, vm_rb)->rb_subtree_gap;
+@@ -352,7 +366,7 @@ static void validate_mm(struct mm_struct
+ anon_vma_unlock_read(anon_vma);
+ }
+
+- highest_address = vma->vm_end;
++ highest_address = vm_end_gap(vma);
+ vma = vma->vm_next;
+ i++;
+ }
+@@ -541,7 +555,7 @@ void __vma_link_rb(struct mm_struct *mm,
+ if (vma->vm_next)
+ vma_gap_update(vma->vm_next);
+ else
+- mm->highest_vm_end = vma->vm_end;
++ mm->highest_vm_end = vm_end_gap(vma);
+
+ /*
+ * vma->vm_prev wasn't known when we followed the rbtree to find the
+@@ -856,7 +870,7 @@ again:
+ vma_gap_update(vma);
+ if (end_changed) {
+ if (!next)
+- mm->highest_vm_end = end;
++ mm->highest_vm_end = vm_end_gap(vma);
+ else if (!adjust_next)
+ vma_gap_update(next);
+ }
+@@ -941,7 +955,7 @@ again:
+ * mm->highest_vm_end doesn't need any update
+ * in remove_next == 1 case.
+ */
+- VM_WARN_ON(mm->highest_vm_end != end);
++ VM_WARN_ON(mm->highest_vm_end != vm_end_gap(vma));
+ }
+ }
+ if (insert && file)
+@@ -1787,7 +1801,7 @@ unsigned long unmapped_area(struct vm_un
+
+ while (true) {
+ /* Visit left subtree if it looks promising */
+- gap_end = vma->vm_start;
++ gap_end = vm_start_gap(vma);
+ if (gap_end >= low_limit && vma->vm_rb.rb_left) {
+ struct vm_area_struct *left =
+ rb_entry(vma->vm_rb.rb_left,
+@@ -1798,7 +1812,7 @@ unsigned long unmapped_area(struct vm_un
+ }
+ }
+
+- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ check_current:
+ /* Check if current node has a suitable gap */
+ if (gap_start > high_limit)
+@@ -1825,8 +1839,8 @@ check_current:
+ vma = rb_entry(rb_parent(prev),
+ struct vm_area_struct, vm_rb);
+ if (prev == vma->vm_rb.rb_left) {
+- gap_start = vma->vm_prev->vm_end;
+- gap_end = vma->vm_start;
++ gap_start = vm_end_gap(vma->vm_prev);
++ gap_end = vm_start_gap(vma);
+ goto check_current;
+ }
+ }
+@@ -1890,7 +1904,7 @@ unsigned long unmapped_area_topdown(stru
+
+ while (true) {
+ /* Visit right subtree if it looks promising */
+- gap_start = vma->vm_prev ? vma->vm_prev->vm_end : 0;
++ gap_start = vma->vm_prev ? vm_end_gap(vma->vm_prev) : 0;
+ if (gap_start <= high_limit && vma->vm_rb.rb_right) {
+ struct vm_area_struct *right =
+ rb_entry(vma->vm_rb.rb_right,
+@@ -1903,7 +1917,7 @@ unsigned long unmapped_area_topdown(stru
+
+ check_current:
+ /* Check if current node has a suitable gap */
+- gap_end = vma->vm_start;
++ gap_end = vm_start_gap(vma);
+ if (gap_end < low_limit)
+ return -ENOMEM;
+ if (gap_start <= high_limit && gap_end - gap_start >= length)
+@@ -1929,7 +1943,7 @@ check_current:
+ struct vm_area_struct, vm_rb);
+ if (prev == vma->vm_rb.rb_right) {
+ gap_start = vma->vm_prev ?
+- vma->vm_prev->vm_end : 0;
++ vm_end_gap(vma->vm_prev) : 0;
+ goto check_current;
+ }
+ }
+@@ -1967,7 +1981,7 @@ arch_get_unmapped_area(struct file *filp
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+ {
+ struct mm_struct *mm = current->mm;
+- struct vm_area_struct *vma;
++ struct vm_area_struct *vma, *prev;
+ struct vm_unmapped_area_info info;
+
+ if (len > TASK_SIZE - mmap_min_addr)
+@@ -1978,9 +1992,10 @@ arch_get_unmapped_area(struct file *filp
+
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+- vma = find_vma(mm, addr);
++ vma = find_vma_prev(mm, addr, &prev);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)) &&
++ (!prev || addr >= vm_end_gap(prev)))
+ return addr;
+ }
+
+@@ -2003,7 +2018,7 @@ arch_get_unmapped_area_topdown(struct fi
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+ {
+- struct vm_area_struct *vma;
++ struct vm_area_struct *vma, *prev;
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ struct vm_unmapped_area_info info;
+@@ -2018,9 +2033,10 @@ arch_get_unmapped_area_topdown(struct fi
+ /* requesting a specific address */
+ if (addr) {
+ addr = PAGE_ALIGN(addr);
+- vma = find_vma(mm, addr);
++ vma = find_vma_prev(mm, addr, &prev);
+ if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+- (!vma || addr + len <= vma->vm_start))
++ (!vma || addr + len <= vm_start_gap(vma)) &&
++ (!prev || addr >= vm_end_gap(prev)))
+ return addr;
+ }
+
+@@ -2155,21 +2171,19 @@ find_vma_prev(struct mm_struct *mm, unsi
+ * update accounting. This is shared with both the
+ * grow-up and grow-down cases.
+ */
+-static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow)
++static int acct_stack_growth(struct vm_area_struct *vma,
++ unsigned long size, unsigned long grow)
+ {
+ struct mm_struct *mm = vma->vm_mm;
+ struct rlimit *rlim = current->signal->rlim;
+- unsigned long new_start, actual_size;
++ unsigned long new_start;
+
+ /* address space limit tests */
+ if (!may_expand_vm(mm, vma->vm_flags, grow))
+ return -ENOMEM;
+
+ /* Stack limit test */
+- actual_size = size;
+- if (size && (vma->vm_flags & (VM_GROWSUP | VM_GROWSDOWN)))
+- actual_size -= PAGE_SIZE;
+- if (actual_size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
++ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur))
+ return -ENOMEM;
+
+ /* mlock limit tests */
+@@ -2207,17 +2221,30 @@ static int acct_stack_growth(struct vm_a
+ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
+ {
+ struct mm_struct *mm = vma->vm_mm;
++ struct vm_area_struct *next;
++ unsigned long gap_addr;
+ int error = 0;
+
+ if (!(vma->vm_flags & VM_GROWSUP))
+ return -EFAULT;
+
+ /* Guard against wrapping around to address 0. */
+- if (address < PAGE_ALIGN(address+4))
+- address = PAGE_ALIGN(address+4);
+- else
++ address &= PAGE_MASK;
++ address += PAGE_SIZE;
++ if (!address)
+ return -ENOMEM;
+
++ /* Enforce stack_guard_gap */
++ gap_addr = address + stack_guard_gap;
++ if (gap_addr < address)
++ return -ENOMEM;
++ next = vma->vm_next;
++ if (next && next->vm_start < gap_addr) {
++ if (!(next->vm_flags & VM_GROWSUP))
++ return -ENOMEM;
++ /* Check that both stack segments have the same anon_vma? */
++ }
++
+ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
+@@ -2261,7 +2288,7 @@ int expand_upwards(struct vm_area_struct
+ if (vma->vm_next)
+ vma_gap_update(vma->vm_next);
+ else
+- mm->highest_vm_end = address;
++ mm->highest_vm_end = vm_end_gap(vma);
+ spin_unlock(&mm->page_table_lock);
+
+ perf_event_mmap(vma);
+@@ -2282,6 +2309,8 @@ int expand_downwards(struct vm_area_stru
+ unsigned long address)
+ {
+ struct mm_struct *mm = vma->vm_mm;
++ struct vm_area_struct *prev;
++ unsigned long gap_addr;
+ int error;
+
+ address &= PAGE_MASK;
+@@ -2289,6 +2318,17 @@ int expand_downwards(struct vm_area_stru
+ if (error)
+ return error;
+
++ /* Enforce stack_guard_gap */
++ gap_addr = address - stack_guard_gap;
++ if (gap_addr > address)
++ return -ENOMEM;
++ prev = vma->vm_prev;
++ if (prev && prev->vm_end > gap_addr) {
++ if (!(prev->vm_flags & VM_GROWSDOWN))
++ return -ENOMEM;
++ /* Check that both stack segments have the same anon_vma? */
++ }
++
+ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
+@@ -2343,28 +2383,25 @@ int expand_downwards(struct vm_area_stru
+ return error;
+ }
+
+-/*
+- * Note how expand_stack() refuses to expand the stack all the way to
+- * abut the next virtual mapping, *unless* that mapping itself is also
+- * a stack mapping. We want to leave room for a guard page, after all
+- * (the guard page itself is not added here, that is done by the
+- * actual page faulting logic)
+- *
+- * This matches the behavior of the guard page logic (see mm/memory.c:
+- * check_stack_guard_page()), which only allows the guard page to be
+- * removed under these circumstances.
+- */
++/* enforced gap between the expanding stack and other mappings. */
++unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
++
++static int __init cmdline_parse_stack_guard_gap(char *p)
++{
++ unsigned long val;
++ char *endptr;
++
++ val = simple_strtoul(p, &endptr, 10);
++ if (!*endptr)
++ stack_guard_gap = val << PAGE_SHIFT;
++
++ return 0;
++}
++__setup("stack_guard_gap=", cmdline_parse_stack_guard_gap);
++
+ #ifdef CONFIG_STACK_GROWSUP
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+- struct vm_area_struct *next;
+-
+- address &= PAGE_MASK;
+- next = vma->vm_next;
+- if (next && next->vm_start == address + PAGE_SIZE) {
+- if (!(next->vm_flags & VM_GROWSUP))
+- return -ENOMEM;
+- }
+ return expand_upwards(vma, address);
+ }
+
+@@ -2386,14 +2423,6 @@ find_extend_vma(struct mm_struct *mm, un
+ #else
+ int expand_stack(struct vm_area_struct *vma, unsigned long address)
+ {
+- struct vm_area_struct *prev;
+-
+- address &= PAGE_MASK;
+- prev = vma->vm_prev;
+- if (prev && prev->vm_end == address) {
+- if (!(prev->vm_flags & VM_GROWSDOWN))
+- return -ENOMEM;
+- }
+ return expand_downwards(vma, address);
+ }
+
+@@ -2491,7 +2520,7 @@ detach_vmas_to_be_unmapped(struct mm_str
+ vma->vm_prev = prev;
+ vma_gap_update(vma);
+ } else
+- mm->highest_vm_end = prev ? prev->vm_end : 0;
++ mm->highest_vm_end = prev ? vm_end_gap(prev) : 0;
+ tail_vma->vm_next = NULL;
+
+ /* Kill the cache */
diff --git a/sources b/sources
index 161469091..910639ba0 100644
--- a/sources
+++ b/sources
@@ -1,3 +1,3 @@
SHA512 (perf-man-4.11.tar.gz) = 0b070d2f10a743329de2f532e2d7e19ef385a3e6ef3c700b591ae2697604dbe542b36e31121b3e37517ee8071ab800386fa8663c24a5b36520a18e096c6eefc8
SHA512 (linux-4.11.tar.xz) = 6610eed97ffb7207c71771198c36179b8244ace7222bebb109507720e26c5f17d918079a56d5febdd8605844d67fb2df0ebe910fa2f2f53690daf6e2a8ad09c3
-SHA512 (patch-4.11.5.xz) = c337470c79961c88b806a449ee3bbb3b5428c1f1d6751133de00b67901a6ad8db2ed8899e0b5ca89ff902f29f58a6721053d25e286a2120e7cf2e578907c8645
+SHA512 (patch-4.11.6.xz) = e0e2de7d721575cd2770fa4fa61a1ecdfd54bb4239725363a90ab3b670aab44531a7c0f198ff769080643e86ce7e4806d26bb436a43437747e123715061b278b
diff --git a/vc4-tformat-fixes.patch b/vc4-tformat-fixes.patch
new file mode 100644
index 000000000..4941fb92f
--- /dev/null
+++ b/vc4-tformat-fixes.patch
@@ -0,0 +1,428 @@
+From ac4e55169649132123c4f2f39e0b02b5c849bae8 Mon Sep 17 00:00:00 2001
+From: Peter Robinson <pbrobinson@gmail.com>
+Date: Mon, 19 Jun 2017 13:20:48 +0100
+Subject: [PATCH] drm/vc4: Add T-format scanout support
+
+The T tiling format is what V3D uses for textures, with no raster
+support at all until later revisions of the hardware (and always at a
+large 3D performance penalty). If we can't scan out V3D's format,
+then we often need to do a relayout at some stage of the pipeline,
+either right before texturing from the scanout buffer (common in X11
+without a compositor) or between a tiled screen buffer right before
+scanout (an option I've considered in trying to resolve this
+inconsistency, but which means needing to use the dirty fb ioctl and
+having some update policy).
+
+T-format scanout lets us avoid either of those shadow copies, for a
+massive, obvious performance improvement to X11 window dragging
+without a compositor. Unfortunately, enabling a compositor to work
+around the discrepancy has turned out to be too costly in memory
+consumption for the Raspbian distribution.
+
+Because the HVS operates a scanline at a time, compositing from T does
+increase the memory bandwidth cost of scanout. On my 1920x1080@32bpp
+display on a RPi3, we go from about 15% of system memory bandwidth
+with linear to about 20% with tiled. However, for X11 this still ends
+up being a huge performance win in active usage.
+
+This patch doesn't yet handle src_x/src_y offsetting within the tiled
+buffer. However, we fail to do so for untiled buffers already.
+
+drm/vc4: Add get/set tiling ioctls.
+
+This allows mesa to set the tiling format for a BO and have that
+tiling format be respected by mesa on the other side of an
+import/export (and by vc4 scanout in the kernel), without defining a
+protocol to pass the tiling through userspace.
+
+Signed-off-by: Eric Anholt <eric@anholt.net>
+Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
+Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
+---
+ drivers/gpu/drm/vc4/vc4_bo.c | 83 +++++++++++++++++++++++++++++++++++++++++
+ drivers/gpu/drm/vc4/vc4_drv.c | 2 +
+ drivers/gpu/drm/vc4/vc4_drv.h | 6 +++
+ drivers/gpu/drm/vc4/vc4_kms.c | 41 +++++++++++++++++++-
+ drivers/gpu/drm/vc4/vc4_plane.c | 31 +++++++++++++--
+ drivers/gpu/drm/vc4/vc4_regs.h | 19 ++++++++++
+ include/uapi/drm/drm_fourcc.h | 22 +++++++++++
+ include/uapi/drm/vc4_drm.h | 16 ++++++++
+ 8 files changed, 215 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
+index 3f6704cf6608..0918346c248e 100644
+--- a/drivers/gpu/drm/vc4/vc4_bo.c
++++ b/drivers/gpu/drm/vc4/vc4_bo.c
+@@ -325,6 +325,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo)
+ bo->validated_shader = NULL;
+ }
+
++ bo->t_format = false;
+ bo->free_time = jiffies;
+ list_add(&bo->size_head, cache_list);
+ list_add(&bo->unref_head, &vc4->bo_cache.time_list);
+@@ -525,6 +526,88 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+ return ret;
+ }
+
++/**
++ * vc4_set_tiling_ioctl() - Sets the tiling modifier for a BO.
++ * @dev: DRM device
++ * @data: ioctl argument
++ * @file_priv: DRM file for this fd
++ *
++ * The tiling state of the BO decides the default modifier of an fb if
++ * no specific modifier was set by userspace, and the return value of
++ * vc4_get_tiling_ioctl() (so that userspace can treat a BO it
++ * received from dmabuf as the same tiling format as the producer
++ * used).
++ */
++int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv)
++{
++ struct drm_vc4_set_tiling *args = data;
++ struct drm_gem_object *gem_obj;
++ struct vc4_bo *bo;
++ bool t_format;
++
++ if (args->flags != 0)
++ return -EINVAL;
++
++ switch (args->modifier) {
++ case DRM_FORMAT_MOD_NONE:
++ t_format = false;
++ break;
++ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
++ t_format = true;
++ break;
++ default:
++ return -EINVAL;
++ }
++
++ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
++ if (!gem_obj) {
++ DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
++ return -ENOENT;
++ }
++ bo = to_vc4_bo(gem_obj);
++ bo->t_format = t_format;
++
++ drm_gem_object_unreference_unlocked(gem_obj);
++
++ return 0;
++}
++
++/**
++ * vc4_get_tiling_ioctl() - Gets the tiling modifier for a BO.
++ * @dev: DRM device
++ * @data: ioctl argument
++ * @file_priv: DRM file for this fd
++ *
++ * Returns the tiling modifier for a BO as set by vc4_set_tiling_ioctl().
++ */
++int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv)
++{
++ struct drm_vc4_get_tiling *args = data;
++ struct drm_gem_object *gem_obj;
++ struct vc4_bo *bo;
++
++ if (args->flags != 0 || args->modifier != 0)
++ return -EINVAL;
++
++ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
++ if (!gem_obj) {
++ DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
++ return -ENOENT;
++ }
++ bo = to_vc4_bo(gem_obj);
++
++ if (bo->t_format)
++ args->modifier = DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
++ else
++ args->modifier = DRM_FORMAT_MOD_NONE;
++
++ drm_gem_object_unreference_unlocked(gem_obj);
++
++ return 0;
++}
++
+ void vc4_bo_cache_init(struct drm_device *dev)
+ {
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
+index a459745e96f7..2edf2d4c5156 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.c
++++ b/drivers/gpu/drm/vc4/vc4_drv.c
+@@ -122,6 +122,8 @@ static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
+ DRM_ROOT_ONLY),
+ DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
++ DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
++ DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
+ };
+
+ static struct drm_driver vc4_drm_driver = {
+diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
+index 0e59f3ee1b83..64f0cb1f889e 100644
+--- a/drivers/gpu/drm/vc4/vc4_drv.h
++++ b/drivers/gpu/drm/vc4/vc4_drv.h
+@@ -135,6 +135,8 @@ struct vc4_bo {
+ */
+ uint64_t write_seqno;
+
++ bool t_format;
++
+ /* List entry for the BO's position in either
+ * vc4_exec_info->unref_list or vc4_dev->bo_cache.time_list
+ */
+@@ -433,6 +435,10 @@ int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+ int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
++int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv);
++int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
++ struct drm_file *file_priv);
+ int vc4_get_hang_state_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+ int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
+diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
+index ad7925a9e0ea..25be60016527 100644
+--- a/drivers/gpu/drm/vc4/vc4_kms.c
++++ b/drivers/gpu/drm/vc4/vc4_kms.c
+@@ -202,11 +202,50 @@ static int vc4_atomic_commit(struct drm_device *dev,
+ return 0;
+ }
+
++static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev,
++ struct drm_file *file_priv,
++ const struct drm_mode_fb_cmd2 *mode_cmd)
++{
++ struct drm_mode_fb_cmd2 mode_cmd_local;
++
++ /* If the user didn't specify a modifier, use the
++ * vc4_set_tiling_ioctl() state for the BO.
++ */
++ if (!(mode_cmd->flags & DRM_MODE_FB_MODIFIERS)) {
++ struct drm_gem_object *gem_obj;
++ struct vc4_bo *bo;
++
++ gem_obj = drm_gem_object_lookup(file_priv,
++ mode_cmd->handles[0]);
++ if (!gem_obj) {
++ DRM_ERROR("Failed to look up GEM BO %d\n",
++ mode_cmd->handles[0]);
++ return ERR_PTR(-ENOENT);
++ }
++ bo = to_vc4_bo(gem_obj);
++
++ mode_cmd_local = *mode_cmd;
++
++ if (bo->t_format) {
++ mode_cmd_local.modifier[0] =
++ DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED;
++ } else {
++ mode_cmd_local.modifier[0] = DRM_FORMAT_MOD_NONE;
++ }
++
++ drm_gem_object_unreference_unlocked(gem_obj);
++
++ mode_cmd = &mode_cmd_local;
++ }
++
++ return drm_fb_cma_create(dev, file_priv, mode_cmd);
++}
++
+ static const struct drm_mode_config_funcs vc4_mode_funcs = {
+ .output_poll_changed = vc4_output_poll_changed,
+ .atomic_check = drm_atomic_helper_check,
+ .atomic_commit = vc4_atomic_commit,
+- .fb_create = drm_fb_cma_create,
++ .fb_create = vc4_fb_create,
+ };
+
+ int vc4_kms_load(struct drm_device *dev)
+diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
+index f7a229df572d..99f4d4b48015 100644
+--- a/drivers/gpu/drm/vc4/vc4_plane.c
++++ b/drivers/gpu/drm/vc4/vc4_plane.c
+@@ -498,8 +498,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
+ u32 ctl0_offset = vc4_state->dlist_count;
+ const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
+ int num_planes = drm_format_num_planes(format->drm);
+- u32 scl0, scl1;
+- u32 lbm_size;
++ u32 scl0, scl1, pitch0;
++ u32 lbm_size, tiling;
+ unsigned long irqflags;
+ int ret, i;
+
+@@ -540,11 +540,31 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
+ scl1 = vc4_get_scl_field(state, 0);
+ }
+
++ switch (fb->modifier) {
++ case DRM_FORMAT_MOD_LINEAR:
++ tiling = SCALER_CTL0_TILING_LINEAR;
++ pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);
++ break;
++ case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
++ tiling = SCALER_CTL0_TILING_256B_OR_T;
++
++ pitch0 = (VC4_SET_FIELD(0, SCALER_PITCH0_TILE_Y_OFFSET),
++ VC4_SET_FIELD(0, SCALER_PITCH0_TILE_WIDTH_L),
++ VC4_SET_FIELD((vc4_state->src_w[0] + 31) >> 5,
++ SCALER_PITCH0_TILE_WIDTH_R));
++ break;
++ default:
++ DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
++ (long long)fb->modifier);
++ return -EINVAL;
++ }
++
+ /* Control word */
+ vc4_dlist_write(vc4_state,
+ SCALER_CTL0_VALID |
+ (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
+ (format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
++ VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
+ (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
+ VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
+ VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
+@@ -598,8 +618,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
+ for (i = 0; i < num_planes; i++)
+ vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+
+- /* Pitch word 0/1/2 */
+- for (i = 0; i < num_planes; i++) {
++ /* Pitch word 0 */
++ vc4_dlist_write(vc4_state, pitch0);
++
++ /* Pitch word 1/2 */
++ for (i = 1; i < num_planes; i++) {
+ vc4_dlist_write(vc4_state,
+ VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
+ }
+diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
+index 385405a2df05..362d8b7f8a5f 100644
+--- a/drivers/gpu/drm/vc4/vc4_regs.h
++++ b/drivers/gpu/drm/vc4/vc4_regs.h
+@@ -604,6 +604,13 @@ enum hvs_pixel_format {
+ #define SCALER_CTL0_SIZE_MASK VC4_MASK(29, 24)
+ #define SCALER_CTL0_SIZE_SHIFT 24
+
++#define SCALER_CTL0_TILING_MASK VC4_MASK(21, 20)
++#define SCALER_CTL0_TILING_SHIFT 20
++#define SCALER_CTL0_TILING_LINEAR 0
++#define SCALER_CTL0_TILING_64B 1
++#define SCALER_CTL0_TILING_128B 2
++#define SCALER_CTL0_TILING_256B_OR_T 3
++
+ #define SCALER_CTL0_HFLIP BIT(16)
+ #define SCALER_CTL0_VFLIP BIT(15)
+
+@@ -733,7 +740,19 @@ enum hvs_pixel_format {
+ #define SCALER_PPF_KERNEL_OFFSET_SHIFT 0
+ #define SCALER_PPF_KERNEL_UNCACHED BIT(31)
+
++/* PITCH0/1/2 fields for raster. */
+ #define SCALER_SRC_PITCH_MASK VC4_MASK(15, 0)
+ #define SCALER_SRC_PITCH_SHIFT 0
+
++/* PITCH0 fields for T-tiled. */
++#define SCALER_PITCH0_TILE_WIDTH_L_MASK VC4_MASK(22, 16)
++#define SCALER_PITCH0_TILE_WIDTH_L_SHIFT 16
++#define SCALER_PITCH0_TILE_LINE_DIR BIT(15)
++#define SCALER_PITCH0_TILE_INITIAL_LINE_DIR BIT(14)
++/* Y offset within a tile. */
++#define SCALER_PITCH0_TILE_Y_OFFSET_MASK VC4_MASK(13, 7)
++#define SCALER_PITCH0_TILE_Y_OFFSET_SHIFT 7
++#define SCALER_PITCH0_TILE_WIDTH_R_MASK VC4_MASK(6, 0)
++#define SCALER_PITCH0_TILE_WIDTH_R_SHIFT 0
++
+ #endif /* VC4_REGS_H */
+diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h
+index ef20abb8119b..9aaf633788a7 100644
+--- a/include/uapi/drm/drm_fourcc.h
++++ b/include/uapi/drm/drm_fourcc.h
+@@ -168,6 +168,7 @@ extern "C" {
+ #define DRM_FORMAT_MOD_VENDOR_SAMSUNG 0x04
+ #define DRM_FORMAT_MOD_VENDOR_QCOM 0x05
+ #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
++#define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
+ /* add more to the end as needed */
+
+ #define fourcc_mod_code(vendor, val) \
+@@ -292,6 +293,27 @@ extern "C" {
+ */
+ #define DRM_FORMAT_MOD_VIVANTE_SPLIT_SUPER_TILED fourcc_mod_code(VIVANTE, 4)
+
++/*
++ * Broadcom VC4 "T" format
++ *
++ * This is the primary layout that the V3D GPU can texture from (it
++ * can't do linear). The T format has:
++ *
++ * - 64b utiles of pixels in a raster-order grid according to cpp. It's 4x4
++ * pixels at 32 bit depth.
++ *
++ * - 1k subtiles made of a 4x4 raster-order grid of 64b utiles (so usually
++ * 16x16 pixels).
++ *
++ * - 4k tiles made of a 2x2 grid of 1k subtiles (so usually 32x32 pixels). On
++ * even 4k tile rows, they're arranged as (BL, TL, TR, BR), and on odd rows
++ * they're (TR, BR, BL, TL), where bottom left is start of memory.
++ *
++ * - an image made of 4k tiles in rows either left-to-right (even rows of 4k
++ * tiles) or right-to-left (odd rows of 4k tiles).
++ */
++#define DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED fourcc_mod_code(BROADCOM, 1)
++
+ #if defined(__cplusplus)
+ }
+ #endif
+diff --git a/include/uapi/drm/vc4_drm.h b/include/uapi/drm/vc4_drm.h
+index f07a09016726..6ac4c5c014cb 100644
+--- a/include/uapi/drm/vc4_drm.h
++++ b/include/uapi/drm/vc4_drm.h
+@@ -38,6 +38,8 @@ extern "C" {
+ #define DRM_VC4_CREATE_SHADER_BO 0x05
+ #define DRM_VC4_GET_HANG_STATE 0x06
+ #define DRM_VC4_GET_PARAM 0x07
++#define DRM_VC4_SET_TILING 0x08
++#define DRM_VC4_GET_TILING 0x09
+
+ #define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
+ #define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
+@@ -47,6 +49,8 @@ extern "C" {
+ #define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+ #define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
+ #define DRM_IOCTL_VC4_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_PARAM, struct drm_vc4_get_param)
++#define DRM_IOCTL_VC4_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
++#define DRM_IOCTL_VC4_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
+
+ struct drm_vc4_submit_rcl_surface {
+ __u32 hindex; /* Handle index, or ~0 if not present. */
+@@ -295,6 +299,18 @@ struct drm_vc4_get_param {
+ __u64 value;
+ };
+
++struct drm_vc4_get_tiling {
++ __u32 handle;
++ __u32 flags;
++ __u64 modifier;
++};
++
++struct drm_vc4_set_tiling {
++ __u32 handle;
++ __u32 flags;
++ __u64 modifier;
++};
++
+ #if defined(__cplusplus)
+ }
+ #endif
+--
+2.13.0
+