summaryrefslogtreecommitdiffstats
path: root/0001-mm-CONFIG_NR_ZONES_EXTENDED.patch
diff options
context:
space:
mode:
Diffstat (limited to '0001-mm-CONFIG_NR_ZONES_EXTENDED.patch')
-rw-r--r--0001-mm-CONFIG_NR_ZONES_EXTENDED.patch173
1 files changed, 173 insertions, 0 deletions
diff --git a/0001-mm-CONFIG_NR_ZONES_EXTENDED.patch b/0001-mm-CONFIG_NR_ZONES_EXTENDED.patch
new file mode 100644
index 000000000..44ef3662b
--- /dev/null
+++ b/0001-mm-CONFIG_NR_ZONES_EXTENDED.patch
@@ -0,0 +1,173 @@
+From 8b368e8e961944105945fbe36f3f264252bfd19a Mon Sep 17 00:00:00 2001
+From: Dan Williams <dan.j.williams@intel.com>
+Date: Thu, 25 Feb 2016 01:02:30 +0000
+Subject: [PATCH] mm: CONFIG_NR_ZONES_EXTENDED
+
+ZONE_DEVICE (merged in 4.3) and ZONE_CMA (proposed) are examples of new mm
+zones that are bumping up against the current maximum limit of 4 zones,
+i.e. 2 bits in page->flags. When adding a zone this equation still needs
+to be satisified:
+
+ SECTIONS_WIDTH + ZONES_WIDTH + NODES_SHIFT + LAST_CPUPID_SHIFT
+ <= BITS_PER_LONG - NR_PAGEFLAGS
+
+ZONE_DEVICE currently tries to satisfy this equation by requiring that
+ZONE_DMA be disabled, but this is untenable given generic kernels want to
+support ZONE_DEVICE and ZONE_DMA simultaneously. ZONE_CMA would like to
+increase the amount of memory covered per section, but that limits the
+minimum granularity at which consecutive memory ranges can be added via
+devm_memremap_pages().
+
+The trade-off of what is acceptable to sacrifice depends heavily on the
+platform. For example, ZONE_CMA is targeted for 32-bit platforms where
+page->flags is constrained, but those platforms likely do not care about
+the minimum granularity of memory hotplug. A big iron machine with 1024
+numa nodes can likely sacrifice ZONE_DMA where a general purpose
+distribution kernel can not.
+
+CONFIG_NR_ZONES_EXTENDED is a configuration symbol that gets selected when
+the number of configured zones exceeds 4. It documents the configuration
+symbols and definitions that get modified when ZONES_WIDTH is greater than
+2.
+
+For now, it steals a bit from NODES_SHIFT. Later on it can be used to
+document the definitions that get modified when a 32-bit configuration
+wants more zone bits.
+
+Note that GFP_ZONE_TABLE poses an interesting constraint since
+include/linux/gfp.h gets included by the 32-bit portion of a 64-bit build.
+We need to be careful to only build the table for zones that have a
+corresponding gfp_t flag. GFP_ZONES_SHIFT is introduced for this purpose.
+This patch does not attempt to solve the problem of adding a new zone
+that also has a corresponding GFP_ flag.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=110931
+Fixes: 033fbae988fc ("mm: ZONE_DEVICE for "device memory"")
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Reported-by: Mark <markk@clara.co.uk>
+Cc: Mel Gorman <mgorman@suse.de>
+Cc: Rik van Riel <riel@redhat.com>
+Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+---
+ arch/x86/Kconfig | 6 ++++--
+ include/linux/gfp.h | 33 ++++++++++++++++++++-------------
+ include/linux/page-flags-layout.h | 2 ++
+ mm/Kconfig | 7 +++++--
+ 4 files changed, 31 insertions(+), 17 deletions(-)
+
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index 3fef519..b94704a 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -1409,8 +1409,10 @@ config NUMA_EMU
+
+ config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
+- range 1 10
+- default "10" if MAXSMP
++ range 1 10 if !NR_ZONES_EXTENDED
++ range 1 9 if NR_ZONES_EXTENDED
++ default "10" if MAXSMP && !NR_ZONES_EXTENDED
++ default "9" if MAXSMP && NR_ZONES_EXTENDED
+ default "6" if X86_64
+ default "3"
+ depends on NEED_MULTIPLE_NODES
+diff --git a/include/linux/gfp.h b/include/linux/gfp.h
+index af1f2b2..d201d8a 100644
+--- a/include/linux/gfp.h
++++ b/include/linux/gfp.h
+@@ -329,22 +329,29 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+ * 0xe => BAD (MOVABLE+DMA32+HIGHMEM)
+ * 0xf => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
+ *
+- * ZONES_SHIFT must be <= 2 on 32 bit platforms.
++ * GFP_ZONES_SHIFT must be <= 2 on 32 bit platforms.
+ */
+
+-#if 16 * ZONES_SHIFT > BITS_PER_LONG
+-#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
++#if defined(CONFIG_ZONE_DEVICE) && (MAX_NR_ZONES-1) <= 4
++/* ZONE_DEVICE is not a valid GFP zone specifier */
++#define GFP_ZONES_SHIFT 2
++#else
++#define GFP_ZONES_SHIFT ZONES_SHIFT
++#endif
++
++#if 16 * GFP_ZONES_SHIFT > BITS_PER_LONG
++#error GFP_ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
+ #endif
+
+ #define GFP_ZONE_TABLE ( \
+- (ZONE_NORMAL << 0 * ZONES_SHIFT) \
+- | (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT) \
+- | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT) \
+- | (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT) \
+- | (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT) \
+- | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT) \
+- | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT) \
+- | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT) \
++ (ZONE_NORMAL << 0 * GFP_ZONES_SHIFT) \
++ | (OPT_ZONE_DMA << ___GFP_DMA * GFP_ZONES_SHIFT) \
++ | (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * GFP_ZONES_SHIFT) \
++ | (OPT_ZONE_DMA32 << ___GFP_DMA32 * GFP_ZONES_SHIFT) \
++ | (ZONE_NORMAL << ___GFP_MOVABLE * GFP_ZONES_SHIFT) \
++ | (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * GFP_ZONES_SHIFT) \
++ | (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * GFP_ZONES_SHIFT) \
++ | (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * GFP_ZONES_SHIFT) \
+ )
+
+ /*
+@@ -369,8 +376,8 @@ static inline enum zone_type gfp_zone(gfp_t flags)
+ enum zone_type z;
+ int bit = (__force int) (flags & GFP_ZONEMASK);
+
+- z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
+- ((1 << ZONES_SHIFT) - 1);
++ z = (GFP_ZONE_TABLE >> (bit * GFP_ZONES_SHIFT)) &
++ ((1 << GFP_ZONES_SHIFT) - 1);
+ VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
+ return z;
+ }
+diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h
+index da52366..77b078c 100644
+--- a/include/linux/page-flags-layout.h
++++ b/include/linux/page-flags-layout.h
+@@ -17,6 +17,8 @@
+ #define ZONES_SHIFT 1
+ #elif MAX_NR_ZONES <= 4
+ #define ZONES_SHIFT 2
++#elif MAX_NR_ZONES <= 8
++#define ZONES_SHIFT 3
+ #else
+ #error ZONES_SHIFT -- too many zones configured adjust calculation
+ #endif
+diff --git a/mm/Kconfig b/mm/Kconfig
+index 031a329..7826216 100644
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -652,8 +652,6 @@ config IDLE_PAGE_TRACKING
+
+ config ZONE_DEVICE
+ bool "Device memory (pmem, etc...) hotplug support"
+- default !ZONE_DMA
+- depends on !ZONE_DMA
+ depends on MEMORY_HOTPLUG
+ depends on MEMORY_HOTREMOVE
+ depends on X86_64 #arch_add_memory() comprehends device memory
+@@ -667,5 +665,10 @@ config ZONE_DEVICE
+
+ If FS_DAX is enabled, then say Y.
+
++config NR_ZONES_EXTENDED
++ bool
++ default n if !64BIT
++ default y if ZONE_DEVICE && ZONE_DMA && ZONE_DMA32
++
+ config FRAME_VECTOR
+ bool
+--
+2.5.0
+