summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-03-03 12:38:52 +0100
committerIngo Molnar <mingo@elte.hu>2008-04-26 17:44:55 +0200
commit60a3cdd0639473c79c253bc08c8ef8f882cca107 (patch)
treee72a5ca87873b286b02966a390277f828139d350
parentb1721d0da266b4af8cb4419473b4ca36206ab200 (diff)
downloadkernel-crypto-60a3cdd0639473c79c253bc08c8ef8f882cca107.tar.gz
kernel-crypto-60a3cdd0639473c79c253bc08c8ef8f882cca107.tar.xz
kernel-crypto-60a3cdd0639473c79c253bc08c8ef8f882cca107.zip
x86: add optimized inlining
add CONFIG_OPTIMIZE_INLINING=y. allow gcc to optimize the kernel image's size by uninlining functions that have been marked 'inline'. Previously gcc was forced by Linux to always-inline these functions via a gcc attribute: #define inline inline __attribute__((always_inline)) Especially when the user has already selected CONFIG_OPTIMIZE_FOR_SIZE=y this can make a huge difference in kernel image size (using a standard Fedora .config): text data bss dec hex filename 5613924 562708 3854336 10030968 990f78 vmlinux.before 5486689 562708 3854336 9903733 971e75 vmlinux.after that's a 2.3% text size reduction (!). Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--arch/x86/Kconfig.debug13
-rw-r--r--arch/x86/configs/i386_defconfig1
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--include/linux/compiler-gcc.h12
4 files changed, 24 insertions, 3 deletions
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 239fd9fba0a..5b1979a45a1 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -257,3 +257,16 @@ config CPA_DEBUG
Do change_page_attr() self-tests every 30 seconds.
endmenu
+
+config OPTIMIZE_INLINING
+ bool "Allow gcc to uninline functions marked 'inline'"
+ default y
+ help
+ This option determines if the kernel forces gcc to inline the functions
+ developers have marked 'inline'. Doing so takes away freedom from gcc to
+ do what it thinks is best, which is desirable for the gcc 3.x series of
+ compilers. The gcc 4.x series have a rewritten inlining algorithm and
+ disabling this option will generate a smaller kernel there. Hopefully
+ this algorithm is so good that allowing gcc4 to make the decision can
+ become the default in the future, until then this option is there to
+ test gcc for this.
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3df340b54e5..ad7ddaaff58 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1421,6 +1421,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
+CONFIG_OPTIMIZE_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index eef98cb00c6..2d6f5b2809d 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1346,6 +1346,7 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
+CONFIG_OPTIMIZE_INLINING=y
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
# CONFIG_FAULT_INJECTION is not set
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index fe23792f05c..340bc5d9277 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -28,9 +28,15 @@
#define __must_be_array(a) \
BUILD_BUG_ON_ZERO(__builtin_types_compatible_p(typeof(a), typeof(&a[0])))
-#define inline inline __attribute__((always_inline))
-#define __inline__ __inline__ __attribute__((always_inline))
-#define __inline __inline __attribute__((always_inline))
+/*
+ * Force always-inline if the user requests it so via the .config:
+ */
+#if !defined(CONFIG_OPTIMIZE_INLINING) && (__GNUC__ >= 4)
+# define inline inline __attribute__((always_inline))
+# define __inline__ __inline__ __attribute__((always_inline))
+# define __inline __inline __attribute__((always_inline))
+#endif
+
#define __deprecated __attribute__((deprecated))
#define __packed __attribute__((packed))
#define __weak __attribute__((weak))