5000_enable-additional-cpu-optimizations-for-gcc.patch 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328
  1. This patch has been tested on and known to work with kernel versions from 3.2
  2. up to the latest git version (pulled on 12/14/2013).
  3. This patch will expand the number of microarchitectures to include new
  4. processors including: AMD K10-family, AMD Family 10h (Barcelona), AMD Family
  5. 14h (Bobcat), AMD Family 15h (Bulldozer), AMD Family 15h (Piledriver), AMD
  6. Family 16h (Jaguar), Intel 1st Gen Core i3/i5/i7 (Nehalem), Intel 2nd Gen Core
  7. i3/i5/i7 (Sandybridge), Intel 3rd Gen Core i3/i5/i7 (Ivybridge), and Intel 4th
  8. Gen Core i3/i5/i7 (Haswell). It also offers the compiler the 'native' flag.
  9. Small but real speed increases are measurable using a make endpoint comparing
  10. a generic kernel to one built with one of the respective microarchs.
  11. See the following experimental evidence supporting this statement:
  12. https://github.com/graysky2/kernel_gcc_patch
  13. REQUIREMENTS
  14. linux version >=3.15
  15. gcc version <4.9
  16. ---
  17. diff -uprN a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
  18. --- a/arch/x86/include/asm/module.h 2013-11-03 18:41:51.000000000 -0500
  19. +++ b/arch/x86/include/asm/module.h 2013-12-15 06:21:24.351122516 -0500
  20. @@ -15,6 +15,16 @@
  21. #define MODULE_PROC_FAMILY "586MMX "
  22. #elif defined CONFIG_MCORE2
  23. #define MODULE_PROC_FAMILY "CORE2 "
  24. +#elif defined CONFIG_MNATIVE
  25. +#define MODULE_PROC_FAMILY "NATIVE "
  26. +#elif defined CONFIG_MCOREI7
  27. +#define MODULE_PROC_FAMILY "COREI7 "
  28. +#elif defined CONFIG_MCOREI7AVX
  29. +#define MODULE_PROC_FAMILY "COREI7AVX "
  30. +#elif defined CONFIG_MCOREAVXI
  31. +#define MODULE_PROC_FAMILY "COREAVXI "
  32. +#elif defined CONFIG_MCOREAVX2
  33. +#define MODULE_PROC_FAMILY "COREAVX2 "
  34. #elif defined CONFIG_MATOM
  35. #define MODULE_PROC_FAMILY "ATOM "
  36. #elif defined CONFIG_M686
  37. @@ -33,6 +43,18 @@
  38. #define MODULE_PROC_FAMILY "K7 "
  39. #elif defined CONFIG_MK8
  40. #define MODULE_PROC_FAMILY "K8 "
  41. +#elif defined CONFIG_MK10
  42. +#define MODULE_PROC_FAMILY "K10 "
  43. +#elif defined CONFIG_MBARCELONA
  44. +#define MODULE_PROC_FAMILY "BARCELONA "
  45. +#elif defined CONFIG_MBOBCAT
  46. +#define MODULE_PROC_FAMILY "BOBCAT "
  47. +#elif defined CONFIG_MBULLDOZER
  48. +#define MODULE_PROC_FAMILY "BULLDOZER "
  49. +#elif defined CONFIG_MPILEDRIVER
  50. +#define MODULE_PROC_FAMILY "PILEDRIVER "
  51. +#elif defined CONFIG_MJAGUAR
  52. +#define MODULE_PROC_FAMILY "JAGUAR "
  53. #elif defined CONFIG_MELAN
  54. #define MODULE_PROC_FAMILY "ELAN "
  55. #elif defined CONFIG_MCRUSOE
  56. diff -uprN a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
  57. --- a/arch/x86/Kconfig.cpu 2013-11-03 18:41:51.000000000 -0500
  58. +++ b/arch/x86/Kconfig.cpu 2013-12-15 06:21:24.351122516 -0500
  59. @@ -139,7 +139,7 @@ config MPENTIUM4
  60. config MK6
  61. - bool "K6/K6-II/K6-III"
  62. + bool "AMD K6/K6-II/K6-III"
  63. depends on X86_32
  64. ---help---
  65. Select this for an AMD K6-family processor. Enables use of
  66. @@ -147,7 +147,7 @@ config MK6
  67. flags to GCC.
  68. config MK7
  69. - bool "Athlon/Duron/K7"
  70. + bool "AMD Athlon/Duron/K7"
  71. depends on X86_32
  72. ---help---
  73. Select this for an AMD Athlon K7-family processor. Enables use of
  74. @@ -155,12 +155,55 @@ config MK7
  75. flags to GCC.
  76. config MK8
  77. - bool "Opteron/Athlon64/Hammer/K8"
  78. + bool "AMD Opteron/Athlon64/Hammer/K8"
  79. ---help---
  80. Select this for an AMD Opteron or Athlon64 Hammer-family processor.
  81. Enables use of some extended instructions, and passes appropriate
  82. optimization flags to GCC.
  83. +config MK10
  84. + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10"
  85. + ---help---
  86. + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50,
  87. + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor.
  88. + Enables use of some extended instructions, and passes appropriate
  89. + optimization flags to GCC.
  90. +
  91. +config MBARCELONA
  92. + bool "AMD Barcelona"
  93. + ---help---
  94. + Select this for AMD Barcelona and newer processors.
  95. +
  96. + Enables -march=barcelona
  97. +
  98. +config MBOBCAT
  99. + bool "AMD Bobcat"
  100. + ---help---
  101. + Select this for AMD Bobcat processors.
  102. +
  103. + Enables -march=btver1
  104. +
  105. +config MBULLDOZER
  106. + bool "AMD Bulldozer"
  107. + ---help---
  108. + Select this for AMD Bulldozer processors.
  109. +
  110. + Enables -march=bdver1
  111. +
  112. +config MPILEDRIVER
  113. + bool "AMD Piledriver"
  114. + ---help---
  115. + Select this for AMD Piledriver processors.
  116. +
  117. + Enables -march=bdver2
  118. +
  119. +config MJAGUAR
  120. + bool "AMD Jaguar"
  121. + ---help---
  122. + Select this for AMD Jaguar processors.
  123. +
  124. + Enables -march=btver2
  125. +
  126. config MCRUSOE
  127. bool "Crusoe"
  128. depends on X86_32
  129. @@ -251,8 +294,17 @@ config MPSC
  130. using the cpu family field
  131. in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one.
  132. +config MATOM
  133. + bool "Intel Atom"
  134. + ---help---
  135. +
  136. + Select this for the Intel Atom platform. Intel Atom CPUs have an
  137. + in-order pipelining architecture and thus can benefit from
  138. + accordingly optimized code. Use a recent GCC with specific Atom
  139. + support in order to fully benefit from selecting this option.
  140. +
  141. config MCORE2
  142. - bool "Core 2/newer Xeon"
  143. + bool "Intel Core 2"
  144. ---help---
  145. Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and
  146. @@ -260,14 +312,40 @@ config MCORE2
  147. family in /proc/cpuinfo. Newer ones have 6 and older ones 15
  148. (not a typo)
  149. -config MATOM
  150. - bool "Intel Atom"
  151. + Enables -march=core2
  152. +
  153. +config MCOREI7
  154. + bool "Intel Core i7"
  155. ---help---
  156. - Select this for the Intel Atom platform. Intel Atom CPUs have an
  157. - in-order pipelining architecture and thus can benefit from
  158. - accordingly optimized code. Use a recent GCC with specific Atom
  159. - support in order to fully benefit from selecting this option.
  160. + Select this for the Intel Nehalem platform. Intel Nehalem processors
  161. + include Core i3, i5, i7, Xeon: 34xx, 35xx, 55xx, 56xx, 75xx processors.
  162. +
  163. + Enables -march=corei7
  164. +
  165. +config MCOREI7AVX
  166. + bool "Intel Core 2nd Gen AVX"
  167. + ---help---
  168. +
  169. + Select this for 2nd Gen Core processors including Sandy Bridge.
  170. +
  171. + Enables -march=corei7-avx
  172. +
  173. +config MCOREAVXI
  174. + bool "Intel Core 3rd Gen AVX"
  175. + ---help---
  176. +
  177. + Select this for 3rd Gen Core processors including Ivy Bridge.
  178. +
  179. + Enables -march=core-avx-i
  180. +
  181. +config MCOREAVX2
  182. + bool "Intel Core AVX2"
  183. + ---help---
  184. +
  185. + Select this for AVX2 enabled processors including Haswell.
  186. +
  187. + Enables -march=core-avx2
  188. config GENERIC_CPU
  189. bool "Generic-x86-64"
  190. @@ -276,6 +354,19 @@ config GENERIC_CPU
  191. Generic x86-64 CPU.
  192. Run equally well on all x86-64 CPUs.
  193. +config MNATIVE
  194. + bool "Native optimizations autodetected by GCC"
  195. + ---help---
  196. +
  197. + GCC 4.2 and above support -march=native, which automatically detects
  198. + the optimum settings to use based on your processor. -march=native
  199. + also detects and applies additional settings beyond -march specific
  200. + to your CPU (e.g. -msse4). Unless you have a specific reason not to
  201. + (e.g. distcc cross-compiling), you should probably be using
  202. + -march=native rather than anything listed above.
  203. +
  204. + Enables -march=native
  205. +
  206. endchoice
  207. config X86_GENERIC
  208. @@ -300,7 +391,7 @@ config X86_INTERNODE_CACHE_SHIFT
  209. config X86_L1_CACHE_SHIFT
  210. int
  211. default "7" if MPENTIUM4 || MPSC
  212. - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
  213. + default "6" if MK7 || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MPENTIUMM || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MVIAC7 || X86_GENERIC || MNATIVE || GENERIC_CPU
  214. default "4" if MELAN || M486 || MGEODEGX1
  215. default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
  216. @@ -331,11 +422,11 @@ config X86_ALIGNMENT_16
  217. config X86_INTEL_USERCOPY
  218. def_bool y
  219. - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2
  220. + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || MNATIVE || X86_GENERIC || MK8 || MK7 || MK10 || MBARCELONA || MEFFICEON || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2
  221. config X86_USE_PPRO_CHECKSUM
  222. def_bool y
  223. - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM
  224. + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MK10 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM || MNATIVE
  225. config X86_USE_3DNOW
  226. def_bool y
  227. @@ -363,17 +454,17 @@ config X86_P6_NOP
  228. config X86_TSC
  229. def_bool y
  230. - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64
  231. + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MATOM) || X86_64 || MNATIVE
  232. config X86_CMPXCHG64
  233. def_bool y
  234. - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM
  235. + depends on X86_PAE || X86_64 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MATOM || MNATIVE
  236. # this should be set for all -march=.. options where the compiler
  237. # generates cmov.
  238. config X86_CMOV
  239. def_bool y
  240. - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX)
  241. + depends on (MK8 || MK10 || MBARCELONA || MBOBCAT || MBULLDOZER || MPILEDRIVER || MJAGUAR || MK7 || MCORE2 || MCOREI7 || MCOREI7AVX || MCOREAVXI || MCOREAVX2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MNATIVE || MATOM || MGEODE_LX)
  242. config X86_MINIMUM_CPU_FAMILY
  243. int
  244. diff -uprN a/arch/x86/Makefile b/arch/x86/Makefile
  245. --- a/arch/x86/Makefile 2013-11-03 18:41:51.000000000 -0500
  246. +++ b/arch/x86/Makefile 2013-12-15 06:21:24.354455723 -0500
  247. @@ -61,11 +61,26 @@ else
  248. KBUILD_CFLAGS += $(call cc-option,-mno-sse -mpreferred-stack-boundary=3)
  249. # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu)
  250. + cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
  251. cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8)
  252. + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10)
  253. + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona)
  254. + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1)
  255. + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1)
  256. + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2)
  257. + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2)
  258. cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona)
  259. cflags-$(CONFIG_MCORE2) += \
  260. - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic))
  261. + $(call cc-option,-march=core2,$(call cc-option,-mtune=core2))
  262. + cflags-$(CONFIG_MCOREI7) += \
  263. + $(call cc-option,-march=corei7,$(call cc-option,-mtune=corei7))
  264. + cflags-$(CONFIG_MCOREI7AVX) += \
  265. + $(call cc-option,-march=corei7-avx,$(call cc-option,-mtune=corei7-avx))
  266. + cflags-$(CONFIG_MCOREAVXI) += \
  267. + $(call cc-option,-march=core-avx-i,$(call cc-option,-mtune=core-avx-i))
  268. + cflags-$(CONFIG_MCOREAVX2) += \
  269. + $(call cc-option,-march=core-avx2,$(call cc-option,-mtune=core-avx2))
  270. cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \
  271. $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))
  272. cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic)
  273. diff -uprN a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
  274. --- a/arch/x86/Makefile_32.cpu 2013-11-03 18:41:51.000000000 -0500
  275. +++ b/arch/x86/Makefile_32.cpu 2013-12-15 06:21:24.354455723 -0500
  276. @@ -23,7 +23,14 @@ cflags-$(CONFIG_MK6) += -march=k6
  277. # Please note, that patches that add -march=athlon-xp and friends are pointless.
  278. # They make zero difference whatsosever to performance at this time.
  279. cflags-$(CONFIG_MK7) += -march=athlon
  280. +cflags-$(CONFIG_MNATIVE) += $(call cc-option,-march=native)
  281. cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,-march=athlon)
  282. +cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10,-march=athlon)
  283. +cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona,-march=athlon)
  284. +cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1,-march=athlon)
  285. +cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1,-march=athlon)
  286. +cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2,-march=athlon)
  287. +cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2,-march=athlon)
  288. cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
  289. cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
  290. cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
  291. @@ -32,6 +39,10 @@ cflags-$(CONFIG_MCYRIXIII) += $(call cc-
  292. cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
  293. cflags-$(CONFIG_MVIAC7) += -march=i686
  294. cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2)
  295. +cflags-$(CONFIG_MCOREI7) += -march=i686 $(call tune,corei7)
  296. +cflags-$(CONFIG_MCOREI7AVX) += -march=i686 $(call tune,corei7-avx)
  297. +cflags-$(CONFIG_MCOREAVXI) += -march=i686 $(call tune,core-avx-i)
  298. +cflags-$(CONFIG_MCOREAVX2) += -march=i686 $(call tune,core-avx2)
  299. cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom,$(call cc-option,-march=core2,-march=i686)) \
  300. $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic))