SSE.h 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. /* This Source Code Form is subject to the terms of the Mozilla Public
  2. * License, v. 2.0. If a copy of the MPL was not distributed with this
  3. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  4. /* compile-time and runtime tests for whether to use SSE instructions */
  5. #ifndef mozilla_SSE_h_
  6. #define mozilla_SSE_h_
  7. // for definition of MFBT_DATA
  8. #include "mozilla/Types.h"
  9. /**
  10. * The public interface of this header consists of a set of macros and
  11. * functions for Intel CPU features.
  12. *
  13. * DETECTING ISA EXTENSIONS
  14. * ========================
  15. *
  16. * This header provides the following functions for determining whether the
  17. * current CPU supports a particular instruction set extension:
  18. *
  19. * mozilla::supports_mmx
  20. * mozilla::supports_sse
  21. * mozilla::supports_sse2
  22. * mozilla::supports_sse3
  23. * mozilla::supports_ssse3
  24. * mozilla::supports_sse4a
  25. * mozilla::supports_sse4_1
  26. * mozilla::supports_sse4_2
  27. * mozilla::supports_avx
  28. * mozilla::supports_avx2
  29. *
  30. * If you're writing code using inline assembly, you should guard it with a
  31. * call to one of these functions. For instance:
  32. *
  33. * if (mozilla::supports_sse2()) {
  34. * asm(" ... ");
  35. * }
  36. * else {
  37. * ...
  38. * }
  39. *
  40. * Note that these functions depend on cpuid intrinsics only available in gcc
  41. * 4.3 or later and MSVC 8.0 (Visual C++ 2005) or later, so they return false
  42. * in older compilers. (This could be fixed by replacing the code with inline
  43. * assembly.)
  44. *
  45. *
  46. * USING INTRINSICS
  47. * ================
  48. *
  49. * This header also provides support for coding using CPU intrinsics.
  50. *
  51. * For each mozilla::supports_abc function, we define a MOZILLA_MAY_SUPPORT_ABC
  52. * macro which indicates that the target/compiler combination we're using is
  53. * compatible with the ABC extension. For instance, x86_64 with MSVC 2003 is
  54. * compatible with SSE2 but not SSE3, since although there exist x86_64 CPUs
  55. * with SSE3 support, MSVC 2003 only supports through SSE2.
  56. *
  57. * Until gcc fixes #pragma target [1] [2] or our x86 builds require SSE2,
  58. * you'll need to separate code using intrinsics into a file separate from your
  59. * regular code. Here's the recommended pattern:
  60. *
  61. * #ifdef MOZILLA_MAY_SUPPORT_ABC
  62. * namespace mozilla {
  63. * namespace ABC {
  64. * void foo();
  65. * }
  66. * }
  67. * #endif
  68. *
  69. * void foo() {
  70. * #ifdef MOZILLA_MAY_SUPPORT_ABC
  71. * if (mozilla::supports_abc()) {
  72. * mozilla::ABC::foo(); // in a separate file
  73. * return;
  74. * }
  75. * #endif
  76. *
  77. * foo_unvectorized();
  78. * }
  79. *
  80. * You'll need to define mozilla::ABC::foo() in a separate file and add the
  81. * -mabc flag when using gcc.
  82. *
  83. * [1] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39787
  84. * [2] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=41201
  85. *
  86. */
  87. #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
  88. #ifdef __MMX__
  89. // It's ok to use MMX instructions based on the -march option (or
  90. // the default for x86_64 or for Intel Mac).
  91. #define MOZILLA_PRESUME_MMX 1
  92. #endif
  93. #ifdef __SSE__
  94. // It's ok to use SSE instructions based on the -march option (or
  95. // the default for x86_64 or for Intel Mac).
  96. #define MOZILLA_PRESUME_SSE 1
  97. #endif
  98. #ifdef __SSE2__
  99. // It's ok to use SSE2 instructions based on the -march option (or
  100. // the default for x86_64 or for Intel Mac).
  101. #define MOZILLA_PRESUME_SSE2 1
  102. #endif
  103. #ifdef __SSE3__
  104. // It's ok to use SSE3 instructions based on the -march option (or the
  105. // default for Intel Mac).
  106. #define MOZILLA_PRESUME_SSE3 1
  107. #endif
  108. #ifdef __SSSE3__
  109. // It's ok to use SSSE3 instructions based on the -march option.
  110. #define MOZILLA_PRESUME_SSSE3 1
  111. #endif
  112. #ifdef __SSE4A__
  113. // It's ok to use SSE4A instructions based on the -march option.
  114. #define MOZILLA_PRESUME_SSE4A 1
  115. #endif
  116. #ifdef __SSE4_1__
  117. // It's ok to use SSE4.1 instructions based on the -march option.
  118. #define MOZILLA_PRESUME_SSE4_1 1
  119. #endif
  120. #ifdef __SSE4_2__
  121. // It's ok to use SSE4.2 instructions based on the -march option.
  122. #define MOZILLA_PRESUME_SSE4_2 1
  123. #endif
  124. #ifdef __AVX__
  125. // It's ok to use AVX instructions based on the -march option.
  126. #define MOZILLA_PRESUME_AVX 1
  127. #endif
  128. #ifdef __AVX2__
  129. // It's ok to use AVX instructions based on the -march option.
  130. #define MOZILLA_PRESUME_AVX2 1
  131. #endif
  132. #ifdef HAVE_CPUID_H
  133. #define MOZILLA_SSE_HAVE_CPUID_DETECTION
  134. #endif
  135. #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64))
  136. #define MOZILLA_SSE_HAVE_CPUID_DETECTION
  137. #if defined(_M_IX86_FP)
  138. #if _M_IX86_FP >= 1
  139. // It's ok to use SSE instructions based on the /arch option
  140. #define MOZILLA_PRESUME_SSE
  141. #endif
  142. #if _M_IX86_FP >= 2
  143. // It's ok to use SSE2 instructions based on the /arch option
  144. #define MOZILLA_PRESUME_SSE2
  145. #endif
  146. #elif defined(_M_AMD64)
  147. // MSVC for AMD64 doesn't support MMX, so don't presume it here.
  148. // SSE is always available on AMD64.
  149. #define MOZILLA_PRESUME_SSE
  150. // SSE2 is always available on AMD64.
  151. #define MOZILLA_PRESUME_SSE2
  152. #endif
  153. #elif defined(__SUNPRO_CC) && (defined(__i386) || defined(__x86_64__))
  154. // Sun Studio on x86 or amd64
  155. #define MOZILLA_SSE_HAVE_CPUID_DETECTION
  156. #if defined(__x86_64__)
  157. // MMX is always available on AMD64.
  158. #define MOZILLA_PRESUME_MMX
  159. // SSE is always available on AMD64.
  160. #define MOZILLA_PRESUME_SSE
  161. // SSE2 is always available on AMD64.
  162. #define MOZILLA_PRESUME_SSE2
  163. #endif
  164. #endif
  165. namespace mozilla {
  166. namespace sse_private {
  167. #if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  168. #if !defined(MOZILLA_PRESUME_MMX)
  169. extern bool MFBT_DATA mmx_enabled;
  170. #endif
  171. #if !defined(MOZILLA_PRESUME_SSE)
  172. extern bool MFBT_DATA sse_enabled;
  173. #endif
  174. #if !defined(MOZILLA_PRESUME_SSE2)
  175. extern bool MFBT_DATA sse2_enabled;
  176. #endif
  177. #if !defined(MOZILLA_PRESUME_SSE3)
  178. extern bool MFBT_DATA sse3_enabled;
  179. #endif
  180. #if !defined(MOZILLA_PRESUME_SSSE3)
  181. extern bool MFBT_DATA ssse3_enabled;
  182. #endif
  183. #if !defined(MOZILLA_PRESUME_SSE4A)
  184. extern bool MFBT_DATA sse4a_enabled;
  185. #endif
  186. #if !defined(MOZILLA_PRESUME_SSE4_1)
  187. extern bool MFBT_DATA sse4_1_enabled;
  188. #endif
  189. #if !defined(MOZILLA_PRESUME_SSE4_2)
  190. extern bool MFBT_DATA sse4_2_enabled;
  191. #endif
  192. #if !defined(MOZILLA_PRESUME_AVX)
  193. extern bool MFBT_DATA avx_enabled;
  194. #endif
  195. #if !defined(MOZILLA_PRESUME_AVX2)
  196. extern bool MFBT_DATA avx2_enabled;
  197. #endif
  198. #endif
  199. } // namespace sse_private
  200. #if defined(MOZILLA_PRESUME_MMX)
  201. #define MOZILLA_MAY_SUPPORT_MMX 1
  202. inline bool supports_mmx() { return true; }
  203. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  204. #if !(defined(_MSC_VER) && defined(_M_AMD64))
  205. // Define MOZILLA_MAY_SUPPORT_MMX only if we're not on MSVC for
  206. // AMD64, since that compiler doesn't support MMX.
  207. #define MOZILLA_MAY_SUPPORT_MMX 1
  208. #endif
  209. inline bool supports_mmx() { return sse_private::mmx_enabled; }
  210. #else
  211. inline bool supports_mmx() { return false; }
  212. #endif
  213. #if defined(MOZILLA_PRESUME_SSE)
  214. #define MOZILLA_MAY_SUPPORT_SSE 1
  215. inline bool supports_sse() { return true; }
  216. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  217. #define MOZILLA_MAY_SUPPORT_SSE 1
  218. inline bool supports_sse() { return sse_private::sse_enabled; }
  219. #else
  220. inline bool supports_sse() { return false; }
  221. #endif
  222. #if defined(MOZILLA_PRESUME_SSE2)
  223. #define MOZILLA_MAY_SUPPORT_SSE2 1
  224. inline bool supports_sse2() { return true; }
  225. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  226. #define MOZILLA_MAY_SUPPORT_SSE2 1
  227. inline bool supports_sse2() { return sse_private::sse2_enabled; }
  228. #else
  229. inline bool supports_sse2() { return false; }
  230. #endif
  231. #if defined(MOZILLA_PRESUME_SSE3)
  232. #define MOZILLA_MAY_SUPPORT_SSE3 1
  233. inline bool supports_sse3() { return true; }
  234. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  235. #define MOZILLA_MAY_SUPPORT_SSE3 1
  236. inline bool supports_sse3() { return sse_private::sse3_enabled; }
  237. #else
  238. inline bool supports_sse3() { return false; }
  239. #endif
  240. #if defined(MOZILLA_PRESUME_SSSE3)
  241. #define MOZILLA_MAY_SUPPORT_SSSE3 1
  242. inline bool supports_ssse3() { return true; }
  243. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  244. #define MOZILLA_MAY_SUPPORT_SSSE3 1
  245. inline bool supports_ssse3() { return sse_private::ssse3_enabled; }
  246. #else
  247. inline bool supports_ssse3() { return false; }
  248. #endif
  249. #if defined(MOZILLA_PRESUME_SSE4A)
  250. #define MOZILLA_MAY_SUPPORT_SSE4A 1
  251. inline bool supports_sse4a() { return true; }
  252. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  253. #define MOZILLA_MAY_SUPPORT_SSE4A 1
  254. inline bool supports_sse4a() { return sse_private::sse4a_enabled; }
  255. #else
  256. inline bool supports_sse4a() { return false; }
  257. #endif
  258. #if defined(MOZILLA_PRESUME_SSE4_1)
  259. #define MOZILLA_MAY_SUPPORT_SSE4_1 1
  260. inline bool supports_sse4_1() { return true; }
  261. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  262. #define MOZILLA_MAY_SUPPORT_SSE4_1 1
  263. inline bool supports_sse4_1() { return sse_private::sse4_1_enabled; }
  264. #else
  265. inline bool supports_sse4_1() { return false; }
  266. #endif
  267. #if defined(MOZILLA_PRESUME_SSE4_2)
  268. #define MOZILLA_MAY_SUPPORT_SSE4_2 1
  269. inline bool supports_sse4_2() { return true; }
  270. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  271. #define MOZILLA_MAY_SUPPORT_SSE4_2 1
  272. inline bool supports_sse4_2() { return sse_private::sse4_2_enabled; }
  273. #else
  274. inline bool supports_sse4_2() { return false; }
  275. #endif
  276. #if defined(MOZILLA_PRESUME_AVX)
  277. #define MOZILLA_MAY_SUPPORT_AVX 1
  278. inline bool supports_avx() { return true; }
  279. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  280. #define MOZILLA_MAY_SUPPORT_AVX 1
  281. inline bool supports_avx() { return sse_private::avx_enabled; }
  282. #else
  283. inline bool supports_avx() { return false; }
  284. #endif
  285. #if defined(MOZILLA_PRESUME_AVX2)
  286. #define MOZILLA_MAY_SUPPORT_AVX2 1
  287. inline bool supports_avx2() { return true; }
  288. #elif defined(MOZILLA_SSE_HAVE_CPUID_DETECTION)
  289. #define MOZILLA_MAY_SUPPORT_AVX2 1
  290. inline bool supports_avx2() { return sse_private::avx2_enabled; }
  291. #else
  292. inline bool supports_avx2() { return false; }
  293. #endif
  294. } // namespace mozilla
  295. #endif /* !defined(mozilla_SSE_h_) */