vfp.h 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. /*
  2. * linux/arch/arm/vfp/vfp.h
  3. *
  4. * Copyright (C) 2004 ARM Limited.
  5. * Written by Deep Blue Solutions Limited.
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. */
  11. static inline u32 vfp_shiftright32jamming(u32 val, unsigned int shift)
  12. {
  13. if (shift) {
  14. if (shift < 32)
  15. val = val >> shift | ((val << (32 - shift)) != 0);
  16. else
  17. val = val != 0;
  18. }
  19. return val;
  20. }
  21. static inline u64 vfp_shiftright64jamming(u64 val, unsigned int shift)
  22. {
  23. if (shift) {
  24. if (shift < 64)
  25. val = val >> shift | ((val << (64 - shift)) != 0);
  26. else
  27. val = val != 0;
  28. }
  29. return val;
  30. }
  31. static inline u32 vfp_hi64to32jamming(u64 val)
  32. {
  33. u32 v;
  34. asm(
  35. "cmp %Q1, #1 @ vfp_hi64to32jamming\n\t"
  36. "movcc %0, %R1\n\t"
  37. "orrcs %0, %R1, #1"
  38. : "=r" (v) : "r" (val) : "cc");
  39. return v;
  40. }
  41. static inline void add128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
  42. {
  43. asm( "adds %Q0, %Q2, %Q4\n\t"
  44. "adcs %R0, %R2, %R4\n\t"
  45. "adcs %Q1, %Q3, %Q5\n\t"
  46. "adc %R1, %R3, %R5"
  47. : "=r" (nl), "=r" (nh)
  48. : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
  49. : "cc");
  50. *resh = nh;
  51. *resl = nl;
  52. }
  53. static inline void sub128(u64 *resh, u64 *resl, u64 nh, u64 nl, u64 mh, u64 ml)
  54. {
  55. asm( "subs %Q0, %Q2, %Q4\n\t"
  56. "sbcs %R0, %R2, %R4\n\t"
  57. "sbcs %Q1, %Q3, %Q5\n\t"
  58. "sbc %R1, %R3, %R5\n\t"
  59. : "=r" (nl), "=r" (nh)
  60. : "0" (nl), "1" (nh), "r" (ml), "r" (mh)
  61. : "cc");
  62. *resh = nh;
  63. *resl = nl;
  64. }
  65. static inline void mul64to128(u64 *resh, u64 *resl, u64 n, u64 m)
  66. {
  67. u32 nh, nl, mh, ml;
  68. u64 rh, rma, rmb, rl;
  69. nl = n;
  70. ml = m;
  71. rl = (u64)nl * ml;
  72. nh = n >> 32;
  73. rma = (u64)nh * ml;
  74. mh = m >> 32;
  75. rmb = (u64)nl * mh;
  76. rma += rmb;
  77. rh = (u64)nh * mh;
  78. rh += ((u64)(rma < rmb) << 32) + (rma >> 32);
  79. rma <<= 32;
  80. rl += rma;
  81. rh += (rl < rma);
  82. *resl = rl;
  83. *resh = rh;
  84. }
  85. static inline void shift64left(u64 *resh, u64 *resl, u64 n)
  86. {
  87. *resh = n >> 63;
  88. *resl = n << 1;
  89. }
  90. static inline u64 vfp_hi64multiply64(u64 n, u64 m)
  91. {
  92. u64 rh, rl;
  93. mul64to128(&rh, &rl, n, m);
  94. return rh | (rl != 0);
  95. }
  96. static inline u64 vfp_estimate_div128to64(u64 nh, u64 nl, u64 m)
  97. {
  98. u64 mh, ml, remh, reml, termh, terml, z;
  99. if (nh >= m)
  100. return ~0ULL;
  101. mh = m >> 32;
  102. if (mh << 32 <= nh) {
  103. z = 0xffffffff00000000ULL;
  104. } else {
  105. z = nh;
  106. do_div(z, mh);
  107. z <<= 32;
  108. }
  109. mul64to128(&termh, &terml, m, z);
  110. sub128(&remh, &reml, nh, nl, termh, terml);
  111. ml = m << 32;
  112. while ((s64)remh < 0) {
  113. z -= 0x100000000ULL;
  114. add128(&remh, &reml, remh, reml, mh, ml);
  115. }
  116. remh = (remh << 32) | (reml >> 32);
  117. if (mh << 32 <= remh) {
  118. z |= 0xffffffff;
  119. } else {
  120. do_div(remh, mh);
  121. z |= remh;
  122. }
  123. return z;
  124. }
  125. /*
  126. * Operations on unpacked elements
  127. */
  /* Toggle the sign flag (0x8000) of an unpacked sign value. */
  #define vfp_sign_negate(sign) ((sign) ^ 0x8000)
  129. /*
  130. * Single-precision
  131. */
  132. struct vfp_single {
  133. s16 exponent;
  134. u16 sign;
  135. u32 significand;
  136. };
  137. extern s32 vfp_get_float(unsigned int reg);
  138. extern void vfp_put_float(s32 val, unsigned int reg);
  139. /*
  140. * VFP_SINGLE_MANTISSA_BITS - number of bits in the mantissa
  141. * VFP_SINGLE_EXPONENT_BITS - number of bits in the exponent
  142. * VFP_SINGLE_LOW_BITS - number of low bits in the unpacked significand
  143. * which are not propagated to the float upon packing.
  144. */
  #define VFP_SINGLE_MANTISSA_BITS    (23)
  #define VFP_SINGLE_EXPONENT_BITS    (8)
  /* Low bits of the unpacked significand that packing shifts away. */
  #define VFP_SINGLE_LOW_BITS         (32 - 2 - VFP_SINGLE_MANTISSA_BITS)
  #define VFP_SINGLE_LOW_BITS_MASK    ((1 << VFP_SINGLE_LOW_BITS) - 1)

  /*
   * The bit in an unpacked float which indicates that it is a quiet NaN:
   * the top mantissa bit, offset by the unpacked low bits.
   */
  #define VFP_SINGLE_SIGNIFICAND_QNAN \
      (1 << (VFP_SINGLE_LOW_BITS + VFP_SINGLE_MANTISSA_BITS - 1))
  153. /*
  154. * Operations on packed single-precision numbers
  155. */
  /* Bit 31 of a packed single is the sign. */
  #define vfp_single_packed_sign(v)       ((v) & (1U << 31))
  #define vfp_single_packed_negate(v)     ((v) ^ (1U << 31))
  #define vfp_single_packed_abs(v)        ((v) & ~(1U << 31))
  /* Exponent field: the VFP_SINGLE_EXPONENT_BITS bits above the mantissa. */
  #define vfp_single_packed_exponent(v) \
      (((v) >> VFP_SINGLE_MANTISSA_BITS) & ((1 << VFP_SINGLE_EXPONENT_BITS) - 1))
  /* Mantissa field: the low VFP_SINGLE_MANTISSA_BITS bits. */
  #define vfp_single_packed_mantissa(v) \
      ((v) & ((1 << VFP_SINGLE_MANTISSA_BITS) - 1))
  /*
   * Unpack a single-precision float.  Note that this returns the magnitude
   * of the single-precision float mantissa, with the implicit leading 1
   * added if necessary, aligned to bit 30.
   */
  166. static inline void vfp_single_unpack(struct vfp_single *s, s32 val)
  167. {
  168. u32 significand;
  169. s->sign = vfp_single_packed_sign(val) >> 16,
  170. s->exponent = vfp_single_packed_exponent(val);
  171. significand = (u32) val;
  172. significand = (significand << (32 - VFP_SINGLE_MANTISSA_BITS)) >> 2;
  173. if (s->exponent && s->exponent != 255)
  174. significand |= 0x40000000;
  175. s->significand = significand;
  176. }
  177. /*
  178. * Re-pack a single-precision float. This assumes that the float is
  179. * already normalised such that the MSB is bit 30, _not_ bit 31.
  180. */
  181. static inline s32 vfp_single_pack(struct vfp_single *s)
  182. {
  183. u32 val;
  184. val = (s->sign << 16) +
  185. (s->exponent << VFP_SINGLE_MANTISSA_BITS) +
  186. (s->significand >> VFP_SINGLE_LOW_BITS);
  187. return (s32)val;
  188. }
  /*
   * Classification flags returned by vfp_single_type() and
   * vfp_double_type().  VFP_ZERO and VFP_DENORMAL are ORed onto
   * VFP_NUMBER; infinities and NaNs are reported on their own.
   */
  #define VFP_NUMBER      0x01
  #define VFP_ZERO        0x02
  #define VFP_DENORMAL    0x04
  #define VFP_INFINITY    0x08
  #define VFP_NAN         0x10
  #define VFP_NAN_SIGNAL  0x20

  #define VFP_QNAN        (VFP_NAN)
  #define VFP_SNAN        (VFP_NAN|VFP_NAN_SIGNAL)
  197. static inline int vfp_single_type(struct vfp_single *s)
  198. {
  199. int type = VFP_NUMBER;
  200. if (s->exponent == 255) {
  201. if (s->significand == 0)
  202. type = VFP_INFINITY;
  203. else if (s->significand & VFP_SINGLE_SIGNIFICAND_QNAN)
  204. type = VFP_QNAN;
  205. else
  206. type = VFP_SNAN;
  207. } else if (s->exponent == 0) {
  208. if (s->significand == 0)
  209. type |= VFP_ZERO;
  210. else
  211. type |= VFP_DENORMAL;
  212. }
  213. return type;
  214. }
  215. #ifndef DEBUG
  216. #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  217. u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions);
  218. #else
  219. u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func);
  220. #endif
  221. /*
  222. * Double-precision
  223. */
  224. struct vfp_double {
  225. s16 exponent;
  226. u16 sign;
  227. u64 significand;
  228. };
  229. /*
  230. * VFP_REG_ZERO is a special register number for vfp_get_double
  231. * which returns (double)0.0. This is useful for the compare with
  232. * zero instructions.
  233. */
  /* 32 when CONFIG_VFPv3 is defined, 16 otherwise. */
  #ifndef CONFIG_VFPv3
  #define VFP_REG_ZERO 16
  #else
  #define VFP_REG_ZERO 32
  #endif
  239. extern u64 vfp_get_double(unsigned int reg);
  240. extern void vfp_put_double(u64 val, unsigned int reg);
  #define VFP_DOUBLE_MANTISSA_BITS    (52)
  #define VFP_DOUBLE_EXPONENT_BITS    (11)
  /* Low bits of the unpacked significand that packing shifts away. */
  #define VFP_DOUBLE_LOW_BITS         (64 - 2 - VFP_DOUBLE_MANTISSA_BITS)
  #define VFP_DOUBLE_LOW_BITS_MASK    ((1 << VFP_DOUBLE_LOW_BITS) - 1)

  /*
   * The bit in an unpacked double which indicates that it is a quiet NaN:
   * the top mantissa bit, offset by the unpacked low bits.
   */
  #define VFP_DOUBLE_SIGNIFICAND_QNAN \
      (1ULL << (VFP_DOUBLE_LOW_BITS + VFP_DOUBLE_MANTISSA_BITS - 1))
  /*
   * Operations on packed double-precision numbers
   */
  /* Bit 63 of a packed double is the sign. */
  #define vfp_double_packed_sign(v)       ((v) & 0x8000000000000000ULL)
  #define vfp_double_packed_negate(v)     ((v) ^ 0x8000000000000000ULL)
  #define vfp_double_packed_abs(v)        ((v) & ~0x8000000000000000ULL)
  /* Exponent field: the VFP_DOUBLE_EXPONENT_BITS bits above the mantissa. */
  #define vfp_double_packed_exponent(v) \
      (((v) >> VFP_DOUBLE_MANTISSA_BITS) & ((1 << VFP_DOUBLE_EXPONENT_BITS) - 1))
  /* Mantissa field: the low VFP_DOUBLE_MANTISSA_BITS bits. */
  #define vfp_double_packed_mantissa(v) \
      ((v) & ((1ULL << VFP_DOUBLE_MANTISSA_BITS) - 1))
  /*
   * Unpack a double-precision float.  Note that this returns the magnitude
   * of the double-precision float mantissa, with the implicit leading 1
   * added if necessary, aligned to bit 62.
   */
  262. static inline void vfp_double_unpack(struct vfp_double *s, s64 val)
  263. {
  264. u64 significand;
  265. s->sign = vfp_double_packed_sign(val) >> 48;
  266. s->exponent = vfp_double_packed_exponent(val);
  267. significand = (u64) val;
  268. significand = (significand << (64 - VFP_DOUBLE_MANTISSA_BITS)) >> 2;
  269. if (s->exponent && s->exponent != 2047)
  270. significand |= (1ULL << 62);
  271. s->significand = significand;
  272. }
  /*
   * Re-pack a double-precision float.  This assumes that the float is
   * already normalised such that the MSB is bit 62, _not_ bit 63.
   */
  277. static inline s64 vfp_double_pack(struct vfp_double *s)
  278. {
  279. u64 val;
  280. val = ((u64)s->sign << 48) +
  281. ((u64)s->exponent << VFP_DOUBLE_MANTISSA_BITS) +
  282. (s->significand >> VFP_DOUBLE_LOW_BITS);
  283. return (s64)val;
  284. }
  285. static inline int vfp_double_type(struct vfp_double *s)
  286. {
  287. int type = VFP_NUMBER;
  288. if (s->exponent == 2047) {
  289. if (s->significand == 0)
  290. type = VFP_INFINITY;
  291. else if (s->significand & VFP_DOUBLE_SIGNIFICAND_QNAN)
  292. type = VFP_QNAN;
  293. else
  294. type = VFP_SNAN;
  295. } else if (s->exponent == 0) {
  296. if (s->significand == 0)
  297. type |= VFP_ZERO;
  298. else
  299. type |= VFP_DENORMAL;
  300. }
  301. return type;
  302. }
  303. u32 vfp_double_normaliseround(int dd, struct vfp_double *vd, u32 fpscr, u32 exceptions, const char *func);
  304. u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand);
  305. /*
  306. * A special flag to tell the normalisation code not to normalise.
  307. */
  #define VFP_NAN_FLAG (1 << 8)   /* 0x100 */
  /*
   * A bit pattern used to indicate the initial (unset) value of the
   * exception mask, in case nothing handles an instruction.  This
   * doesn't include the NAN flag, which gets masked out before
   * we check for an error.
   */
  /* All 32 bits set except VFP_NAN_FLAG. */
  #define VFP_EXCEPTION_ERROR (~(u32)VFP_NAN_FLAG)
  316. /*
  317. * A flag to tell vfp instruction type.
  318. * OP_SCALAR - this operation always operates in scalar mode
  319. * OP_SD - the instruction exceptionally writes to a single precision result.
  320. * OP_DD - the instruction exceptionally writes to a double precision result.
  321. * OP_SM - the instruction exceptionally reads from a single precision operand.
  322. */
  #define OP_SCALAR   0x01
  /*
   * OP_SD and OP_DD share the same bit.
   * NOTE(review): presumably each is only used for the opposite
   * precision's operations, so they never meet - confirm against users.
   */
  #define OP_SD       0x02
  #define OP_DD       0x02
  #define OP_SM       0x04
  327. struct op {
  328. u32 (* const fn)(int dd, int dn, int dm, u32 fpscr);
  329. u32 flags;
  330. };
  331. extern void vfp_save_state(void *location, u32 fpexc);