visemul.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894
  1. /* visemul.c: Emulation of VIS instructions.
  2. *
  3. * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
  4. */
  5. #include <linux/kernel.h>
  6. #include <linux/errno.h>
  7. #include <linux/thread_info.h>
  8. #include <linux/perf_event.h>
  9. #include <asm/ptrace.h>
  10. #include <asm/pstate.h>
  11. #include <asm/system.h>
  12. #include <asm/fpumacro.h>
  13. #include <asm/uaccess.h>
  14. /* OPF field of various VIS instructions. */
  15. /* 000111011 - four 16-bit packs */
  16. #define FPACK16_OPF 0x03b
  17. /* 000111010 - two 32-bit packs */
  18. #define FPACK32_OPF 0x03a
  19. /* 000111101 - four 16-bit packs */
  20. #define FPACKFIX_OPF 0x03d
  21. /* 001001101 - four 16-bit expands */
  22. #define FEXPAND_OPF 0x04d
  23. /* 001001011 - two 32-bit merges */
  24. #define FPMERGE_OPF 0x04b
  25. /* 000110001 - 8-by-16-bit partitoned product */
  26. #define FMUL8x16_OPF 0x031
  27. /* 000110011 - 8-by-16-bit upper alpha partitioned product */
  28. #define FMUL8x16AU_OPF 0x033
  29. /* 000110101 - 8-by-16-bit lower alpha partitioned product */
  30. #define FMUL8x16AL_OPF 0x035
  31. /* 000110110 - upper 8-by-16-bit partitioned product */
  32. #define FMUL8SUx16_OPF 0x036
  33. /* 000110111 - lower 8-by-16-bit partitioned product */
  34. #define FMUL8ULx16_OPF 0x037
  35. /* 000111000 - upper 8-by-16-bit partitioned product */
  36. #define FMULD8SUx16_OPF 0x038
  37. /* 000111001 - lower unsigned 8-by-16-bit partitioned product */
  38. #define FMULD8ULx16_OPF 0x039
  39. /* 000101000 - four 16-bit compare; set rd if src1 > src2 */
  40. #define FCMPGT16_OPF 0x028
  41. /* 000101100 - two 32-bit compare; set rd if src1 > src2 */
  42. #define FCMPGT32_OPF 0x02c
  43. /* 000100000 - four 16-bit compare; set rd if src1 <= src2 */
  44. #define FCMPLE16_OPF 0x020
  45. /* 000100100 - two 32-bit compare; set rd if src1 <= src2 */
  46. #define FCMPLE32_OPF 0x024
  47. /* 000100010 - four 16-bit compare; set rd if src1 != src2 */
  48. #define FCMPNE16_OPF 0x022
  49. /* 000100110 - two 32-bit compare; set rd if src1 != src2 */
  50. #define FCMPNE32_OPF 0x026
  51. /* 000101010 - four 16-bit compare; set rd if src1 == src2 */
  52. #define FCMPEQ16_OPF 0x02a
  53. /* 000101110 - two 32-bit compare; set rd if src1 == src2 */
  54. #define FCMPEQ32_OPF 0x02e
  55. /* 000000000 - Eight 8-bit edge boundary processing */
  56. #define EDGE8_OPF 0x000
  57. /* 000000001 - Eight 8-bit edge boundary processing, no CC */
  58. #define EDGE8N_OPF 0x001
  59. /* 000000010 - Eight 8-bit edge boundary processing, little-endian */
  60. #define EDGE8L_OPF 0x002
  61. /* 000000011 - Eight 8-bit edge boundary processing, little-endian, no CC */
  62. #define EDGE8LN_OPF 0x003
  63. /* 000000100 - Four 16-bit edge boundary processing */
  64. #define EDGE16_OPF 0x004
  65. /* 000000101 - Four 16-bit edge boundary processing, no CC */
  66. #define EDGE16N_OPF 0x005
  67. /* 000000110 - Four 16-bit edge boundary processing, little-endian */
  68. #define EDGE16L_OPF 0x006
  69. /* 000000111 - Four 16-bit edge boundary processing, little-endian, no CC */
  70. #define EDGE16LN_OPF 0x007
  71. /* 000001000 - Two 32-bit edge boundary processing */
  72. #define EDGE32_OPF 0x008
  73. /* 000001001 - Two 32-bit edge boundary processing, no CC */
  74. #define EDGE32N_OPF 0x009
  75. /* 000001010 - Two 32-bit edge boundary processing, little-endian */
  76. #define EDGE32L_OPF 0x00a
  77. /* 000001011 - Two 32-bit edge boundary processing, little-endian, no CC */
  78. #define EDGE32LN_OPF 0x00b
  79. /* 000111110 - distance between 8 8-bit components */
  80. #define PDIST_OPF 0x03e
  81. /* 000010000 - convert 8-bit 3-D address to blocked byte address */
  82. #define ARRAY8_OPF 0x010
  83. /* 000010010 - convert 16-bit 3-D address to blocked byte address */
  84. #define ARRAY16_OPF 0x012
  85. /* 000010100 - convert 32-bit 3-D address to blocked byte address */
  86. #define ARRAY32_OPF 0x014
  87. /* 000011001 - Set the GSR.MASK field in preparation for a BSHUFFLE */
  88. #define BMASK_OPF 0x019
  89. /* 001001100 - Permute bytes as specified by GSR.MASK */
  90. #define BSHUFFLE_OPF 0x04c
  91. #define VIS_OPF_SHIFT 5
  92. #define VIS_OPF_MASK (0x1ff << VIS_OPF_SHIFT)
  93. #define RS1(INSN) (((INSN) >> 14) & 0x1f)
  94. #define RS2(INSN) (((INSN) >> 0) & 0x1f)
  95. #define RD(INSN) (((INSN) >> 25) & 0x1f)
  96. static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
  97. unsigned int rd, int from_kernel)
  98. {
  99. if (rs2 >= 16 || rs1 >= 16 || rd >= 16) {
  100. if (from_kernel != 0)
  101. __asm__ __volatile__("flushw");
  102. else
  103. flushw_user();
  104. }
  105. }
  106. static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
  107. {
  108. unsigned long value;
  109. if (reg < 16)
  110. return (!reg ? 0 : regs->u_regs[reg]);
  111. if (regs->tstate & TSTATE_PRIV) {
  112. struct reg_window *win;
  113. win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
  114. value = win->locals[reg - 16];
  115. } else if (test_thread_flag(TIF_32BIT)) {
  116. struct reg_window32 __user *win32;
  117. win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
  118. get_user(value, &win32->locals[reg - 16]);
  119. } else {
  120. struct reg_window __user *win;
  121. win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
  122. get_user(value, &win->locals[reg - 16]);
  123. }
  124. return value;
  125. }
  126. static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
  127. struct pt_regs *regs)
  128. {
  129. BUG_ON(reg < 16);
  130. BUG_ON(regs->tstate & TSTATE_PRIV);
  131. if (test_thread_flag(TIF_32BIT)) {
  132. struct reg_window32 __user *win32;
  133. win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
  134. return (unsigned long __user *)&win32->locals[reg - 16];
  135. } else {
  136. struct reg_window __user *win;
  137. win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
  138. return &win->locals[reg - 16];
  139. }
  140. }
  141. static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
  142. struct pt_regs *regs)
  143. {
  144. BUG_ON(reg >= 16);
  145. BUG_ON(regs->tstate & TSTATE_PRIV);
  146. return &regs->u_regs[reg];
  147. }
  148. static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
  149. {
  150. if (rd < 16) {
  151. unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
  152. *rd_kern = val;
  153. } else {
  154. unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
  155. if (test_thread_flag(TIF_32BIT))
  156. __put_user((u32)val, (u32 __user *)rd_user);
  157. else
  158. __put_user(val, rd_user);
  159. }
  160. }
  161. static inline unsigned long fpd_regval(struct fpustate *f,
  162. unsigned int insn_regnum)
  163. {
  164. insn_regnum = (((insn_regnum & 1) << 5) |
  165. (insn_regnum & 0x1e));
  166. return *(unsigned long *) &f->regs[insn_regnum];
  167. }
  168. static inline unsigned long *fpd_regaddr(struct fpustate *f,
  169. unsigned int insn_regnum)
  170. {
  171. insn_regnum = (((insn_regnum & 1) << 5) |
  172. (insn_regnum & 0x1e));
  173. return (unsigned long *) &f->regs[insn_regnum];
  174. }
  175. static inline unsigned int fps_regval(struct fpustate *f,
  176. unsigned int insn_regnum)
  177. {
  178. return f->regs[insn_regnum];
  179. }
  180. static inline unsigned int *fps_regaddr(struct fpustate *f,
  181. unsigned int insn_regnum)
  182. {
  183. return &f->regs[insn_regnum];
  184. }
  185. struct edge_tab {
  186. u16 left, right;
  187. };
  188. static struct edge_tab edge8_tab[8] = {
  189. { 0xff, 0x80 },
  190. { 0x7f, 0xc0 },
  191. { 0x3f, 0xe0 },
  192. { 0x1f, 0xf0 },
  193. { 0x0f, 0xf8 },
  194. { 0x07, 0xfc },
  195. { 0x03, 0xfe },
  196. { 0x01, 0xff },
  197. };
  198. static struct edge_tab edge8_tab_l[8] = {
  199. { 0xff, 0x01 },
  200. { 0xfe, 0x03 },
  201. { 0xfc, 0x07 },
  202. { 0xf8, 0x0f },
  203. { 0xf0, 0x1f },
  204. { 0xe0, 0x3f },
  205. { 0xc0, 0x7f },
  206. { 0x80, 0xff },
  207. };
  208. static struct edge_tab edge16_tab[4] = {
  209. { 0xf, 0x8 },
  210. { 0x7, 0xc },
  211. { 0x3, 0xe },
  212. { 0x1, 0xf },
  213. };
  214. static struct edge_tab edge16_tab_l[4] = {
  215. { 0xf, 0x1 },
  216. { 0xe, 0x3 },
  217. { 0xc, 0x7 },
  218. { 0x8, 0xf },
  219. };
  220. static struct edge_tab edge32_tab[2] = {
  221. { 0x3, 0x2 },
  222. { 0x1, 0x3 },
  223. };
  224. static struct edge_tab edge32_tab_l[2] = {
  225. { 0x3, 0x1 },
  226. { 0x2, 0x3 },
  227. };
  228. static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
  229. {
  230. unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
  231. u16 left, right;
  232. maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
  233. orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
  234. orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
  235. if (test_thread_flag(TIF_32BIT)) {
  236. rs1 = rs1 & 0xffffffff;
  237. rs2 = rs2 & 0xffffffff;
  238. }
  239. switch (opf) {
  240. default:
  241. case EDGE8_OPF:
  242. case EDGE8N_OPF:
  243. left = edge8_tab[rs1 & 0x7].left;
  244. right = edge8_tab[rs2 & 0x7].right;
  245. break;
  246. case EDGE8L_OPF:
  247. case EDGE8LN_OPF:
  248. left = edge8_tab_l[rs1 & 0x7].left;
  249. right = edge8_tab_l[rs2 & 0x7].right;
  250. break;
  251. case EDGE16_OPF:
  252. case EDGE16N_OPF:
  253. left = edge16_tab[(rs1 >> 1) & 0x3].left;
  254. right = edge16_tab[(rs2 >> 1) & 0x3].right;
  255. break;
  256. case EDGE16L_OPF:
  257. case EDGE16LN_OPF:
  258. left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
  259. right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
  260. break;
  261. case EDGE32_OPF:
  262. case EDGE32N_OPF:
  263. left = edge32_tab[(rs1 >> 2) & 0x1].left;
  264. right = edge32_tab[(rs2 >> 2) & 0x1].right;
  265. break;
  266. case EDGE32L_OPF:
  267. case EDGE32LN_OPF:
  268. left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
  269. right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
  270. break;
  271. }
  272. if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
  273. rd_val = right & left;
  274. else
  275. rd_val = left;
  276. store_reg(regs, rd_val, RD(insn));
  277. switch (opf) {
  278. case EDGE8_OPF:
  279. case EDGE8L_OPF:
  280. case EDGE16_OPF:
  281. case EDGE16L_OPF:
  282. case EDGE32_OPF:
  283. case EDGE32L_OPF: {
  284. unsigned long ccr, tstate;
  285. __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
  286. "rd %%ccr, %0"
  287. : "=r" (ccr)
  288. : "r" (orig_rs1), "r" (orig_rs2)
  289. : "cc");
  290. tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
  291. regs->tstate = tstate | (ccr << 32UL);
  292. }
  293. }
  294. }
  295. static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
  296. {
  297. unsigned long rs1, rs2, rd_val;
  298. unsigned int bits, bits_mask;
  299. maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
  300. rs1 = fetch_reg(RS1(insn), regs);
  301. rs2 = fetch_reg(RS2(insn), regs);
  302. bits = (rs2 > 5 ? 5 : rs2);
  303. bits_mask = (1UL << bits) - 1UL;
  304. rd_val = ((((rs1 >> 11) & 0x3) << 0) |
  305. (((rs1 >> 33) & 0x3) << 2) |
  306. (((rs1 >> 55) & 0x1) << 4) |
  307. (((rs1 >> 13) & 0xf) << 5) |
  308. (((rs1 >> 35) & 0xf) << 9) |
  309. (((rs1 >> 56) & 0xf) << 13) |
  310. (((rs1 >> 17) & bits_mask) << 17) |
  311. (((rs1 >> 39) & bits_mask) << (17 + bits)) |
  312. (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
  313. switch (opf) {
  314. case ARRAY16_OPF:
  315. rd_val <<= 1;
  316. break;
  317. case ARRAY32_OPF:
  318. rd_val <<= 2;
  319. }
  320. store_reg(regs, rd_val, RD(insn));
  321. }
  322. static void bmask(struct pt_regs *regs, unsigned int insn)
  323. {
  324. unsigned long rs1, rs2, rd_val, gsr;
  325. maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
  326. rs1 = fetch_reg(RS1(insn), regs);
  327. rs2 = fetch_reg(RS2(insn), regs);
  328. rd_val = rs1 + rs2;
  329. store_reg(regs, rd_val, RD(insn));
  330. gsr = current_thread_info()->gsr[0] & 0xffffffff;
  331. gsr |= rd_val << 32UL;
  332. current_thread_info()->gsr[0] = gsr;
  333. }
  334. static void bshuffle(struct pt_regs *regs, unsigned int insn)
  335. {
  336. struct fpustate *f = FPUSTATE;
  337. unsigned long rs1, rs2, rd_val;
  338. unsigned long bmask, i;
  339. bmask = current_thread_info()->gsr[0] >> 32UL;
  340. rs1 = fpd_regval(f, RS1(insn));
  341. rs2 = fpd_regval(f, RS2(insn));
  342. rd_val = 0UL;
  343. for (i = 0; i < 8; i++) {
  344. unsigned long which = (bmask >> (i * 4)) & 0xf;
  345. unsigned long byte;
  346. if (which < 8)
  347. byte = (rs1 >> (which * 8)) & 0xff;
  348. else
  349. byte = (rs2 >> ((which-8)*8)) & 0xff;
  350. rd_val |= (byte << (i * 8));
  351. }
  352. *fpd_regaddr(f, RD(insn)) = rd_val;
  353. }
  354. static void pdist(struct pt_regs *regs, unsigned int insn)
  355. {
  356. struct fpustate *f = FPUSTATE;
  357. unsigned long rs1, rs2, *rd, rd_val;
  358. unsigned long i;
  359. rs1 = fpd_regval(f, RS1(insn));
  360. rs2 = fpd_regval(f, RS2(insn));
  361. rd = fpd_regaddr(f, RD(insn));
  362. rd_val = *rd;
  363. for (i = 0; i < 8; i++) {
  364. s16 s1, s2;
  365. s1 = (rs1 >> (56 - (i * 8))) & 0xff;
  366. s2 = (rs2 >> (56 - (i * 8))) & 0xff;
  367. /* Absolute value of difference. */
  368. s1 -= s2;
  369. if (s1 < 0)
  370. s1 = ~s1 + 1;
  371. rd_val += s1;
  372. }
  373. *rd = rd_val;
  374. }
  375. static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
  376. {
  377. struct fpustate *f = FPUSTATE;
  378. unsigned long rs1, rs2, gsr, scale, rd_val;
  379. gsr = current_thread_info()->gsr[0];
  380. scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
  381. switch (opf) {
  382. case FPACK16_OPF: {
  383. unsigned long byte;
  384. rs2 = fpd_regval(f, RS2(insn));
  385. rd_val = 0;
  386. for (byte = 0; byte < 4; byte++) {
  387. unsigned int val;
  388. s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
  389. int scaled = src << scale;
  390. int from_fixed = scaled >> 7;
  391. val = ((from_fixed < 0) ?
  392. 0 :
  393. (from_fixed > 255) ?
  394. 255 : from_fixed);
  395. rd_val |= (val << (8 * byte));
  396. }
  397. *fps_regaddr(f, RD(insn)) = rd_val;
  398. break;
  399. }
  400. case FPACK32_OPF: {
  401. unsigned long word;
  402. rs1 = fpd_regval(f, RS1(insn));
  403. rs2 = fpd_regval(f, RS2(insn));
  404. rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
  405. for (word = 0; word < 2; word++) {
  406. unsigned long val;
  407. s32 src = (rs2 >> (word * 32UL));
  408. s64 scaled = src << scale;
  409. s64 from_fixed = scaled >> 23;
  410. val = ((from_fixed < 0) ?
  411. 0 :
  412. (from_fixed > 255) ?
  413. 255 : from_fixed);
  414. rd_val |= (val << (32 * word));
  415. }
  416. *fpd_regaddr(f, RD(insn)) = rd_val;
  417. break;
  418. }
  419. case FPACKFIX_OPF: {
  420. unsigned long word;
  421. rs2 = fpd_regval(f, RS2(insn));
  422. rd_val = 0;
  423. for (word = 0; word < 2; word++) {
  424. long val;
  425. s32 src = (rs2 >> (word * 32UL));
  426. s64 scaled = src << scale;
  427. s64 from_fixed = scaled >> 16;
  428. val = ((from_fixed < -32768) ?
  429. -32768 :
  430. (from_fixed > 32767) ?
  431. 32767 : from_fixed);
  432. rd_val |= ((val & 0xffff) << (word * 16));
  433. }
  434. *fps_regaddr(f, RD(insn)) = rd_val;
  435. break;
  436. }
  437. case FEXPAND_OPF: {
  438. unsigned long byte;
  439. rs2 = fps_regval(f, RS2(insn));
  440. rd_val = 0;
  441. for (byte = 0; byte < 4; byte++) {
  442. unsigned long val;
  443. u8 src = (rs2 >> (byte * 8)) & 0xff;
  444. val = src << 4;
  445. rd_val |= (val << (byte * 16));
  446. }
  447. *fpd_regaddr(f, RD(insn)) = rd_val;
  448. break;
  449. }
  450. case FPMERGE_OPF: {
  451. rs1 = fps_regval(f, RS1(insn));
  452. rs2 = fps_regval(f, RS2(insn));
  453. rd_val = (((rs2 & 0x000000ff) << 0) |
  454. ((rs1 & 0x000000ff) << 8) |
  455. ((rs2 & 0x0000ff00) << 8) |
  456. ((rs1 & 0x0000ff00) << 16) |
  457. ((rs2 & 0x00ff0000) << 16) |
  458. ((rs1 & 0x00ff0000) << 24) |
  459. ((rs2 & 0xff000000) << 24) |
  460. ((rs1 & 0xff000000) << 32));
  461. *fpd_regaddr(f, RD(insn)) = rd_val;
  462. break;
  463. }
  464. }
  465. }
  466. static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
  467. {
  468. struct fpustate *f = FPUSTATE;
  469. unsigned long rs1, rs2, rd_val;
  470. switch (opf) {
  471. case FMUL8x16_OPF: {
  472. unsigned long byte;
  473. rs1 = fps_regval(f, RS1(insn));
  474. rs2 = fpd_regval(f, RS2(insn));
  475. rd_val = 0;
  476. for (byte = 0; byte < 4; byte++) {
  477. u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
  478. s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
  479. u32 prod = src1 * src2;
  480. u16 scaled = ((prod & 0x00ffff00) >> 8);
  481. /* Round up. */
  482. if (prod & 0x80)
  483. scaled++;
  484. rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
  485. }
  486. *fpd_regaddr(f, RD(insn)) = rd_val;
  487. break;
  488. }
  489. case FMUL8x16AU_OPF:
  490. case FMUL8x16AL_OPF: {
  491. unsigned long byte;
  492. s16 src2;
  493. rs1 = fps_regval(f, RS1(insn));
  494. rs2 = fps_regval(f, RS2(insn));
  495. rd_val = 0;
  496. src2 = rs2 >> (opf == FMUL8x16AU_OPF ? 16 : 0);
  497. for (byte = 0; byte < 4; byte++) {
  498. u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
  499. u32 prod = src1 * src2;
  500. u16 scaled = ((prod & 0x00ffff00) >> 8);
  501. /* Round up. */
  502. if (prod & 0x80)
  503. scaled++;
  504. rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
  505. }
  506. *fpd_regaddr(f, RD(insn)) = rd_val;
  507. break;
  508. }
  509. case FMUL8SUx16_OPF:
  510. case FMUL8ULx16_OPF: {
  511. unsigned long byte, ushift;
  512. rs1 = fpd_regval(f, RS1(insn));
  513. rs2 = fpd_regval(f, RS2(insn));
  514. rd_val = 0;
  515. ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
  516. for (byte = 0; byte < 4; byte++) {
  517. u16 src1;
  518. s16 src2;
  519. u32 prod;
  520. u16 scaled;
  521. src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
  522. src2 = ((rs2 >> (16 * byte)) & 0xffff);
  523. prod = src1 * src2;
  524. scaled = ((prod & 0x00ffff00) >> 8);
  525. /* Round up. */
  526. if (prod & 0x80)
  527. scaled++;
  528. rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
  529. }
  530. *fpd_regaddr(f, RD(insn)) = rd_val;
  531. break;
  532. }
  533. case FMULD8SUx16_OPF:
  534. case FMULD8ULx16_OPF: {
  535. unsigned long byte, ushift;
  536. rs1 = fps_regval(f, RS1(insn));
  537. rs2 = fps_regval(f, RS2(insn));
  538. rd_val = 0;
  539. ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
  540. for (byte = 0; byte < 2; byte++) {
  541. u16 src1;
  542. s16 src2;
  543. u32 prod;
  544. u16 scaled;
  545. src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
  546. src2 = ((rs2 >> (16 * byte)) & 0xffff);
  547. prod = src1 * src2;
  548. scaled = ((prod & 0x00ffff00) >> 8);
  549. /* Round up. */
  550. if (prod & 0x80)
  551. scaled++;
  552. rd_val |= ((scaled & 0xffffUL) <<
  553. ((byte * 32UL) + 7UL));
  554. }
  555. *fpd_regaddr(f, RD(insn)) = rd_val;
  556. break;
  557. }
  558. }
  559. }
  560. static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
  561. {
  562. struct fpustate *f = FPUSTATE;
  563. unsigned long rs1, rs2, rd_val, i;
  564. rs1 = fpd_regval(f, RS1(insn));
  565. rs2 = fpd_regval(f, RS2(insn));
  566. rd_val = 0;
  567. switch (opf) {
  568. case FCMPGT16_OPF:
  569. for (i = 0; i < 4; i++) {
  570. s16 a = (rs1 >> (i * 16)) & 0xffff;
  571. s16 b = (rs2 >> (i * 16)) & 0xffff;
  572. if (a > b)
  573. rd_val |= 8 >> i;
  574. }
  575. break;
  576. case FCMPGT32_OPF:
  577. for (i = 0; i < 2; i++) {
  578. s32 a = (rs1 >> (i * 32)) & 0xffffffff;
  579. s32 b = (rs2 >> (i * 32)) & 0xffffffff;
  580. if (a > b)
  581. rd_val |= 2 >> i;
  582. }
  583. break;
  584. case FCMPLE16_OPF:
  585. for (i = 0; i < 4; i++) {
  586. s16 a = (rs1 >> (i * 16)) & 0xffff;
  587. s16 b = (rs2 >> (i * 16)) & 0xffff;
  588. if (a <= b)
  589. rd_val |= 8 >> i;
  590. }
  591. break;
  592. case FCMPLE32_OPF:
  593. for (i = 0; i < 2; i++) {
  594. s32 a = (rs1 >> (i * 32)) & 0xffffffff;
  595. s32 b = (rs2 >> (i * 32)) & 0xffffffff;
  596. if (a <= b)
  597. rd_val |= 2 >> i;
  598. }
  599. break;
  600. case FCMPNE16_OPF:
  601. for (i = 0; i < 4; i++) {
  602. s16 a = (rs1 >> (i * 16)) & 0xffff;
  603. s16 b = (rs2 >> (i * 16)) & 0xffff;
  604. if (a != b)
  605. rd_val |= 8 >> i;
  606. }
  607. break;
  608. case FCMPNE32_OPF:
  609. for (i = 0; i < 2; i++) {
  610. s32 a = (rs1 >> (i * 32)) & 0xffffffff;
  611. s32 b = (rs2 >> (i * 32)) & 0xffffffff;
  612. if (a != b)
  613. rd_val |= 2 >> i;
  614. }
  615. break;
  616. case FCMPEQ16_OPF:
  617. for (i = 0; i < 4; i++) {
  618. s16 a = (rs1 >> (i * 16)) & 0xffff;
  619. s16 b = (rs2 >> (i * 16)) & 0xffff;
  620. if (a == b)
  621. rd_val |= 8 >> i;
  622. }
  623. break;
  624. case FCMPEQ32_OPF:
  625. for (i = 0; i < 2; i++) {
  626. s32 a = (rs1 >> (i * 32)) & 0xffffffff;
  627. s32 b = (rs2 >> (i * 32)) & 0xffffffff;
  628. if (a == b)
  629. rd_val |= 2 >> i;
  630. }
  631. break;
  632. }
  633. maybe_flush_windows(0, 0, RD(insn), 0);
  634. store_reg(regs, rd_val, RD(insn));
  635. }
  636. /* Emulate the VIS instructions which are not implemented in
  637. * hardware on Niagara.
  638. */
  639. int vis_emul(struct pt_regs *regs, unsigned int insn)
  640. {
  641. unsigned long pc = regs->tpc;
  642. unsigned int opf;
  643. BUG_ON(regs->tstate & TSTATE_PRIV);
  644. perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, 0);
  645. if (test_thread_flag(TIF_32BIT))
  646. pc = (u32)pc;
  647. if (get_user(insn, (u32 __user *) pc))
  648. return -EFAULT;
  649. save_and_clear_fpu();
  650. opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
  651. switch (opf) {
  652. default:
  653. return -EINVAL;
  654. /* Pixel Formatting Instructions. */
  655. case FPACK16_OPF:
  656. case FPACK32_OPF:
  657. case FPACKFIX_OPF:
  658. case FEXPAND_OPF:
  659. case FPMERGE_OPF:
  660. pformat(regs, insn, opf);
  661. break;
  662. /* Partitioned Multiply Instructions */
  663. case FMUL8x16_OPF:
  664. case FMUL8x16AU_OPF:
  665. case FMUL8x16AL_OPF:
  666. case FMUL8SUx16_OPF:
  667. case FMUL8ULx16_OPF:
  668. case FMULD8SUx16_OPF:
  669. case FMULD8ULx16_OPF:
  670. pmul(regs, insn, opf);
  671. break;
  672. /* Pixel Compare Instructions */
  673. case FCMPGT16_OPF:
  674. case FCMPGT32_OPF:
  675. case FCMPLE16_OPF:
  676. case FCMPLE32_OPF:
  677. case FCMPNE16_OPF:
  678. case FCMPNE32_OPF:
  679. case FCMPEQ16_OPF:
  680. case FCMPEQ32_OPF:
  681. pcmp(regs, insn, opf);
  682. break;
  683. /* Edge Handling Instructions */
  684. case EDGE8_OPF:
  685. case EDGE8N_OPF:
  686. case EDGE8L_OPF:
  687. case EDGE8LN_OPF:
  688. case EDGE16_OPF:
  689. case EDGE16N_OPF:
  690. case EDGE16L_OPF:
  691. case EDGE16LN_OPF:
  692. case EDGE32_OPF:
  693. case EDGE32N_OPF:
  694. case EDGE32L_OPF:
  695. case EDGE32LN_OPF:
  696. edge(regs, insn, opf);
  697. break;
  698. /* Pixel Component Distance */
  699. case PDIST_OPF:
  700. pdist(regs, insn);
  701. break;
  702. /* Three-Dimensional Array Addressing Instructions */
  703. case ARRAY8_OPF:
  704. case ARRAY16_OPF:
  705. case ARRAY32_OPF:
  706. array(regs, insn, opf);
  707. break;
  708. /* Byte Mask and Shuffle Instructions */
  709. case BMASK_OPF:
  710. bmask(regs, insn);
  711. break;
  712. case BSHUFFLE_OPF:
  713. bshuffle(regs, insn);
  714. break;
  715. }
  716. regs->tpc = regs->tnpc;
  717. regs->tnpc += 4;
  718. return 0;
  719. }