dis_decode.cc 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558
  1. /////////////////////////////////////////////////////////////////////////
  2. // $Id: dis_decode.cc 11873 2013-10-10 21:00:26Z sshwarts $
  3. /////////////////////////////////////////////////////////////////////////
  4. //
  5. // Copyright (c) 2005-2012 Stanislav Shwartsman
  6. // Written by Stanislav Shwartsman [sshwarts at sourceforge net]
  7. //
  8. // This library is free software; you can redistribute it and/or
  9. // modify it under the terms of the GNU Lesser General Public
  10. // License as published by the Free Software Foundation; either
  11. // version 2 of the License, or (at your option) any later version.
  12. //
  13. // This library is distributed in the hope that it will be useful,
  14. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. // Lesser General Public License for more details.
  17. //
  18. // You should have received a copy of the GNU Lesser General Public
  19. // License along with this library; if not, write to the Free Software
  20. // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  21. #include <stdio.h>
  22. #include <stdarg.h>
  23. #include <string.h>
  24. #include "disasm.h"
  25. #include "dis_tables.h"
  /*
   * Accessors for the untyped OpcodeInfo pointer of an opcode table entry.
   *   OPCODE(entry)       - view OpcodeInfo as a BxDisasmOpcodeInfo_t (leaf)
   *   OPCODE_TABLE(entry) - view OpcodeInfo as a nested BxDisasmOpcodeTable_t
   * The macro argument is parenthesized so that expression arguments such as
   * OPCODE(table + n) bind to the whole expression and not only its last term.
   */
  #define OPCODE(entry) ((BxDisasmOpcodeInfo_t*) (entry)->OpcodeInfo)
  #define OPCODE_TABLE(entry) ((BxDisasmOpcodeTable_t*) (entry)->OpcodeInfo)
  /*
   * ModRM presence table, one flag per opcode (1 = a ModRM byte follows).
   *
   *   [0x000 .. 0x0FF]  one-byte opcodes, indexed by the opcode byte
   *   [0x100 .. 0x1FF]  0F-escaped opcodes, indexed by 0x100 | second byte
   *
   * Each source row covers 16 consecutive opcodes; the trailing comment
   * names the first opcode of the row.
   */
  static const unsigned char instruction_has_modrm[512] = {
    /*  x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF          */
    1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 00 */
    1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 10 */
    1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 20 */
    1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0, /* 30 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 40 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 50 */
    0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0, /* 60 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 70 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 80 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 90 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* A0 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* B0 */
    1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0, /* C0 */
    1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1, /* D0 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* E0 */
    0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1, /* F0 */

    /*  x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF          */
    1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1, /* 0F 00 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F 10 */
    1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1, /* 0F 20 */
    0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0, /* 0F 30 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F 40 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F 50 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F 60 */
    1,1,1,1,1,1,1,0,1,1,0,0,1,1,1,1, /* 0F 70 */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0F 80 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F 90 */
    0,0,0,1,1,1,0,0,0,0,0,1,1,1,1,1, /* 0F A0 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F B0 */
    1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0, /* 0F C0 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F D0 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0F E0 */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0  /* 0F F0 */
  };
  68. unsigned disassembler::disasm(bx_bool is_32, bx_bool is_64, bx_address cs_base, bx_address ip, const Bit8u *instr, char *disbuf)
  69. {
  70. x86_insn insn = decode(is_32, is_64, cs_base, ip, instr, disbuf);
  71. return insn.ilen;
  72. }
  73. x86_insn disassembler::decode(bx_bool is_32, bx_bool is_64, bx_address cs_base, bx_address ip, const Bit8u *instr, char *disbuf)
  74. {
  75. if (is_64) is_32 = 1;
  76. x86_insn insn(is_32, is_64);
  77. const Bit8u *instruction_begin = instruction = instr;
  78. resolve_modrm = NULL;
  79. db_eip = ip;
  80. db_cs_base = cs_base; // cs linear base (cs_base for PM & cs<<4 for RM & VM)
  81. disbufptr = disbuf; // start sprintf()'ing into beginning of buffer
  82. #define SSE_PREFIX_NONE 0
  83. #define SSE_PREFIX_66 1
  84. #define SSE_PREFIX_F3 2
  85. #define SSE_PREFIX_F2 3 /* only one SSE prefix could be used */
  86. unsigned sse_prefix = SSE_PREFIX_NONE, sse_opcode = 0;
  87. unsigned rex_prefix = 0, prefixes = 0;
  88. for(;;)
  89. {
  90. insn.b1 = fetch_byte();
  91. prefixes++;
  92. switch(insn.b1) {
  93. case 0x40: // rex
  94. case 0x41:
  95. case 0x42:
  96. case 0x43:
  97. case 0x44:
  98. case 0x45:
  99. case 0x46:
  100. case 0x47:
  101. case 0x48:
  102. case 0x49:
  103. case 0x4A:
  104. case 0x4B:
  105. case 0x4C:
  106. case 0x4D:
  107. case 0x4E:
  108. case 0x4F:
  109. if (! is_64) break;
  110. rex_prefix = insn.b1;
  111. continue;
  112. case 0x26: // ES:
  113. case 0x2e: // CS:
  114. case 0x36: // SS:
  115. case 0x3e: // DS:
  116. if (! is_64) insn.seg_override = (insn.b1 >> 3) & 3;
  117. rex_prefix = 0;
  118. continue;
  119. case 0x64: // FS:
  120. case 0x65: // GS:
  121. insn.seg_override = insn.b1 & 0xf;
  122. rex_prefix = 0;
  123. continue;
  124. case 0x66: // operand size override
  125. if (!insn.os_64) insn.os_32 = !is_32;
  126. if (!sse_prefix) sse_prefix = SSE_PREFIX_66;
  127. rex_prefix = 0;
  128. continue;
  129. case 0x67: // address size override
  130. if (!is_64) insn.as_32 = !is_32;
  131. insn.as_64 = 0;
  132. rex_prefix = 0;
  133. continue;
  134. case 0xf0: // lock
  135. rex_prefix = 0;
  136. continue;
  137. case 0xf2: // repne
  138. case 0xf3: // rep
  139. sse_prefix = (insn.b1 & 0xf) ^ 1;
  140. rex_prefix = 0;
  141. continue;
  142. // no more prefixes
  143. default:
  144. break;
  145. }
  146. break;
  147. }
  148. if (insn.b1 == 0x0f)
  149. {
  150. insn.b1 = 0x100 | fetch_byte();
  151. }
  152. if (rex_prefix) {
  153. insn.extend8b = 1;
  154. if (rex_prefix & 0x8) {
  155. insn.os_64 = 1;
  156. insn.os_32 = 1;
  157. }
  158. if (rex_prefix & 0x4) insn.rex_r = 8;
  159. if (rex_prefix & 0x2) insn.rex_x = 8;
  160. if (rex_prefix & 0x1) insn.rex_b = 8;
  161. }
  162. const BxDisasmOpcodeTable_t *opcode_table, *entry;
  163. if (is_64) {
  164. if (insn.os_64)
  165. opcode_table = BxDisasmOpcodes64q;
  166. else if (insn.os_32)
  167. opcode_table = BxDisasmOpcodes64d;
  168. else
  169. opcode_table = BxDisasmOpcodes64w;
  170. } else {
  171. if (insn.os_32)
  172. opcode_table = BxDisasmOpcodes32;
  173. else
  174. opcode_table = BxDisasmOpcodes16;
  175. }
  176. entry = opcode_table + insn.b1;
  177. if ((insn.b1 & ~1) == 0xc4 && (is_64 || (peek_byte() & 0xc0) == 0xc0))
  178. {
  179. if (sse_prefix)
  180. dis_sprintf("(bad vex+rex prefix) ");
  181. if (rex_prefix)
  182. dis_sprintf("(bad vex+sse prefix) ");
  183. // decode 0xC4 or 0xC5 VEX prefix
  184. sse_prefix = decode_vex(&insn);
  185. if (insn.b1 < 256 || insn.b1 >= 1024)
  186. entry = &BxDisasmGroupSSE_ERR[0];
  187. else
  188. entry = BxDisasmOpcodesAVX + (insn.b1 - 256);
  189. }
  190. /*
  191. if (insn.b1== 0x62 && (is_64 || (peek_byte() & 0xc0) == 0xc0))
  192. {
  193. if (sse_prefix)
  194. dis_sprintf("(bad evex+rex prefix) ");
  195. if (rex_prefix)
  196. dis_sprintf("(bad evex+sse prefix) ");
  197. // decode 0x62 EVEX prefix
  198. sse_prefix = decode_evex(&insn);
  199. if (insn.b1 < 256 || insn.b1 >= 1024)
  200. entry = &BxDisasmGroupSSE_ERR[0];
  201. // else
  202. // entry = BxDisasmOpcodesEVEX + (insn.b1 - 256);
  203. }
  204. */
  205. else if (insn.b1 == 0x8f && (is_64 || (peek_byte() & 0xc0) == 0xc0) && (peek_byte() & 0x8) == 0x8)
  206. {
  207. if (sse_prefix)
  208. dis_sprintf("(bad xop+rex prefix) ");
  209. if (rex_prefix)
  210. dis_sprintf("(bad xop+sse prefix) ");
  211. // decode 0x8F XOP prefix
  212. sse_prefix = decode_xop(&insn);
  213. if (insn.b1 >= 768 || sse_prefix != 0)
  214. entry = &BxDisasmGroupSSE_ERR[0];
  215. else
  216. entry = BxDisasmOpcodesXOP + insn.b1;
  217. }
  218. if (insn.b1 >= 512 || instruction_has_modrm[insn.b1] || insn.is_xop > 0)
  219. {
  220. // take 3rd byte for 3-byte opcode
  221. if (entry->Attr == _GRP3BOP) {
  222. entry = &(OPCODE_TABLE(entry)[fetch_byte()]);
  223. }
  224. decode_modrm(&insn);
  225. }
  226. int attr = entry->Attr;
  227. while(attr)
  228. {
  229. switch(attr) {
  230. case _GROUPN:
  231. entry = &(OPCODE_TABLE(entry)[insn.nnn & 7]);
  232. break;
  233. case _GRPSSE66:
  234. /* SSE opcode group with only prefix 0x66 allowed */
  235. sse_opcode = 1;
  236. if (sse_prefix != SSE_PREFIX_66)
  237. entry = &(BxDisasmGroupSSE_ERR[sse_prefix]);
  238. attr = 0;
  239. continue;
  240. case _GRPSSEF2:
  241. /* SSE opcode group with only prefix 0xF2 allowed */
  242. sse_opcode = 1;
  243. if (sse_prefix != SSE_PREFIX_F2)
  244. entry = &(BxDisasmGroupSSE_ERR[sse_prefix]);
  245. attr = 0;
  246. continue;
  247. case _GRPSSEF3:
  248. /* SSE opcode group with only prefix 0xF3 allowed */
  249. sse_opcode = 1;
  250. if (sse_prefix != SSE_PREFIX_F3)
  251. entry = &(BxDisasmGroupSSE_ERR[sse_prefix]);
  252. attr = 0;
  253. continue;
  254. case _GRPSSENONE:
  255. /* SSE opcode group with no prefix only allowed */
  256. sse_opcode = 1;
  257. if (sse_prefix != SSE_PREFIX_NONE)
  258. entry = &(BxDisasmGroupSSE_ERR[sse_prefix]);
  259. attr = 0;
  260. continue;
  261. case _GRPSSE:
  262. sse_opcode = 1;
  263. /* For SSE opcodes, look into another 4 entries table
  264. with the opcode prefixes (NONE, 0x66, 0xF2, 0xF3) */
  265. entry = &(OPCODE_TABLE(entry)[sse_prefix]);
  266. break;
  267. case _GRPSSE2:
  268. sse_opcode = 1;
  269. /* For SSE opcodes, look into another 2 entries table
  270. with the opcode prefixes (NONE, 0x66)
  271. SSE prefixes 0xF2 and 0xF3 are not allowed */
  272. if (sse_prefix > SSE_PREFIX_66)
  273. entry = &(BxDisasmGroupSSE_ERR[sse_prefix]);
  274. else
  275. entry = &(OPCODE_TABLE(entry)[sse_prefix]);
  276. break;
  277. case _SPLIT11B:
  278. entry = &(OPCODE_TABLE(entry)[insn.mod != 3]); /* REG/MEM */
  279. break;
  280. case _GRPRM:
  281. entry = &(OPCODE_TABLE(entry)[insn.rm & 7]);
  282. break;
  283. case _GRPFP:
  284. if(insn.mod != 3)
  285. {
  286. entry = &(OPCODE_TABLE(entry)[insn.nnn & 7]);
  287. } else {
  288. int index = (insn.b1-0xD8)*64 + (insn.modrm & 0x3f);
  289. entry = &(BxDisasmOpcodeInfoFP[index]);
  290. }
  291. break;
  292. case _GRP3DNOW:
  293. entry = &(BxDisasm3DNowGroup[fetch_byte()]);
  294. break;
  295. case _GRP64B:
  296. entry = &(OPCODE_TABLE(entry)[insn.os_64 ? 2 : insn.os_32]);
  297. if (sse_prefix == SSE_PREFIX_66)
  298. sse_prefix = 0;
  299. break;
  300. case _GRPVEXW:
  301. entry = &(OPCODE_TABLE(entry)[insn.vex_w]);
  302. break;
  303. default:
  304. printf("Internal disassembler error - unknown attribute !\n");
  305. return x86_insn(is_32, is_64);
  306. }
  307. /* get additional attributes from group table */
  308. attr = entry->Attr;
  309. }
  310. #define BRANCH_NOT_TAKEN 0x2E
  311. #define BRANCH_TAKEN 0x3E
  312. unsigned branch_hint = 0;
  313. // print prefixes
  314. for(unsigned i=0;i<prefixes;i++)
  315. {
  316. Bit8u prefix_byte = *(instr+i);
  317. if (prefix_byte == 0xF0) dis_sprintf("lock ");
  318. if (! insn.is_xop && ! insn.is_vex) {
  319. if (insn.b1 == 0x90 && !insn.rex_b && prefix_byte == 0xF3)
  320. continue;
  321. if (prefix_byte == 0xF3 || prefix_byte == 0xF2) {
  322. if (! sse_opcode) {
  323. const BxDisasmOpcodeTable_t *prefix = &(opcode_table[prefix_byte]);
  324. dis_sprintf("%s ", OPCODE(prefix)->IntelOpcode);
  325. }
  326. }
  327. // branch hint for jcc instructions
  328. if ((insn.b1 >= 0x070 && insn.b1 <= 0x07F) ||
  329. (insn.b1 >= 0x180 && insn.b1 <= 0x18F))
  330. {
  331. if (prefix_byte == BRANCH_NOT_TAKEN || prefix_byte == BRANCH_TAKEN)
  332. branch_hint = prefix_byte;
  333. }
  334. }
  335. }
  336. const BxDisasmOpcodeInfo_t *opcode = OPCODE(entry);
  337. if (! insn.is_xop && ! insn.is_vex) {
  338. // patch jecx opcode
  339. if (insn.b1 == 0xE3 && insn.as_32 && !insn.as_64)
  340. opcode = &Ia_jecxz_Jb;
  341. // fix nop opcode
  342. if (insn.b1 == 0x90) {
  343. if (sse_prefix == SSE_PREFIX_F3)
  344. opcode = &Ia_pause;
  345. else if (!insn.rex_b)
  346. opcode = &Ia_nop;
  347. }
  348. }
  349. // print instruction disassembly
  350. if (intel_mode)
  351. print_disassembly_intel(&insn, opcode);
  352. else
  353. print_disassembly_att (&insn, opcode);
  354. if (branch_hint == BRANCH_NOT_TAKEN)
  355. {
  356. dis_sprintf(", not taken");
  357. }
  358. else if (branch_hint == BRANCH_TAKEN)
  359. {
  360. dis_sprintf(", taken");
  361. }
  362. if (insn.is_vex < 0)
  363. dis_sprintf(" (bad vex)");
  364. else if (insn.is_evex < 0)
  365. dis_sprintf(" (bad evex)");
  366. else if (insn.is_xop < 0)
  367. dis_sprintf(" (bad xop)");
  368. insn.ilen = (unsigned)(instruction - instruction_begin);
  369. return insn;
  370. }
  371. unsigned disassembler::decode_vex(x86_insn *insn)
  372. {
  373. insn->is_vex = 1;
  374. unsigned b2 = fetch_byte(), vex_opcode_extension = 1;
  375. insn->rex_r = (b2 & 0x80) ? 0 : 0x8;
  376. if (insn->b1 == 0xc4) {
  377. // decode 3-byte VEX prefix
  378. insn->rex_x = (b2 & 0x40) ? 0 : 0x8;
  379. if (insn->is_64)
  380. insn->rex_b = (b2 & 0x20) ? 0 : 0x8;
  381. vex_opcode_extension = b2 & 0x1f;
  382. if (! vex_opcode_extension || vex_opcode_extension > 3)
  383. insn->is_vex = -1;
  384. b2 = fetch_byte(); // fetch VEX3 byte
  385. if (b2 & 0x80) {
  386. insn->os_64 = 1;
  387. insn->os_32 = 1;
  388. insn->vex_w = 1;
  389. }
  390. }
  391. insn->vex_vvv = 15 - ((b2 >> 3) & 0xf);
  392. if (! insn->is_64) insn->vex_vvv &= 7;
  393. insn->vex_l = (b2 >> 2) & 0x1;
  394. insn->b1 = fetch_byte() + 256 * vex_opcode_extension;
  395. return b2 & 0x3;
  396. }
  397. unsigned disassembler::decode_evex(x86_insn *insn)
  398. {
  399. insn->is_evex = 1;
  400. Bit32u evex = fetch_dword();
  401. // check for reserved EVEX bits
  402. if ((evex & 0x0c) != 0 || (evex & 0x400) == 0) {
  403. insn->is_evex = -1;
  404. }
  405. unsigned evex_opcext = evex & 0x3;
  406. if (evex_opcext == 0) {
  407. insn->is_evex = -1;
  408. }
  409. if (insn->is_64) {
  410. insn->rex_r = ((evex >> 4) & 0x8) ^ 0x8;
  411. insn->rex_r |= (evex & 0x10) ^ 0x10;
  412. insn->rex_x = ((evex >> 3) & 0x8) ^ 0x8;
  413. insn->rex_b = ((evex >> 2) & 0x8) ^ 0x8;
  414. insn->rex_b |= (insn->rex_x << 1);
  415. }
  416. unsigned sse_prefix = (evex >> 8) & 0x3;
  417. insn->vex_vvv = 15 - ((evex >> 11) & 0xf);
  418. unsigned evex_v = ((evex >> 15) & 0x10) ^ 0x10;
  419. insn->vex_vvv |= evex_v;
  420. if (! insn->is_64) insn->vex_vvv &= 7;
  421. insn->vex_w = (evex >> 15) & 0x1;
  422. if (insn->vex_w) {
  423. insn->os_64 = 1;
  424. insn->os_32 = 1;
  425. }
  426. insn->evex_b = (evex >> 20) & 0x1;
  427. insn->evex_ll_rc = (evex >> 21) & 0x3;
  428. insn->evex_z = (evex >> 23) & 0x1;
  429. insn->b1 = (evex >> 24);
  430. insn->b1 += 256 * (evex_opcext-1);
  431. return sse_prefix;
  432. }
  433. unsigned disassembler::decode_xop(x86_insn *insn)
  434. {
  435. insn->is_xop = 1;
  436. unsigned b2 = fetch_byte();
  437. insn->rex_r = (b2 & 0x80) ? 0 : 0x8;
  438. insn->rex_x = (b2 & 0x40) ? 0 : 0x8;
  439. if (insn->is_64)
  440. insn->rex_b = (b2 & 0x20) ? 0 : 0x8;
  441. unsigned xop_opcode_extension = (b2 & 0x1f) - 8;
  442. if (xop_opcode_extension >= 3)
  443. insn->is_xop = -1;
  444. b2 = fetch_byte(); // fetch VEX3 byte
  445. if (b2 & 0x80) {
  446. insn->os_64 = 1;
  447. insn->os_32 = 1;
  448. insn->vex_w = 1;
  449. }
  450. insn->vex_vvv = 15 - ((b2 >> 3) & 0xf);
  451. if (! insn->is_64) insn->vex_vvv &= 7;
  452. insn->vex_l = (b2 >> 2) & 0x1;
  453. insn->b1 = fetch_byte() + 256 * xop_opcode_extension;
  454. return b2 & 0x3;
  455. }
  456. void disassembler::dis_sprintf(const char *fmt, ...)
  457. {
  458. va_list ap;
  459. va_start(ap, fmt);
  460. vsprintf(disbufptr, fmt, ap);
  461. va_end(ap);
  462. disbufptr += strlen(disbufptr);
  463. }
  464. void disassembler::dis_putc(char symbol)
  465. {
  466. *disbufptr++ = symbol;
  467. *disbufptr = 0;
  468. }