Arm64Emitter.h

  1. // Copyright 2015 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #pragma once
  4. #include <array>
  5. #include <bit>
  6. #include <cstring>
  7. #include <functional>
  8. #include <optional>
  9. #include <type_traits>
  10. #include <utility>
  11. #include "Common/ArmCommon.h"
  12. #include "Common/Assert.h"
  13. #include "Common/BitSet.h"
  14. #include "Common/BitUtils.h"
  15. #include "Common/CodeBlock.h"
  16. #include "Common/Common.h"
  17. #include "Common/CommonTypes.h"
  18. #include "Common/MathUtil.h"
  19. #include "Common/SmallVector.h"
  20. namespace Arm64Gen
  21. {
  22. // X30 serves a dual purpose as a link register
  23. // Encoded as <u3:type><u5:reg>
  24. // Types:
  25. // 000 - 32bit GPR
  26. // 001 - 64bit GPR
  27. // 010 - VFP single precision
  28. // 100 - VFP double precision
  29. // 110 - VFP quad precision
  30. enum class ARM64Reg
  31. {
  32. // 32bit registers
  33. W0 = 0,
  34. W1,
  35. W2,
  36. W3,
  37. W4,
  38. W5,
  39. W6,
  40. W7,
  41. W8,
  42. W9,
  43. W10,
  44. W11,
  45. W12,
  46. W13,
  47. W14,
  48. W15,
  49. W16,
  50. W17,
  51. W18,
  52. W19,
  53. W20,
  54. W21,
  55. W22,
  56. W23,
  57. W24,
  58. W25,
  59. W26,
  60. W27,
  61. W28,
  62. W29,
  63. W30,
  64. WSP, // 32bit stack pointer
  65. // 64bit registers
  66. X0 = 0x20,
  67. X1,
  68. X2,
  69. X3,
  70. X4,
  71. X5,
  72. X6,
  73. X7,
  74. X8,
  75. X9,
  76. X10,
  77. X11,
  78. X12,
  79. X13,
  80. X14,
  81. X15,
  82. X16,
  83. X17,
  84. X18,
  85. X19,
  86. X20,
  87. X21,
  88. X22,
  89. X23,
  90. X24,
  91. X25,
  92. X26,
  93. X27,
  94. X28,
  95. X29,
  96. X30,
  97. SP, // 64bit stack pointer
  98. // VFP single precision registers
  99. S0 = 0x40,
  100. S1,
  101. S2,
  102. S3,
  103. S4,
  104. S5,
  105. S6,
  106. S7,
  107. S8,
  108. S9,
  109. S10,
  110. S11,
  111. S12,
  112. S13,
  113. S14,
  114. S15,
  115. S16,
  116. S17,
  117. S18,
  118. S19,
  119. S20,
  120. S21,
  121. S22,
  122. S23,
  123. S24,
  124. S25,
  125. S26,
  126. S27,
  127. S28,
  128. S29,
  129. S30,
  130. S31,
  131. // VFP Double Precision registers
  132. D0 = 0x80,
  133. D1,
  134. D2,
  135. D3,
  136. D4,
  137. D5,
  138. D6,
  139. D7,
  140. D8,
  141. D9,
  142. D10,
  143. D11,
  144. D12,
  145. D13,
  146. D14,
  147. D15,
  148. D16,
  149. D17,
  150. D18,
  151. D19,
  152. D20,
  153. D21,
  154. D22,
  155. D23,
  156. D24,
  157. D25,
  158. D26,
  159. D27,
  160. D28,
  161. D29,
  162. D30,
  163. D31,
  164. // ASIMD Quad-Word registers
  165. Q0 = 0xC0,
  166. Q1,
  167. Q2,
  168. Q3,
  169. Q4,
  170. Q5,
  171. Q6,
  172. Q7,
  173. Q8,
  174. Q9,
  175. Q10,
  176. Q11,
  177. Q12,
  178. Q13,
  179. Q14,
  180. Q15,
  181. Q16,
  182. Q17,
  183. Q18,
  184. Q19,
  185. Q20,
  186. Q21,
  187. Q22,
  188. Q23,
  189. Q24,
  190. Q25,
  191. Q26,
  192. Q27,
  193. Q28,
  194. Q29,
  195. Q30,
  196. Q31,
  197. // For PRFM (prefetch memory) encoding
  198. // This is encoded in the Rt register
  199. // Data preload
  200. PLDL1KEEP = 0,
  201. PLDL1STRM,
  202. PLDL2KEEP,
  203. PLDL2STRM,
  204. PLDL3KEEP,
  205. PLDL3STRM,
  206. // Instruction preload
  207. PLIL1KEEP = 8,
  208. PLIL1STRM,
  209. PLIL2KEEP,
  210. PLIL2STRM,
  211. PLIL3KEEP,
  212. PLIL3STRM,
  213. // Prepare for store
  214. PLTL1KEEP = 16,
  215. PLTL1STRM,
  216. PLTL2KEEP,
  217. PLTL2STRM,
  218. PLTL3KEEP,
  219. PLTL3STRM,
  220. WZR = WSP,
  221. ZR = SP,
  222. INVALID_REG = -1,
  223. };
  224. constexpr int operator&(const ARM64Reg& reg, const int mask)
  225. {
  226. return static_cast<int>(reg) & mask;
  227. }
  228. constexpr int operator|(const ARM64Reg& reg, const int mask)
  229. {
  230. return static_cast<int>(reg) | mask;
  231. }
  232. constexpr ARM64Reg operator+(const ARM64Reg& reg, const int addend)
  233. {
  234. return static_cast<ARM64Reg>(static_cast<int>(reg) + addend);
  235. }
  236. constexpr bool Is64Bit(ARM64Reg reg)
  237. {
  238. return (reg & 0x20) != 0;
  239. }
  240. constexpr bool IsSingle(ARM64Reg reg)
  241. {
  242. return (reg & 0xC0) == 0x40;
  243. }
  244. constexpr bool IsDouble(ARM64Reg reg)
  245. {
  246. return (reg & 0xC0) == 0x80;
  247. }
  248. constexpr bool IsScalar(ARM64Reg reg)
  249. {
  250. return IsSingle(reg) || IsDouble(reg);
  251. }
  252. constexpr bool IsQuad(ARM64Reg reg)
  253. {
  254. return (reg & 0xC0) == 0xC0;
  255. }
  256. constexpr bool IsVector(ARM64Reg reg)
  257. {
  258. return (reg & 0xC0) != 0;
  259. }
  260. constexpr bool IsGPR(ARM64Reg reg)
  261. {
  262. return static_cast<int>(reg) < 0x40;
  263. }
  264. constexpr int DecodeReg(ARM64Reg reg)
  265. {
  266. return reg & 0x1F;
  267. }
  268. constexpr ARM64Reg EncodeRegTo32(ARM64Reg reg)
  269. {
  270. return static_cast<ARM64Reg>(DecodeReg(reg));
  271. }
  272. constexpr ARM64Reg EncodeRegTo64(ARM64Reg reg)
  273. {
  274. return static_cast<ARM64Reg>(reg | 0x20);
  275. }
  276. constexpr ARM64Reg EncodeRegToSingle(ARM64Reg reg)
  277. {
  278. return static_cast<ARM64Reg>(ARM64Reg::S0 | DecodeReg(reg));
  279. }
  280. constexpr ARM64Reg EncodeRegToDouble(ARM64Reg reg)
  281. {
  282. return static_cast<ARM64Reg>((reg & ~0xC0) | 0x80);
  283. }
  284. constexpr ARM64Reg EncodeRegToQuad(ARM64Reg reg)
  285. {
  286. return static_cast<ARM64Reg>(reg | 0xC0);
  287. }
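// A few compile-time sanity checks of the register encoding helpers above (added for
// illustration only; they rely solely on the constexpr functions defined in this header).
static_assert(DecodeReg(ARM64Reg::X3) == 3);
static_assert(Is64Bit(ARM64Reg::X3) && !Is64Bit(ARM64Reg::W3));
static_assert(EncodeRegTo64(ARM64Reg::W7) == ARM64Reg::X7);
static_assert(EncodeRegToSingle(ARM64Reg::Q5) == ARM64Reg::S5);
static_assert(EncodeRegToDouble(ARM64Reg::S9) == ARM64Reg::D9);
static_assert(IsQuad(ARM64Reg::Q31) && IsVector(ARM64Reg::D0) && IsGPR(ARM64Reg::WZR));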
  288. enum class ShiftType
  289. {
  290. // Logical Shift Left
  291. LSL = 0,
  292. // Logical Shift Right
  293. LSR = 1,
  294. // Arithmetic Shift Right
  295. ASR = 2,
  296. // Rotate Right
  297. ROR = 3,
  298. };
  299. enum class ExtendSpecifier
  300. {
  301. UXTB = 0x0,
  302. UXTH = 0x1,
  303. UXTW = 0x2, /* Also LSL on 32bit width */
  304. UXTX = 0x3, /* Also LSL on 64bit width */
  305. SXTB = 0x4,
  306. SXTH = 0x5,
  307. SXTW = 0x6,
  308. SXTX = 0x7,
  309. };
  310. enum class IndexType
  311. {
  312. Unsigned,
  313. Post,
  314. Pre,
  315. Signed, // used in LDP/STP
  316. };
  317. enum class ShiftAmount
  318. {
  319. Shift0,
  320. Shift16,
  321. Shift32,
  322. Shift48,
  323. };
  324. enum class RoundingMode
  325. {
  326. A, // round to nearest, ties to away
  327. M, // round towards -inf
  328. N, // round to nearest, ties to even
  329. P, // round towards +inf
  330. Z, // round towards zero
  331. };
  332. enum class GPRSize
  333. {
  334. B32,
  335. B64,
  336. };
  337. struct FixupBranch
  338. {
  339. enum class Type : u32
  340. {
  341. CBZ,
  342. CBNZ,
  343. BConditional,
  344. TBZ,
  345. TBNZ,
  346. B,
  347. BL,
  348. };
  349. u8* ptr;
  350. Type type;
  351. // Used with B.cond
  352. CCFlags cond;
  353. // Used with TBZ/TBNZ
  354. u8 bit;
  355. // Used with Test/Compare and Branch
  356. ARM64Reg reg;
  357. };
  358. enum class PStateField
  359. {
  360. SPSel = 0,
  361. DAIFSet,
  362. DAIFClr,
  363. NZCV, // The only system register accessible from EL0 (user space)
  364. PMCR_EL0,
  365. PMCCNTR_EL0,
  366. FPCR = 0x340,
  367. FPSR = 0x341,
  368. };
  369. enum class SystemHint
  370. {
  371. NOP,
  372. YIELD,
  373. WFE,
  374. WFI,
  375. SEV,
  376. SEVL,
  377. };
  378. enum class BarrierType
  379. {
  380. OSHLD = 1,
  381. OSHST = 2,
  382. OSH = 3,
  383. NSHLD = 5,
  384. NSHST = 6,
  385. NSH = 7,
  386. ISHLD = 9,
  387. ISHST = 10,
  388. ISH = 11,
  389. LD = 13,
  390. ST = 14,
  391. SY = 15,
  392. };
  393. class ArithOption
  394. {
  395. private:
  396. enum class WidthSpecifier
  397. {
  398. Default,
  399. Width32Bit,
  400. Width64Bit,
  401. };
  402. enum class TypeSpecifier
  403. {
  404. ExtendedReg,
  405. Immediate,
  406. ShiftedReg,
  407. };
  408. ARM64Reg m_destReg;
  409. WidthSpecifier m_width;
  410. ExtendSpecifier m_extend;
  411. TypeSpecifier m_type;
  412. ShiftType m_shifttype;
  413. u32 m_shift;
  414. public:
  415. ArithOption(ARM64Reg Rd, bool index = false)
  416. {
  417. // Indexed registers are a feature of AArch64:
  418. // on load/store instructions that use a register offset,
  419. // the offset register can serve as an index.
  420. // If we are indexing, the offset register is shifted
  421. // to the left so that we index at intervals of
  422. // the size of the access being performed:
  423. // 8-bit: index is not shifted
  424. // 16-bit: index LSL 1
  425. // 32-bit: index LSL 2
  426. // 64-bit: index LSL 3
  427. if (index)
  428. m_shift = 4;
  429. else
  430. m_shift = 0;
  431. m_destReg = Rd;
  432. m_type = TypeSpecifier::ExtendedReg;
  433. if (Is64Bit(Rd))
  434. {
  435. m_width = WidthSpecifier::Width64Bit;
  436. m_extend = ExtendSpecifier::UXTX;
  437. }
  438. else
  439. {
  440. m_width = WidthSpecifier::Width32Bit;
  441. m_extend = ExtendSpecifier::UXTW;
  442. }
  443. m_shifttype = ShiftType::LSL;
  444. }
  445. ArithOption(ARM64Reg Rd, ExtendSpecifier extend_type, u32 shift = 0)
  446. {
  447. m_destReg = Rd;
  448. m_width = Is64Bit(Rd) ? WidthSpecifier::Width64Bit : WidthSpecifier::Width32Bit;
  449. m_extend = extend_type;
  450. m_type = TypeSpecifier::ExtendedReg;
  451. m_shifttype = ShiftType::LSL;
  452. m_shift = shift;
  453. }
  454. ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
  455. {
  456. m_destReg = Rd;
  457. m_shift = shift;
  458. m_shifttype = shift_type;
  459. m_type = TypeSpecifier::ShiftedReg;
  460. if (Is64Bit(Rd))
  461. {
  462. m_width = WidthSpecifier::Width64Bit;
  463. if (shift == 64)
  464. m_shift = 0;
  465. m_extend = ExtendSpecifier::UXTX;
  466. }
  467. else
  468. {
  469. m_width = WidthSpecifier::Width32Bit;
  470. if (shift == 32)
  471. m_shift = 0;
  472. m_extend = ExtendSpecifier::UXTW;
  473. }
  474. }
  475. ARM64Reg GetReg() const { return m_destReg; }
  476. u32 GetData() const
  477. {
  478. switch (m_type)
  479. {
  480. case TypeSpecifier::ExtendedReg:
  481. return (static_cast<u32>(m_extend) << 13) | (m_shift << 10);
  482. case TypeSpecifier::ShiftedReg:
  483. return (static_cast<u32>(m_shifttype) << 22) | (m_shift << 10);
  484. default:
  485. DEBUG_ASSERT_MSG(DYNA_REC, false, "Invalid type in GetData");
  486. break;
  487. }
  488. return 0;
  489. }
  490. bool IsExtended() const { return m_type == TypeSpecifier::ExtendedReg; }
  491. };
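// Usage sketch (illustrative only; "emit" stands for an ARM64XEmitter declared further below):
//   emit.LDR(ARM64Reg::W0, ARM64Reg::X1, ArithOption(ARM64Reg::W2, true));  // [X1 + (W2, UXTW, scaled by access size)]
//   emit.LDR(ARM64Reg::W0, ARM64Reg::X1, ArithOption(ARM64Reg::X2));        // [X1 + X2], unscaled
//   emit.ADD(ARM64Reg::X0, ARM64Reg::X1, ARM64Reg::X2, ArithOption(ARM64Reg::X2, ShiftType::LSL, 4));  // X0 = X1 + (X2 << 4)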
  492. struct LogicalImm
  493. {
  494. constexpr LogicalImm() {}
  495. constexpr LogicalImm(u8 r_, u8 s_, bool n_) : r(r_), s(s_), n(n_), valid(true) {}
  496. constexpr LogicalImm(u64 value, GPRSize size)
  497. {
  498. // Logical immediates are encoded using parameters n, imm_s and imm_r using
  499. // the following table:
  500. //
  501. // N imms immr size S R
  502. // 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
  503. // 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
  504. // 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
  505. // 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
  506. // 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
  507. // 0 11110s xxxxxr 2 UInt(s) UInt(r)
  508. // (s bits must not be all set)
  509. //
  510. // A pattern is constructed of size bits, where the least significant S+1 bits
  511. // are set. The pattern is rotated right by R, and repeated across a 32 or
  512. // 64-bit value, depending on destination register width.
  513. if (size == GPRSize::B32)
  514. {
  515. // To handle 32-bit logical immediates, the very easiest thing is to repeat
  516. // the input value twice to make a 64-bit word. The correct encoding of that
  517. // as a logical immediate will also be the correct encoding of the 32-bit
  518. // value.
  519. value = (value << 32) | (value & 0xFFFFFFFF);
  520. }
  521. if (value == 0 || (~value) == 0)
  522. {
  523. valid = false;
  524. return;
  525. }
  526. // Normalize value, rotating it such that the LSB is 1:
  527. // If LSB is already one, we mask away the trailing sequence of ones and
  528. // pick the next sequence of ones. This ensures we get a complete element
  529. // that has not been cut in half due to rotation across the word boundary.
  530. const int rotation = std::countr_zero(value & (value + 1));
  531. const u64 normalized = std::rotr(value, rotation);
  532. const int element_size = std::countr_zero(normalized & (normalized + 1));
  533. const int ones = std::countr_one(normalized);
  534. // Check the value is repeating; also ensures element size is a power of two.
  535. if (std::rotr(value, element_size) != value)
  536. {
  537. valid = false;
  538. return;
  539. }
  540. // Now we're done. We just have to encode the S output in such a way that
  541. // it gives both the number of set bits and the length of the repeated
  542. // segment.
  543. r = static_cast<u8>((element_size - rotation) & (element_size - 1));
  544. s = static_cast<u8>((((~element_size + 1) << 1) | (ones - 1)) & 0x3f);
  545. n = Common::ExtractBit<6>(element_size);
  546. valid = true;
  547. }
  548. constexpr operator bool() const { return valid; }
  549. u8 r = 0;
  550. u8 s = 0;
  551. bool n = false;
  552. bool valid = false;
  553. };
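// Compile-time examples of the logical-immediate encoding above (illustrative only).
// 0xFF is a single 64-bit element with eight contiguous ones and no rotation:
static_assert(LogicalImm(0xFF, GPRSize::B64).valid);
static_assert(LogicalImm(0xFF, GPRSize::B64).n && LogicalImm(0xFF, GPRSize::B64).s == 7 &&
              LogicalImm(0xFF, GPRSize::B64).r == 0);
// 0 and ~0 cannot be encoded as logical immediates:
static_assert(!LogicalImm(0, GPRSize::B64).valid);
static_assert(!LogicalImm(0xFFFFFFFFFFFFFFFF, GPRSize::B64).valid);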
  554. class ARM64XEmitter
  555. {
  556. friend class ARM64FloatEmitter;
  557. private:
  558. struct RegisterMove
  559. {
  560. ARM64Reg dst;
  561. ARM64Reg src;
  562. };
  563. // Pointer to memory where code will be emitted to.
  564. u8* m_code = nullptr;
  565. // Pointer past the end of the memory region we're allowed to emit to.
  566. // Writes that would reach this memory are refused and will set the m_write_failed flag instead.
  567. u8* m_code_end = nullptr;
  568. u8* m_lastCacheFlushEnd = nullptr;
  569. // Set to true when a write request happens that would write past m_code_end.
  570. // Must be cleared with SetCodePtr() afterwards.
  571. bool m_write_failed = false;
  572. void AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative, bool flags);
  573. void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
  574. void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr);
  575. void EncodeUnconditionalBranchInst(u32 op, const void* ptr);
  576. void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
  577. void EncodeExceptionInst(u32 instenc, u32 imm);
  578. void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
  579. void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
  580. ArithOption Option);
  581. void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  582. void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
  583. void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
  584. void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  585. void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn);
  586. void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  587. void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  588. void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  589. void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
  590. void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
  591. void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
  592. void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  593. void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
  594. void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
  595. void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
  596. void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  597. void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
  598. void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
  599. void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
  600. s32 imm);
  601. void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
  602. void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  603. [[nodiscard]] FixupBranch WriteFixupBranch();
  604. // This function solves the "parallel moves" problem common in compilers.
  605. // The arguments are mutated!
  606. void ParallelMoves(RegisterMove* begin, RegisterMove* end, std::array<u8, 32>* source_gpr_usages);
  607. template <typename T>
  608. void MOVI2RImpl(ARM64Reg Rd, T imm);
  609. protected:
  610. void Write32(u32 value);
  611. public:
  612. ARM64XEmitter() = default;
  613. ARM64XEmitter(u8* code, u8* code_end)
  614. : m_code(code), m_code_end(code_end), m_lastCacheFlushEnd(code)
  615. {
  616. }
  617. virtual ~ARM64XEmitter() {}
  618. void SetCodePtr(u8* ptr, u8* end, bool write_failed = false);
  619. void SetCodePtrUnsafe(u8* ptr, u8* end, bool write_failed = false);
  620. const u8* GetCodePtr() const { return m_code; }
  621. u8* GetWritableCodePtr() { return m_code; }
  622. const u8* GetCodeEnd() const { return m_code_end; }
  623. u8* GetWritableCodeEnd() { return m_code_end; }
  624. void ReserveCodeSpace(u32 bytes);
  625. u8* AlignCode16();
  626. u8* AlignCodePage();
  627. void FlushIcache();
  628. void FlushIcacheSection(u8* start, u8* end);
  629. // Should be checked after a block of code has been generated to see if the code has been
  630. // successfully written to memory. Do not call the generated code when this returns true!
  631. bool HasWriteFailed() const { return m_write_failed; }
  632. // FixupBranch branching
  633. void SetJumpTarget(FixupBranch const& branch);
  634. [[nodiscard]] FixupBranch CBZ(ARM64Reg Rt);
  635. [[nodiscard]] FixupBranch CBNZ(ARM64Reg Rt);
  636. [[nodiscard]] FixupBranch B(CCFlags cond);
  637. [[nodiscard]] FixupBranch TBZ(ARM64Reg Rt, u8 bit);
  638. [[nodiscard]] FixupBranch TBNZ(ARM64Reg Rt, u8 bit);
  639. [[nodiscard]] FixupBranch B();
  640. [[nodiscard]] FixupBranch BL();
  641. // Compare and Branch
  642. void CBZ(ARM64Reg Rt, const void* ptr);
  643. void CBNZ(ARM64Reg Rt, const void* ptr);
  644. // Conditional Branch
  645. void B(CCFlags cond, const void* ptr);
  646. // Test and Branch
  647. void TBZ(ARM64Reg Rt, u8 bits, const void* ptr);
  648. void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr);
  649. // Unconditional Branch
  650. void B(const void* ptr);
  651. void BL(const void* ptr);
  652. // Unconditional Branch (register)
  653. void BR(ARM64Reg Rn);
  654. void BLR(ARM64Reg Rn);
  655. void RET(ARM64Reg Rn = ARM64Reg::X30);
  656. void ERET();
  657. void DRPS();
  658. // Exception generation
  659. void SVC(u32 imm);
  660. void HVC(u32 imm);
  661. void SMC(u32 imm);
  662. void BRK(u32 imm);
  663. void HLT(u32 imm);
  664. void DCPS1(u32 imm);
  665. void DCPS2(u32 imm);
  666. void DCPS3(u32 imm);
  667. // System
  668. void _MSR(PStateField field, u8 imm);
  669. void _MSR(PStateField field, ARM64Reg Rt);
  670. void MRS(ARM64Reg Rt, PStateField field);
  671. void CNTVCT(ARM64Reg Rt);
  672. void HINT(SystemHint op);
  673. void NOP() { HINT(SystemHint::NOP); }
  674. void SEV() { HINT(SystemHint::SEV); }
  675. void SEVL() { HINT(SystemHint::SEVL); }
  676. void WFE() { HINT(SystemHint::WFE); }
  677. void WFI() { HINT(SystemHint::WFI); }
  678. void YIELD() { HINT(SystemHint::YIELD); }
  679. void CLREX();
  680. void DSB(BarrierType type);
  681. void DMB(BarrierType type);
  682. void ISB(BarrierType type);
  683. // Add/Subtract (Extended/Shifted register)
  684. void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  685. void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  686. void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  687. void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  688. void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  689. void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  690. void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  691. void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  692. void CMN(ARM64Reg Rn, ARM64Reg Rm);
  693. void CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  694. void CMP(ARM64Reg Rn, ARM64Reg Rm);
  695. void CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option);
  696. // Add/Subtract (with carry)
  697. void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  698. void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  699. void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  700. void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  701. // Conditional Compare (immediate)
  702. void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
  703. void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
  704. // Conditional Compare (register)
  705. void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
  706. void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
  707. // Conditional Select
  708. void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  709. void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  710. void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  711. void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  712. // Aliases
  713. void CSET(ARM64Reg Rd, CCFlags cond)
  714. {
  715. ARM64Reg zr = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
  716. CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
  717. }
  718. void CSETM(ARM64Reg Rd, CCFlags cond)
  719. {
  720. ARM64Reg zr = Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR;
  721. CSINV(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
  722. }
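// Note: CSINC/CSINV above receive the inverted condition, matching the architectural aliases;
// e.g. CSET with the EQ condition assembles as CSINC Rd, ZR, ZR, NE.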
  723. void NEG(ARM64Reg Rd, ARM64Reg Rs) { SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
  724. void NEG(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
  725. {
  726. SUB(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs, Option);
  727. }
  728. void NEGS(ARM64Reg Rd, ARM64Reg Rs) { SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs); }
  729. void NEGS(ARM64Reg Rd, ARM64Reg Rs, ArithOption Option)
  730. {
  731. SUBS(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rs, Option);
  732. }
  733. // Data-Processing 1 source
  734. void RBIT(ARM64Reg Rd, ARM64Reg Rn);
  735. void REV16(ARM64Reg Rd, ARM64Reg Rn);
  736. void REV32(ARM64Reg Rd, ARM64Reg Rn);
  737. void REV64(ARM64Reg Rd, ARM64Reg Rn);
  738. void CLZ(ARM64Reg Rd, ARM64Reg Rn);
  739. void CLS(ARM64Reg Rd, ARM64Reg Rn);
  740. // Data-Processing 2 source
  741. void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  742. void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  743. void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  744. void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  745. void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  746. void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  747. void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  748. void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  749. void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  750. void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  751. void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  752. void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  753. void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  754. void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  755. // Data-Processing 3 source
  756. void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  757. void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  758. void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  759. void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  760. void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  761. void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  762. void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  763. void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  764. void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  765. void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  766. void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  767. void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  768. // Logical (shifted register)
  769. void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  770. void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  771. void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  772. void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  773. void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  774. void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  775. void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  776. void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift);
  777. void TST(ARM64Reg Rn, ARM64Reg Rm) { ANDS(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm); }
  778. void TST(ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  779. {
  780. ANDS(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm, Shift);
  781. }
  782. // Wrap the above for saner syntax
  783. void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  784. {
  785. AND(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  786. }
  787. void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  788. {
  789. BIC(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  790. }
  791. void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  792. {
  793. ORR(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  794. }
  795. void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  796. {
  797. ORN(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  798. }
  799. void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  800. {
  801. EOR(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  802. }
  803. void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  804. {
  805. EON(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  806. }
  807. void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  808. {
  809. ANDS(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  810. }
  811. void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  812. {
  813. BICS(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  814. }
  815. // Convenience wrappers around ORR. These match the official convenience syntax.
  816. void MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift);
  817. void MOV(ARM64Reg Rd, ARM64Reg Rm);
  818. void MVN(ARM64Reg Rd, ARM64Reg Rm);
  819. // Convenience wrappers around UBFM/EXTR.
  820. void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
  821. void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
  822. void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
  823. void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);
  824. // Logical (immediate)
  825. void AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
  826. void ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
  827. void EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
  828. void ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm);
  829. void TST(ARM64Reg Rn, LogicalImm imm);
  830. // Add/subtract (immediate)
  831. void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
  832. void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
  833. void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
  834. void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
  835. void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
  836. void CMN(ARM64Reg Rn, u32 imm, bool shift = false);
  837. // Data Processing (Immediate)
  838. void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = ShiftAmount::Shift0);
  839. void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = ShiftAmount::Shift0);
  840. void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = ShiftAmount::Shift0);
  841. // Bitfield move
  842. void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
  843. void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
  844. void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
  845. void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
  846. void BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
  847. void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
  848. // Extract register (ROR with two inputs, if same then faster on A67)
  849. void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);
  850. // Aliases
  851. void SXTB(ARM64Reg Rd, ARM64Reg Rn);
  852. void SXTH(ARM64Reg Rd, ARM64Reg Rn);
  853. void SXTW(ARM64Reg Rd, ARM64Reg Rn);
  854. void UXTB(ARM64Reg Rd, ARM64Reg Rn);
  855. void UXTH(ARM64Reg Rd, ARM64Reg Rn);
  856. void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) { UBFM(Rd, Rn, lsb, lsb + width - 1); }
  857. // Load Register (Literal)
  858. void LDR(ARM64Reg Rt, u32 imm);
  859. void LDRSW(ARM64Reg Rt, u32 imm);
  860. void PRFM(ARM64Reg Rt, u32 imm);
  861. // Load/Store Exclusive
  862. void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  863. void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  864. void LDXRB(ARM64Reg Rt, ARM64Reg Rn);
  865. void LDAXRB(ARM64Reg Rt, ARM64Reg Rn);
  866. void STLRB(ARM64Reg Rt, ARM64Reg Rn);
  867. void LDARB(ARM64Reg Rt, ARM64Reg Rn);
  868. void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  869. void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  870. void LDXRH(ARM64Reg Rt, ARM64Reg Rn);
  871. void LDAXRH(ARM64Reg Rt, ARM64Reg Rn);
  872. void STLRH(ARM64Reg Rt, ARM64Reg Rn);
  873. void LDARH(ARM64Reg Rt, ARM64Reg Rn);
  874. void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  875. void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
  876. void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
  877. void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
  878. void LDXR(ARM64Reg Rt, ARM64Reg Rn);
  879. void LDAXR(ARM64Reg Rt, ARM64Reg Rn);
  880. void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
  881. void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
  882. void STLR(ARM64Reg Rt, ARM64Reg Rn);
  883. void LDAR(ARM64Reg Rt, ARM64Reg Rn);
  884. // Load/Store no-allocate pair (offset)
  885. void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
  886. void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
  887. // Load/Store register (immediate indexed)
  888. void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  889. void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  890. void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  891. void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  892. void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  893. void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  894. void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  895. void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  896. void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  897. // Load/Store register (register offset)
  898. void STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  899. void LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  900. void LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  901. void STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  902. void LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  903. void LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  904. void STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  905. void LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  906. void LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  907. void PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  908. // Load/Store register (unscaled offset)
  909. void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  910. void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  911. void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  912. void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  913. void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  914. void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  915. void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  916. void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  917. void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  918. // Load/Store pair
  919. void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
  920. void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
  921. void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
  922. // Address of label/page PC-relative
  923. void ADR(ARM64Reg Rd, s32 imm);
  924. void ADRP(ARM64Reg Rd, s64 imm);
  925. // Wrapper around ADR/ADRP/MOVZ/MOVN/MOVK
  926. void MOVI2R(ARM64Reg Rd, u64 imm);
  927. bool MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2);
  928. template <class P>
  929. void MOVP2R(ARM64Reg Rd, P* ptr)
  930. {
  931. ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
  932. MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr));
  933. }
  934. // Given an address, stores the page address into a register and returns the page-relative offset
  935. template <class P>
  936. s32 MOVPage2R(ARM64Reg Rd, P* ptr)
  937. {
  938. ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "Can't store pointers in 32-bit registers");
  939. MOVI2R(Rd, reinterpret_cast<uintptr_t>(ptr) & ~0xFFFULL);
  940. return static_cast<s32>(reinterpret_cast<uintptr_t>(ptr) & 0xFFFULL);
  941. }
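// MOVPage2R example (illustrative): with ptr == 0x12345ABC this loads 0x12345000 into Rd and
// returns 0xABC, which can then be used as an unsigned load/store offset.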
  942. // Wrappers around bitwise operations with an immediate. If you're sure an imm can be encoded
  943. // without a scratch register, preferably construct a LogicalImm directly instead,
  944. // since that is constexpr and thus can be done at compile time for constant values.
  945. void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
  946. void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
  947. void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  948. {
  949. ANDSI2R(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm, scratch);
  950. }
  951. void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
  952. void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch);
  953. // Wrappers around arithmetic operations with an immediate.
  954. void ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
  955. ARM64Reg scratch);
  956. void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  957. void ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  958. void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  959. void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  960. void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  961. bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
  962. bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
  963. bool TryCMPI2R(ARM64Reg Rn, u64 imm);
  964. bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
  965. bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
  966. bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
  967. // ABI related
  968. static constexpr BitSet32 CALLER_SAVED_GPRS = BitSet32(0x4007FFFF);
  969. static constexpr BitSet32 CALLER_SAVED_FPRS = BitSet32(0xFFFF00FF);
  970. void ABI_PushRegisters(BitSet32 registers);
  971. void ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask = BitSet32(0));
  972. // Plain function call
  973. void QuickCallFunction(ARM64Reg scratchreg, const void* func);
  974. template <typename T>
  975. void QuickCallFunction(ARM64Reg scratchreg, T func)
  976. {
  977. QuickCallFunction(scratchreg, (const void*)func);
  978. }
  979. template <typename FuncRet, typename... FuncArgs, typename... Args>
  980. void ABI_CallFunction(FuncRet (*func)(FuncArgs...), Args... args)
  981. {
  982. static_assert(sizeof...(FuncArgs) == sizeof...(Args), "Wrong number of arguments");
  983. static_assert(sizeof...(FuncArgs) <= 8, "Passing arguments on the stack is not supported");
  984. if constexpr (!std::is_void_v<FuncRet>)
  985. static_assert(sizeof(FuncRet) <= 16, "Large return types are not supported");
  986. std::array<u8, 32> source_gpr_uses{};
  987. auto check_argument = [&](auto& arg) {
  988. using Arg = std::decay_t<decltype(arg)>;
  989. if constexpr (std::is_same_v<Arg, ARM64Reg>)
  990. {
  991. ASSERT(IsGPR(arg));
  992. source_gpr_uses[DecodeReg(arg)]++;
  993. }
  994. else
  995. {
  996. // To be more correct, we should be checking FuncArgs here rather than Args, but that's a
  997. // lot more effort to implement. Let's just do these best-effort checks for now.
  998. static_assert(!std::is_floating_point_v<Arg>, "Floating-point arguments are not supported");
  999. static_assert(sizeof(Arg) <= 8, "Arguments bigger than a register are not supported");
  1000. }
  1001. };
  1002. (check_argument(args), ...);
  1003. {
  1004. Common::SmallVector<RegisterMove, sizeof...(Args)> pending_moves;
  1005. size_t i = 0;
  1006. auto handle_register_argument = [&](auto& arg) {
  1007. using Arg = std::decay_t<decltype(arg)>;
  1008. if constexpr (std::is_same_v<Arg, ARM64Reg>)
  1009. {
  1010. const ARM64Reg dst_reg =
  1011. (Is64Bit(arg) ? EncodeRegTo64 : EncodeRegTo32)(static_cast<ARM64Reg>(i));
  1012. if (dst_reg == arg)
  1013. {
  1014. // The value is already in the right register.
  1015. source_gpr_uses[DecodeReg(arg)]--;
  1016. }
  1017. else if (source_gpr_uses[i] == 0)
  1018. {
  1019. // The destination register isn't used as the source of another move.
  1020. // We can go ahead and do the move right away.
  1021. MOV(dst_reg, arg);
  1022. source_gpr_uses[DecodeReg(arg)]--;
  1023. }
  1024. else
  1025. {
  1026. // The destination register is used as the source of a move we haven't gotten to yet.
  1027. // Let's record that we need to deal with this move later.
  1028. pending_moves.emplace_back(dst_reg, arg);
  1029. }
  1030. }
  1031. ++i;
  1032. };
  1033. (handle_register_argument(args), ...);
  1034. if (!pending_moves.empty())
  1035. {
  1036. ParallelMoves(pending_moves.data(), pending_moves.data() + pending_moves.size(),
  1037. &source_gpr_uses);
  1038. }
  1039. }
  1040. {
  1041. size_t i = 0;
  1042. auto handle_immediate_argument = [&](auto& arg) {
  1043. using Arg = std::decay_t<decltype(arg)>;
  1044. if constexpr (!std::is_same_v<Arg, ARM64Reg>)
  1045. {
  1046. const ARM64Reg dst_reg =
  1047. (sizeof(arg) == 8 ? EncodeRegTo64 : EncodeRegTo32)(static_cast<ARM64Reg>(i));
  1048. if constexpr (std::is_pointer_v<Arg>)
  1049. MOVP2R(dst_reg, arg);
  1050. else
  1051. MOVI2R(dst_reg, arg);
  1052. }
  1053. ++i;
  1054. };
  1055. (handle_immediate_argument(args), ...);
  1056. }
  1057. QuickCallFunction(ARM64Reg::X8, func);
  1058. }
  1059. // Utility to generate a call to a std::function object.
  1060. //
  1061. // Unfortunately, calling operator() directly is undefined behavior in C++
  1062. // (this method might be a thunk in the case of multi-inheritance) so we
  1063. // have to go through a trampoline function.
  1064. template <typename T, typename... Args>
  1065. static T CallLambdaTrampoline(const std::function<T(Args...)>* f, Args... args)
  1066. {
  1067. return (*f)(args...);
  1068. }
  1069. template <typename FuncRet, typename... FuncArgs, typename... Args>
  1070. void ABI_CallLambdaFunction(const std::function<FuncRet(FuncArgs...)>* f, Args... args)
  1071. {
  1072. auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<FuncRet, FuncArgs...>;
  1073. ABI_CallFunction(trampoline, f, args...);
  1074. }
  1075. };
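// Illustrative usage sketch (not part of the original interface). The callee, the buffer, and the
// emitted sequence are assumptions made purely for demonstration.
inline u64 ExampleCallee(u64 a, u64 b)
{
  return a + b;
}
inline void ExampleEmit(u8* buffer, size_t buffer_size)
{
  ARM64XEmitter emit(buffer, buffer + buffer_size);
  emit.MOVI2R(ARM64Reg::W0, 42);
  // Forward branch: record the fixup now, resolve it once the target is known.
  FixupBranch skip = emit.CBZ(ARM64Reg::W0);
  emit.ADD(ARM64Reg::W0, ARM64Reg::W0, 1);
  emit.SetJumpTarget(skip);
  // ABI_CallFunction moves register and immediate arguments into X0/X1/... (resolving any
  // "parallel moves" conflicts) and calls the function through a scratch register.
  emit.ABI_CallFunction(&ExampleCallee, ARM64Reg::X1, u64{5});
  emit.RET();
  emit.FlushIcache();
  // If the emitted code ran past the end of the buffer, it must not be executed.
  ASSERT(!emit.HasWriteFailed());
}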
  1076. class ARM64FloatEmitter
  1077. {
  1078. public:
  1079. ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}
  1080. void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1081. void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1082. // Loadstore unscaled
  1083. void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1084. void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1085. // Loadstore single structure
  1086. void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
  1087. void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
  1088. void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
  1089. void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
  1090. void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
  1091. void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
  1092. void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
  1093. void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
  1094. // Loadstore multiple structure
  1095. void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
  1096. void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = ARM64Reg::SP);
  1097. void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
  1098. void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = ARM64Reg::SP);
  1099. // Loadstore paired
  1100. void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
  1101. void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
  1102. // Loadstore register offset
  1103. void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  1104. void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  1105. // Scalar - 1 Source
  1106. void FABS(ARM64Reg Rd, ARM64Reg Rn);
  1107. void FNEG(ARM64Reg Rd, ARM64Reg Rn);
  1108. void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
  1109. void FRINTI(ARM64Reg Rd, ARM64Reg Rn);
  1110. void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP
  1111. void FRECPE(ARM64Reg Rd, ARM64Reg Rn);
  1112. void FRSQRTE(ARM64Reg Rd, ARM64Reg Rn);
  1113. // Scalar - pairwise
  1114. void FADDP(ARM64Reg Rd, ARM64Reg Rn);
  1115. void FMAXP(ARM64Reg Rd, ARM64Reg Rn);
  1116. void FMINP(ARM64Reg Rd, ARM64Reg Rn);
  1117. void FMAXNMP(ARM64Reg Rd, ARM64Reg Rn);
  1118. void FMINNMP(ARM64Reg Rd, ARM64Reg Rn);
  1119. // Scalar - 2 Source
  1120. void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1121. void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1122. void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1123. void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1124. void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1125. void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1126. void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1127. void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1128. void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1129. void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1130. // Scalar - 3 Source. Note - the accumulator is last on ARM!
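// e.g. FMADD(D0, D1, D2, D3) computes D0 = D3 + (D1 * D2).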
  1131. void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  1132. void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  1133. void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  1134. void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
  1135. // Scalar floating point immediate
  1136. void FMOV(ARM64Reg Rd, uint8_t imm8);
  1137. // Vector
  1138. void ADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1139. void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1140. void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1141. void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1142. void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1143. void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1144. void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
  1145. void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1146. void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1147. void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1148. void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1149. void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1150. void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1151. void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1152. void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1153. void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1154. void FCVTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1155. void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1156. void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1157. void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1158. void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1159. void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1160. void FRECPE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1161. void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1162. void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1163. void NOT(ARM64Reg Rd, ARM64Reg Rn);
  1164. void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1165. void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1166. void MOV(ARM64Reg Rd, ARM64Reg Rn) { ORR(Rd, Rn, Rn); }
  1167. void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1168. void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1169. void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1170. void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1171. void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1172. void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
  1173. void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
  1174. void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1175. void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1176. void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1177. void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1178. void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1179. void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
  1180. // Move
  1181. void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1182. void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
  1183. void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
  1184. void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
  1185. void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
  1186. // One source
  1187. void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
  1188. // Scalar convert float to int, in a lot of variants.
  1189. // Note that the scalar version of this operation has two encodings:
  1190. // one that goes to an integer register,
  1191. // and one that outputs to a scalar fp register.
  1192. void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
  1193. void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
  1194. // Scalar convert int to float. No rounding mode specifier necessary.
  1195. void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
  1196. void UCVTF(ARM64Reg Rd, ARM64Reg Rn);
  1197. // Scalar fixed point to float. scale is the number of fractional bits.
  1198. void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
  1199. void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
  1200. // Float comparison
  1201. void FCMP(ARM64Reg Rn, ARM64Reg Rm);
  1202. void FCMP(ARM64Reg Rn);
  1203. void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
  1204. void FCMPE(ARM64Reg Rn);
  1205. void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1206. void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1207. void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1208. void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1209. void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1210. void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1211. void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1212. void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
  1213. void FACGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1214. void FACGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1215. // Conditional select
  1216. void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
  1217. // Permute
  1218. void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1219. void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1220. void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1221. void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1222. void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1223. void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1224. // Extract
  1225. void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 index);
  1226. // Scalar shift by immediate
  1227. void SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1228. void URSHR(ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1229. // Vector shift by immediate
  1230. void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1231. void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1232. void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1233. void URSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1234. void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1235. void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1236. void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1237. void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
  1238. void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
  1239. void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
  1240. void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
  1241. void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
  1242. // vector x indexed element
  1243. void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
  1244. void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
  1245. // Modified Immediate
  1246. void MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift = 0);
  1247. void ORR(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);
  1248. void BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift = 0);
  1249. void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = ARM64Reg::INVALID_REG,
  1250. bool negate = false);
  1251. void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = ARM64Reg::INVALID_REG);
  1252. // ABI related
  1253. void ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp = ARM64Reg::INVALID_REG);
  1254. void ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp = ARM64Reg::INVALID_REG);
  1255. private:
  1256. ARM64XEmitter* m_emit;
  1257. inline void Write32(u32 value) { m_emit->Write32(value); }
  1258. // Emitting functions
  1259. void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1260. void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
  1261. ARM64Reg Rm);
  1262. void EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1263. void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1264. void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
  1265. void EmitScalar2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1266. void EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1267. void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1268. void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
  1269. ARM64Reg Rn);
  1270. void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt,
  1271. ARM64Reg Rn, ARM64Reg Rm);
  1272. void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1273. void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1274. void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale,
  1275. ARM64Reg Rd, ARM64Reg Rn);
  1276. void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
  1277. void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1278. void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1279. void EmitExtract(u32 imm4, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
  1280. void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
  1281. void EmitShiftImm(bool Q, bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1282. void EmitScalarShiftImm(bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1283. void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
  1284. void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn,
  1285. ARM64Reg Rm);
  1286. void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
  1287. void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn,
  1288. ARM64Reg Rm);
  1289. void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
  1290. void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
  1291. void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra,
  1292. int opcode);
  1293. void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
  1294. ARM64Reg Rn, s32 imm);
  1295. void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm);
  1296. void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);
  1297. void ORR_BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift, u8 op);
  1298. void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
  1299. void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
  1300. void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
  1301. void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
  1302. void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
  1303. };
  1304. class ARM64CodeBlock : public Common::CodeBlock<ARM64XEmitter>
  1305. {
  1306. private:
  1307. void PoisonMemory() override
  1308. {
  1309. // If our memory size isn't a multiple of u32, this won't overwrite the last remaining bytes
  1310. // with anything.
  1311. // Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
  1312. // AArch64: 0xD4200000 = BRK 0
  1313. constexpr u32 brk_0 = 0xD4200000;
  1314. for (size_t i = 0; i < region_size; i += sizeof(u32))
  1315. {
  1316. std::memcpy(region + i, &brk_0, sizeof(u32));
  1317. }
  1318. }
  1319. };
  1320. } // namespace Arm64Gen