Arm64Emitter.cpp 120 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403440444054406440744084409441044114412
  1. // Copyright 2015 Dolphin Emulator Project
  2. // SPDX-License-Identifier: GPL-2.0-or-later
  3. #include "Common/Arm64Emitter.h"
  4. #include <algorithm>
  5. #include <array>
  6. #include <bit>
  7. #include <cstring>
  8. #include <optional>
  9. #include <tuple>
  10. #include <utility>
  11. #include <vector>
  12. #include <fmt/format.h>
  13. #include "Common/Align.h"
  14. #include "Common/Assert.h"
  15. #include "Common/CommonTypes.h"
  16. #include "Common/MathUtil.h"
  17. #include "Common/SmallVector.h"
  18. #ifdef _WIN32
  19. #include <Windows.h>
  20. #endif
  21. #ifdef __APPLE__
  22. #include <libkern/OSCacheControl.h>
  23. #endif
  24. namespace Arm64Gen
  25. {
  26. namespace
  27. {
  28. // For ADD/SUB
  29. std::optional<std::pair<u32, bool>> IsImmArithmetic(uint64_t input)
  30. {
  31. if (input < 4096)
  32. return std::pair{static_cast<u32>(input), false};
  33. if ((input & 0xFFF000) == input)
  34. return std::pair{static_cast<u32>(input >> 12), true};
  35. return std::nullopt;
  36. }
  37. float FPImm8ToFloat(u8 bits)
  38. {
  39. const u32 sign = bits >> 7;
  40. const u32 bit6 = (bits >> 6) & 1;
  41. const u32 exp = ((!bit6) << 7) | (0x7C * bit6) | ((bits >> 4) & 3);
  42. const u32 mantissa = (bits & 0xF) << 19;
  43. const u32 f = (sign << 31) | (exp << 23) | mantissa;
  44. return std::bit_cast<float>(f);
  45. }
  46. std::optional<u8> FPImm8FromFloat(float value)
  47. {
  48. const u32 f = std::bit_cast<u32>(value);
  49. const u32 mantissa4 = (f & 0x7FFFFF) >> 19;
  50. const u32 exponent = (f >> 23) & 0xFF;
  51. const u32 sign = f >> 31;
  52. if ((exponent >> 7) == ((exponent >> 6) & 1))
  53. return std::nullopt;
  54. const u8 imm8 = (sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4;
  55. const float new_float = FPImm8ToFloat(imm8);
  56. if (new_float != value)
  57. return std::nullopt;
  58. return imm8;
  59. }
  60. } // Anonymous namespace
  61. void ARM64XEmitter::SetCodePtrUnsafe(u8* ptr, u8* end, bool write_failed)
  62. {
  63. m_code = ptr;
  64. m_code_end = end;
  65. m_write_failed = write_failed;
  66. }
  67. void ARM64XEmitter::SetCodePtr(u8* ptr, u8* end, bool write_failed)
  68. {
  69. SetCodePtrUnsafe(ptr, end, write_failed);
  70. m_lastCacheFlushEnd = ptr;
  71. }
  72. void ARM64XEmitter::ReserveCodeSpace(u32 bytes)
  73. {
  74. for (u32 i = 0; i < bytes / 4; i++)
  75. BRK(0);
  76. }
  77. u8* ARM64XEmitter::AlignCode16()
  78. {
  79. int c = int((u64)m_code & 15);
  80. if (c)
  81. ReserveCodeSpace(16 - c);
  82. return m_code;
  83. }
  84. u8* ARM64XEmitter::AlignCodePage()
  85. {
  86. int c = int((u64)m_code & 4095);
  87. if (c)
  88. ReserveCodeSpace(4096 - c);
  89. return m_code;
  90. }
  91. void ARM64XEmitter::Write32(u32 value)
  92. {
  93. if (m_code + sizeof(u32) > m_code_end)
  94. {
  95. m_code = m_code_end;
  96. m_write_failed = true;
  97. return;
  98. }
  99. std::memcpy(m_code, &value, sizeof(u32));
  100. m_code += sizeof(u32);
  101. }
  102. void ARM64XEmitter::FlushIcache()
  103. {
  104. FlushIcacheSection(m_lastCacheFlushEnd, m_code);
  105. m_lastCacheFlushEnd = m_code;
  106. }
  107. void ARM64XEmitter::FlushIcacheSection(u8* start, u8* end)
  108. {
  109. if (start == end)
  110. return;
  111. #if defined(IOS) || defined(__APPLE__)
  112. // Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
  113. sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
  114. #elif defined(WIN32)
  115. FlushInstructionCache(GetCurrentProcess(), start, end - start);
  116. #else
  117. // Don't rely on GCC's __clear_cache implementation, as it caches
  118. // icache/dcache cache line sizes, that can vary between cores on
  119. // big.LITTLE architectures.
  120. u64 addr, ctr_el0;
  121. static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  122. size_t isize, dsize;
  123. __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  124. isize = 4 << ((ctr_el0 >> 0) & 0xf);
  125. dsize = 4 << ((ctr_el0 >> 16) & 0xf);
  126. // use the global minimum cache line size
  127. icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
  128. dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
  129. addr = (u64)start & ~(u64)(dsize - 1);
  130. for (; addr < (u64)end; addr += dsize)
  131. // use "civac" instead of "cvau", as this is the suggested workaround for
  132. // Cortex-A53 errata 819472, 826319, 827319 and 824069.
  133. __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
  134. __asm__ volatile("dsb ish" : : : "memory");
  135. addr = (u64)start & ~(u64)(isize - 1);
  136. for (; addr < (u64)end; addr += isize)
  137. __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
  138. __asm__ volatile("dsb ish" : : : "memory");
  139. __asm__ volatile("isb" : : : "memory");
  140. #endif
  141. }
  142. // Exception generation
  143. static const u32 ExcEnc[][3] = {
  144. {0, 0, 1}, // SVC
  145. {0, 0, 2}, // HVC
  146. {0, 0, 3}, // SMC
  147. {1, 0, 0}, // BRK
  148. {2, 0, 0}, // HLT
  149. {5, 0, 1}, // DCPS1
  150. {5, 0, 2}, // DCPS2
  151. {5, 0, 3}, // DCPS3
  152. };
  153. // Arithmetic generation
  154. static const u32 ArithEnc[] = {
  155. 0x058, // ADD
  156. 0x258, // SUB
  157. };
  158. // Conditional Select
  159. static const u32 CondSelectEnc[][2] = {
  160. {0, 0}, // CSEL
  161. {0, 1}, // CSINC
  162. {1, 0}, // CSINV
  163. {1, 1}, // CSNEG
  164. };
  165. // Data-Processing (1 source)
  166. static const u32 Data1SrcEnc[][2] = {
  167. {0, 0}, // RBIT
  168. {0, 1}, // REV16
  169. {0, 2}, // REV32
  170. {0, 3}, // REV64
  171. {0, 4}, // CLZ
  172. {0, 5}, // CLS
  173. };
  174. // Data-Processing (2 source)
  175. static const u32 Data2SrcEnc[] = {
  176. 0x02, // UDIV
  177. 0x03, // SDIV
  178. 0x08, // LSLV
  179. 0x09, // LSRV
  180. 0x0A, // ASRV
  181. 0x0B, // RORV
  182. 0x10, // CRC32B
  183. 0x11, // CRC32H
  184. 0x12, // CRC32W
  185. 0x14, // CRC32CB
  186. 0x15, // CRC32CH
  187. 0x16, // CRC32CW
  188. 0x13, // CRC32X (64bit Only)
  189. 0x17, // XRC32CX (64bit Only)
  190. };
  191. // Data-Processing (3 source)
  192. static const u32 Data3SrcEnc[][2] = {
  193. {0, 0}, // MADD
  194. {0, 1}, // MSUB
  195. {1, 0}, // SMADDL (64Bit Only)
  196. {1, 1}, // SMSUBL (64Bit Only)
  197. {2, 0}, // SMULH (64Bit Only)
  198. {5, 0}, // UMADDL (64Bit Only)
  199. {5, 1}, // UMSUBL (64Bit Only)
  200. {6, 0}, // UMULH (64Bit Only)
  201. };
  202. // Logical (shifted register)
  203. static const u32 LogicalEnc[][2] = {
  204. {0, 0}, // AND
  205. {0, 1}, // BIC
  206. {1, 0}, // OOR
  207. {1, 1}, // ORN
  208. {2, 0}, // EOR
  209. {2, 1}, // EON
  210. {3, 0}, // ANDS
  211. {3, 1}, // BICS
  212. };
  213. // Load/Store Exclusive
  214. static const u32 LoadStoreExcEnc[][5] = {
  215. {0, 0, 0, 0, 0}, // STXRB
  216. {0, 0, 0, 0, 1}, // STLXRB
  217. {0, 0, 1, 0, 0}, // LDXRB
  218. {0, 0, 1, 0, 1}, // LDAXRB
  219. {0, 1, 0, 0, 1}, // STLRB
  220. {0, 1, 1, 0, 1}, // LDARB
  221. {1, 0, 0, 0, 0}, // STXRH
  222. {1, 0, 0, 0, 1}, // STLXRH
  223. {1, 0, 1, 0, 0}, // LDXRH
  224. {1, 0, 1, 0, 1}, // LDAXRH
  225. {1, 1, 0, 0, 1}, // STLRH
  226. {1, 1, 1, 0, 1}, // LDARH
  227. {2, 0, 0, 0, 0}, // STXR
  228. {3, 0, 0, 0, 0}, // (64bit) STXR
  229. {2, 0, 0, 0, 1}, // STLXR
  230. {3, 0, 0, 0, 1}, // (64bit) STLXR
  231. {2, 0, 0, 1, 0}, // STXP
  232. {3, 0, 0, 1, 0}, // (64bit) STXP
  233. {2, 0, 0, 1, 1}, // STLXP
  234. {3, 0, 0, 1, 1}, // (64bit) STLXP
  235. {2, 0, 1, 0, 0}, // LDXR
  236. {3, 0, 1, 0, 0}, // (64bit) LDXR
  237. {2, 0, 1, 0, 1}, // LDAXR
  238. {3, 0, 1, 0, 1}, // (64bit) LDAXR
  239. {2, 0, 1, 1, 0}, // LDXP
  240. {3, 0, 1, 1, 0}, // (64bit) LDXP
  241. {2, 0, 1, 1, 1}, // LDAXP
  242. {3, 0, 1, 1, 1}, // (64bit) LDAXP
  243. {2, 1, 0, 0, 1}, // STLR
  244. {3, 1, 0, 0, 1}, // (64bit) STLR
  245. {2, 1, 1, 0, 1}, // LDAR
  246. {3, 1, 1, 0, 1}, // (64bit) LDAR
  247. };
  248. void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr)
  249. {
  250. bool b64Bit = Is64Bit(Rt);
  251. s64 distance = (s64)ptr - (s64)m_code;
  252. ASSERT_MSG(DYNA_REC, !(distance & 0x3), "Distance must be a multiple of 4: {}", distance);
  253. distance >>= 2;
  254. ASSERT_MSG(DYNA_REC, distance >= -0x40000 && distance <= 0x3FFFF,
  255. "Received too large distance: {}", distance);
  256. Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | (((u32)distance << 5) & 0xFFFFE0) |
  257. DecodeReg(Rt));
  258. }
  259. void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr)
  260. {
  261. u8 b40 = bits & 0x1F;
  262. u8 b5 = (bits >> 5) & 0x1;
  263. s64 distance = (s64)ptr - (s64)m_code;
  264. ASSERT_MSG(DYNA_REC, !(distance & 0x3), "distance must be a multiple of 4: {}", distance);
  265. distance >>= 2;
  266. ASSERT_MSG(DYNA_REC, distance >= -0x3FFF && distance < 0x3FFF, "Received too large distance: {}",
  267. distance);
  268. Write32((b5 << 31) | (0x36 << 24) | (op << 24) | (b40 << 19) |
  269. ((static_cast<u32>(distance) << 5) & 0x7FFE0) | DecodeReg(Rt));
  270. }
  271. void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
  272. {
  273. s64 distance = (s64)ptr - s64(m_code);
  274. ASSERT_MSG(DYNA_REC, !(distance & 0x3), "distance must be a multiple of 4: {}", distance);
  275. distance >>= 2;
  276. ASSERT_MSG(DYNA_REC, distance >= -0x2000000LL && distance <= 0x1FFFFFFLL,
  277. "Received too large distance: {}", distance);
  278. Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
  279. }
  280. void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn)
  281. {
  282. Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (DecodeReg(Rn) << 5) | op4);
  283. }
  284. void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm)
  285. {
  286. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFFF), "Exception instruction too large immediate: {}", imm);
  287. Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) |
  288. ExcEnc[instenc][2]);
  289. }
  290. void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt)
  291. {
  292. Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) |
  293. DecodeReg(Rt));
  294. }
  295. void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn,
  296. ARM64Reg Rm, ArithOption Option)
  297. {
  298. bool b64Bit = Is64Bit(Rd);
  299. Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) |
  300. (Option.IsExtended() ? (1 << 21) : 0) | (DecodeReg(Rm) << 16) | Option.GetData() |
  301. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  302. }
  303. void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn,
  304. ARM64Reg Rm)
  305. {
  306. bool b64Bit = Is64Bit(Rd);
  307. Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0xD0 << 21) | (DecodeReg(Rm) << 16) |
  308. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  309. }
  310. void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
  311. {
  312. bool b64Bit = Is64Bit(Rn);
  313. ASSERT_MSG(DYNA_REC, !(imm & ~0x1F), "too large immediate: {}", imm);
  314. ASSERT_MSG(DYNA_REC, !(nzcv & ~0xF), "Flags out of range: {}", nzcv);
  315. Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (imm << 16) | (cond << 12) |
  316. (1 << 11) | (DecodeReg(Rn) << 5) | nzcv);
  317. }
  318. void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv,
  319. CCFlags cond)
  320. {
  321. bool b64Bit = Is64Bit(Rm);
  322. ASSERT_MSG(DYNA_REC, !(nzcv & ~0xF), "Flags out of range: {}", nzcv);
  323. Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | (DecodeReg(Rm) << 16) |
  324. (cond << 12) | (DecodeReg(Rn) << 5) | nzcv);
  325. }
  326. void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
  327. CCFlags cond)
  328. {
  329. bool b64Bit = Is64Bit(Rd);
  330. Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | (0xD4 << 21) |
  331. (DecodeReg(Rm) << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) |
  332. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  333. }
  334. void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn)
  335. {
  336. bool b64Bit = Is64Bit(Rd);
  337. Write32((b64Bit << 31) | (0x2D6 << 21) | (Data1SrcEnc[instenc][0] << 16) |
  338. (Data1SrcEnc[instenc][1] << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  339. }
  340. void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  341. {
  342. bool b64Bit = Is64Bit(Rd);
  343. Write32((b64Bit << 31) | (0x0D6 << 21) | (DecodeReg(Rm) << 16) | (Data2SrcEnc[instenc] << 10) |
  344. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  345. }
  346. void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
  347. ARM64Reg Ra)
  348. {
  349. bool b64Bit = Is64Bit(Rd);
  350. Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | (DecodeReg(Rm) << 16) |
  351. (Data3SrcEnc[instenc][1] << 15) | (DecodeReg(Ra) << 10) | (DecodeReg(Rn) << 5) |
  352. DecodeReg(Rd));
  353. }
  354. void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
  355. ArithOption Shift)
  356. {
  357. bool b64Bit = Is64Bit(Rd);
  358. Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) |
  359. (LogicalEnc[instenc][1] << 21) | Shift.GetData() | (DecodeReg(Rm) << 16) |
  360. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  361. }
  362. void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm)
  363. {
  364. bool b64Bit = Is64Bit(Rt);
  365. bool bVec = IsVector(Rt);
  366. ASSERT_MSG(DYNA_REC, !(imm & 0xFFFFF), "offset too large {}", imm);
  367. if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set
  368. bitop |= 0x1;
  369. Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | DecodeReg(Rt));
  370. }
  371. void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn,
  372. ARM64Reg Rt)
  373. {
  374. Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) |
  375. (LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) |
  376. (DecodeReg(Rs) << 16) | (LoadStoreExcEnc[instenc][4] << 15) | (DecodeReg(Rt2) << 10) |
  377. (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  378. }
  379. void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
  380. u32 imm)
  381. {
  382. bool b64Bit = Is64Bit(Rt);
  383. bool b128Bit = IsQuad(Rt);
  384. bool bVec = IsVector(Rt);
  385. if (b128Bit)
  386. {
  387. ASSERT_MSG(DYNA_REC, (imm & 0xf) == 0, "128-bit load/store must use aligned offset: {}", imm);
  388. imm >>= 4;
  389. }
  390. else if (b64Bit)
  391. {
  392. ASSERT_MSG(DYNA_REC, (imm & 0x7) == 0, "64-bit load/store must use aligned offset: {}", imm);
  393. imm >>= 3;
  394. }
  395. else
  396. {
  397. ASSERT_MSG(DYNA_REC, (imm & 0x3) == 0, "32-bit load/store must use aligned offset: {}", imm);
  398. imm >>= 2;
  399. }
  400. ASSERT_MSG(DYNA_REC, (imm & ~0xF) == 0, "offset too large {}", imm);
  401. u32 opc = 0;
  402. if (b128Bit)
  403. opc = 2;
  404. else if (b64Bit && bVec)
  405. opc = 1;
  406. else if (b64Bit && !bVec)
  407. opc = 2;
  408. Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (DecodeReg(Rt2) << 10) |
  409. (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  410. }
  411. void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  412. {
  413. bool b64Bit = Is64Bit(Rt);
  414. bool bVec = IsVector(Rt);
  415. u32 offset = imm & 0x1FF;
  416. ASSERT_MSG(DYNA_REC, !(imm < -256 || imm > 255), "offset too large {}", imm);
  417. Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) |
  418. (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  419. }
  420. void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size)
  421. {
  422. bool b64Bit = Is64Bit(Rt);
  423. bool bVec = IsVector(Rt);
  424. if (size == 64)
  425. {
  426. ASSERT_MSG(DYNA_REC, (imm & 0x7) == 0, "64-bit load/store must use aligned offset: {}", imm);
  427. imm >>= 3;
  428. }
  429. else if (size == 32)
  430. {
  431. ASSERT_MSG(DYNA_REC, (imm & 0x3) == 0, "32-bit load/store must use aligned offset: {}", imm);
  432. imm >>= 2;
  433. }
  434. else if (size == 16)
  435. {
  436. ASSERT_MSG(DYNA_REC, (imm & 0x1) == 0, "16-bit load/store must use aligned offset: {}", imm);
  437. imm >>= 1;
  438. }
  439. ASSERT_MSG(DYNA_REC, imm >= 0, "(IndexType::Unsigned): offset must be positive {}", imm);
  440. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFF), "(IndexType::Unsigned): offset too large {}", imm);
  441. Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (DecodeReg(Rn) << 5) |
  442. DecodeReg(Rt));
  443. }
  444. void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos)
  445. {
  446. bool b64Bit = Is64Bit(Rd);
  447. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFFF), "immediate out of range: {}", imm);
  448. Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (static_cast<u32>(pos) << 21) | (imm << 5) |
  449. DecodeReg(Rd));
  450. }
  451. void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
  452. {
  453. bool b64Bit = Is64Bit(Rd);
  454. Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | (immr << 16) |
  455. (imms << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  456. }
  457. void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn,
  458. ArithOption Rm)
  459. {
  460. const int decoded_Rm = DecodeReg(Rm.GetReg());
  461. Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | Rm.GetData() |
  462. (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  463. }
  464. void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn,
  465. ARM64Reg Rd)
  466. {
  467. bool b64Bit = Is64Bit(Rd);
  468. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFF), "immediate too large: {}", imm);
  469. Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | (imm << 10) |
  470. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  471. }
  472. void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
  473. {
  474. ASSERT_MSG(DYNA_REC, imm.valid, "Invalid logical immediate");
  475. // Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
  476. // Use Rn to determine bitness here.
  477. bool b64Bit = Is64Bit(Rn);
  478. ASSERT_MSG(DYNA_REC, b64Bit || !imm.n,
  479. "64-bit logical immediate does not fit in 32-bit register");
  480. Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (imm.n << 22) | (imm.r << 16) |
  481. (imm.s << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  482. }
  483. void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2,
  484. ARM64Reg Rn, s32 imm)
  485. {
  486. bool b64Bit = Is64Bit(Rt);
  487. u32 type_encode = 0;
  488. switch (type)
  489. {
  490. case IndexType::Signed:
  491. type_encode = 0b010;
  492. break;
  493. case IndexType::Post:
  494. type_encode = 0b001;
  495. break;
  496. case IndexType::Pre:
  497. type_encode = 0b011;
  498. break;
  499. case IndexType::Unsigned:
  500. ASSERT_MSG(DYNA_REC, false, "IndexType::Unsigned is not supported!");
  501. break;
  502. }
  503. if (b64Bit)
  504. {
  505. op |= 0b10;
  506. ASSERT_MSG(DYNA_REC, (imm & 0x7) == 0, "64-bit load/store must use aligned offset: {}", imm);
  507. imm >>= 3;
  508. }
  509. else
  510. {
  511. ASSERT_MSG(DYNA_REC, (imm & 0x3) == 0, "32-bit load/store must use aligned offset: {}", imm);
  512. imm >>= 2;
  513. }
  514. ASSERT_MSG(DYNA_REC, imm >= -64 && imm < 64, "imm too large for load/store pair! {}", imm);
  515. Write32((op << 30) | (0b101 << 27) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) |
  516. (DecodeReg(Rt2) << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  517. }
  518. void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)
  519. {
  520. Write32((op << 31) | ((imm & 0x3) << 29) | (0x10 << 24) | ((imm & 0x1FFFFC) << 3) |
  521. DecodeReg(Rd));
  522. }
  523. void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  524. {
  525. ASSERT_MSG(DYNA_REC, !(imm < -256 || imm > 255), "offset too large: {}", imm);
  526. Write32((size << 30) | (0b111 << 27) | (op << 22) | ((imm & 0x1FF) << 12) | (DecodeReg(Rn) << 5) |
  527. DecodeReg(Rt));
  528. }
  // True when distance fits a signed 19-bit field, i.e. lies in [-0x40000, 0x3FFFF].
  static constexpr bool IsInRangeImm19(s64 distance)
  {
    return !(distance < -0x40000 || distance > 0x3FFFF);
  }
  // True when distance fits a signed 14-bit field, i.e. lies in [-0x2000, 0x1FFF].
  static constexpr bool IsInRangeImm14(s64 distance)
  {
    return !(distance < -0x2000 || distance > 0x1FFF);
  }
  // True when distance fits a signed 26-bit field, i.e. lies in [-0x2000000, 0x1FFFFFF].
  static constexpr bool IsInRangeImm26(s64 distance)
  {
    return !(distance < -0x2000000 || distance > 0x1FFFFFF);
  }
  // Keeps the low 19 bits of distance (two's complement truncation).
  static constexpr u32 MaskImm19(s64 distance)
  {
    return static_cast<u32>(distance & 0x7FFFF);
  }
  // Keeps the low 14 bits of distance (two's complement truncation).
  static constexpr u32 MaskImm14(s64 distance)
  {
    return static_cast<u32>(distance & 0x3FFF);
  }
  // Keeps the low 26 bits of distance (two's complement truncation).
  static constexpr u32 MaskImm26(s64 distance)
  {
    return static_cast<u32>(distance & 0x3FFFFFF);
  }
  553. // FixupBranch branching
  554. void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
  555. {
  556. if (!branch.ptr)
  557. return;
  558. bool Not = false;
  559. u32 inst = 0;
  560. s64 distance = (s64)(m_code - branch.ptr);
  561. distance >>= 2;
  562. switch (branch.type)
  563. {
  564. case FixupBranch::Type::CBNZ:
  565. Not = true;
  566. [[fallthrough]];
  567. case FixupBranch::Type::CBZ:
  568. {
  569. ASSERT_MSG(DYNA_REC, IsInRangeImm19(distance),
  570. "Branch type {}: Received too large distance: {}", static_cast<int>(branch.type),
  571. distance);
  572. const bool b64Bit = Is64Bit(branch.reg);
  573. inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) |
  574. DecodeReg(branch.reg);
  575. }
  576. break;
  577. case FixupBranch::Type::BConditional:
  578. ASSERT_MSG(DYNA_REC, IsInRangeImm19(distance),
  579. "Branch type {}: Received too large distance: {}", static_cast<int>(branch.type),
  580. distance);
  581. inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;
  582. break;
  583. case FixupBranch::Type::TBNZ:
  584. Not = true;
  585. [[fallthrough]];
  586. case FixupBranch::Type::TBZ:
  587. {
  588. ASSERT_MSG(DYNA_REC, IsInRangeImm14(distance),
  589. "Branch type {}: Received too large distance: {}", static_cast<int>(branch.type),
  590. distance);
  591. inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) |
  592. (MaskImm14(distance) << 5) | DecodeReg(branch.reg);
  593. }
  594. break;
  595. case FixupBranch::Type::B:
  596. ASSERT_MSG(DYNA_REC, IsInRangeImm26(distance),
  597. "Branch type {}: Received too large distance: {}", static_cast<int>(branch.type),
  598. distance);
  599. inst = (0x5 << 26) | MaskImm26(distance);
  600. break;
  601. case FixupBranch::Type::BL:
  602. ASSERT_MSG(DYNA_REC, IsInRangeImm26(distance),
  603. "Branch type {}: Received too large distance: {}", static_cast<int>(branch.type),
  604. distance);
  605. inst = (0x25 << 26) | MaskImm26(distance);
  606. break;
  607. }
  608. std::memcpy(branch.ptr, &inst, sizeof(inst));
  609. }
  610. FixupBranch ARM64XEmitter::WriteFixupBranch()
  611. {
  612. FixupBranch branch{};
  613. branch.ptr = m_code;
  614. BRK(0);
  615. // If we couldn't write the full jump instruction, indicate that in the returned FixupBranch by
  616. // setting the branch's address to null. This will prevent a later SetJumpTarget() from writing to
  617. // invalid memory.
  618. if (HasWriteFailed())
  619. branch.ptr = nullptr;
  620. return branch;
  621. }
  622. FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
  623. {
  624. FixupBranch branch = WriteFixupBranch();
  625. branch.type = FixupBranch::Type::CBZ;
  626. branch.reg = Rt;
  627. return branch;
  628. }
  629. FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)
  630. {
  631. FixupBranch branch = WriteFixupBranch();
  632. branch.type = FixupBranch::Type::CBNZ;
  633. branch.reg = Rt;
  634. return branch;
  635. }
  636. FixupBranch ARM64XEmitter::B(CCFlags cond)
  637. {
  638. FixupBranch branch = WriteFixupBranch();
  639. branch.type = FixupBranch::Type::BConditional;
  640. branch.cond = cond;
  641. return branch;
  642. }
  643. FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)
  644. {
  645. FixupBranch branch = WriteFixupBranch();
  646. branch.type = FixupBranch::Type::TBZ;
  647. branch.reg = Rt;
  648. branch.bit = bit;
  649. return branch;
  650. }
  651. FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)
  652. {
  653. FixupBranch branch = WriteFixupBranch();
  654. branch.type = FixupBranch::Type::TBNZ;
  655. branch.reg = Rt;
  656. branch.bit = bit;
  657. return branch;
  658. }
  659. FixupBranch ARM64XEmitter::B()
  660. {
  661. FixupBranch branch = WriteFixupBranch();
  662. branch.type = FixupBranch::Type::B;
  663. return branch;
  664. }
  665. FixupBranch ARM64XEmitter::BL()
  666. {
  667. FixupBranch branch = WriteFixupBranch();
  668. branch.type = FixupBranch::Type::BL;
  669. return branch;
  670. }
  671. // Compare and Branch
  672. void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr)
  673. {
  674. EncodeCompareBranchInst(0, Rt, ptr);
  675. }
  676. void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr)
  677. {
  678. EncodeCompareBranchInst(1, Rt, ptr);
  679. }
  680. // Conditional Branch
  681. void ARM64XEmitter::B(CCFlags cond, const void* ptr)
  682. {
  683. s64 distance = (s64)ptr - (s64)m_code;
  684. distance >>= 2;
  685. ASSERT_MSG(DYNA_REC, IsInRangeImm19(distance),
  686. "Received too large distance: {}->{} (dist {} {:#x})", fmt::ptr(m_code), fmt::ptr(ptr),
  687. distance, distance);
  688. Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);
  689. }
  690. // Test and Branch
  691. void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr)
  692. {
  693. EncodeTestBranchInst(0, Rt, bits, ptr);
  694. }
  695. void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr)
  696. {
  697. EncodeTestBranchInst(1, Rt, bits, ptr);
  698. }
  699. // Unconditional Branch
  700. void ARM64XEmitter::B(const void* ptr)
  701. {
  702. EncodeUnconditionalBranchInst(0, ptr);
  703. }
  704. void ARM64XEmitter::BL(const void* ptr)
  705. {
  706. EncodeUnconditionalBranchInst(1, ptr);
  707. }
  708. void ARM64XEmitter::QuickCallFunction(ARM64Reg scratchreg, const void* func)
  709. {
  710. s64 distance = (s64)func - (s64)m_code;
  711. distance >>= 2; // Can only branch to opcode-aligned (4) addresses
  712. if (!IsInRangeImm26(distance))
  713. {
  714. MOVI2R(scratchreg, (uintptr_t)func);
  715. BLR(scratchreg);
  716. }
  717. else
  718. {
  719. BL(func);
  720. }
  721. }
  722. // Unconditional Branch (register)
  723. void ARM64XEmitter::BR(ARM64Reg Rn)
  724. {
  725. EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);
  726. }
  727. void ARM64XEmitter::BLR(ARM64Reg Rn)
  728. {
  729. EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);
  730. }
  731. void ARM64XEmitter::RET(ARM64Reg Rn)
  732. {
  733. EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);
  734. }
  735. void ARM64XEmitter::ERET()
  736. {
  737. EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, ARM64Reg::SP);
  738. }
  739. void ARM64XEmitter::DRPS()
  740. {
  741. EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, ARM64Reg::SP);
  742. }
  743. // Exception generation
  744. void ARM64XEmitter::SVC(u32 imm)
  745. {
  746. EncodeExceptionInst(0, imm);
  747. }
  748. void ARM64XEmitter::HVC(u32 imm)
  749. {
  750. EncodeExceptionInst(1, imm);
  751. }
  752. void ARM64XEmitter::SMC(u32 imm)
  753. {
  754. EncodeExceptionInst(2, imm);
  755. }
  756. void ARM64XEmitter::BRK(u32 imm)
  757. {
  758. EncodeExceptionInst(3, imm);
  759. }
  760. void ARM64XEmitter::HLT(u32 imm)
  761. {
  762. EncodeExceptionInst(4, imm);
  763. }
  764. void ARM64XEmitter::DCPS1(u32 imm)
  765. {
  766. EncodeExceptionInst(5, imm);
  767. }
  768. void ARM64XEmitter::DCPS2(u32 imm)
  769. {
  770. EncodeExceptionInst(6, imm);
  771. }
  772. void ARM64XEmitter::DCPS3(u32 imm)
  773. {
  774. EncodeExceptionInst(7, imm);
  775. }
  776. // System
  777. void ARM64XEmitter::_MSR(PStateField field, u8 imm)
  778. {
  779. u32 op1 = 0, op2 = 0;
  780. switch (field)
  781. {
  782. case PStateField::SPSel:
  783. op1 = 0;
  784. op2 = 5;
  785. break;
  786. case PStateField::DAIFSet:
  787. op1 = 3;
  788. op2 = 6;
  789. break;
  790. case PStateField::DAIFClr:
  791. op1 = 3;
  792. op2 = 7;
  793. break;
  794. default:
  795. ASSERT_MSG(DYNA_REC, false, "Invalid PStateField to do a imm move to");
  796. break;
  797. }
  798. EncodeSystemInst(0, op1, 4, imm, op2, ARM64Reg::WSP);
  799. }
  800. static void GetSystemReg(PStateField field, int& o0, int& op1, int& CRn, int& CRm, int& op2)
  801. {
  802. switch (field)
  803. {
  804. case PStateField::NZCV:
  805. o0 = 3;
  806. op1 = 3;
  807. CRn = 4;
  808. CRm = 2;
  809. op2 = 0;
  810. break;
  811. case PStateField::FPCR:
  812. o0 = 3;
  813. op1 = 3;
  814. CRn = 4;
  815. CRm = 4;
  816. op2 = 0;
  817. break;
  818. case PStateField::FPSR:
  819. o0 = 3;
  820. op1 = 3;
  821. CRn = 4;
  822. CRm = 4;
  823. op2 = 1;
  824. break;
  825. case PStateField::PMCR_EL0:
  826. o0 = 3;
  827. op1 = 3;
  828. CRn = 9;
  829. CRm = 6;
  830. op2 = 0;
  831. break;
  832. case PStateField::PMCCNTR_EL0:
  833. o0 = 3;
  834. op1 = 3;
  835. CRn = 9;
  836. CRm = 7;
  837. op2 = 0;
  838. break;
  839. default:
  840. ASSERT_MSG(DYNA_REC, false, "Invalid PStateField to do a register move from/to");
  841. break;
  842. }
  843. }
  844. void ARM64XEmitter::_MSR(PStateField field, ARM64Reg Rt)
  845. {
  846. int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
  847. ASSERT_MSG(DYNA_REC, Is64Bit(Rt), "MSR: Rt must be 64-bit");
  848. GetSystemReg(field, o0, op1, CRn, CRm, op2);
  849. EncodeSystemInst(o0, op1, CRn, CRm, op2, Rt);
  850. }
  851. void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field)
  852. {
  853. int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
  854. ASSERT_MSG(DYNA_REC, Is64Bit(Rt), "MRS: Rt must be 64-bit");
  855. GetSystemReg(field, o0, op1, CRn, CRm, op2);
  856. EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, Rt);
  857. }
  858. void ARM64XEmitter::CNTVCT(Arm64Gen::ARM64Reg Rt)
  859. {
  860. ASSERT_MSG(DYNA_REC, Is64Bit(Rt), "CNTVCT: Rt must be 64-bit");
  861. // MRS <Xt>, CNTVCT_EL0 ; Read CNTVCT_EL0 into Xt
  862. EncodeSystemInst(3 | 4, 3, 0xe, 0, 2, Rt);
  863. }
  864. void ARM64XEmitter::HINT(SystemHint op)
  865. {
  866. EncodeSystemInst(0, 3, 2, 0, static_cast<u32>(op), ARM64Reg::WSP);
  867. }
  868. void ARM64XEmitter::CLREX()
  869. {
  870. EncodeSystemInst(0, 3, 3, 0, 2, ARM64Reg::WSP);
  871. }
  872. void ARM64XEmitter::DSB(BarrierType type)
  873. {
  874. EncodeSystemInst(0, 3, 3, static_cast<u32>(type), 4, ARM64Reg::WSP);
  875. }
  876. void ARM64XEmitter::DMB(BarrierType type)
  877. {
  878. EncodeSystemInst(0, 3, 3, static_cast<u32>(type), 5, ARM64Reg::WSP);
  879. }
  880. void ARM64XEmitter::ISB(BarrierType type)
  881. {
  882. EncodeSystemInst(0, 3, 3, static_cast<u32>(type), 6, ARM64Reg::WSP);
  883. }
  884. // Add/Subtract (extended register)
  885. void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  886. {
  887. ADD(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  888. }
  889. void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  890. {
  891. EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);
  892. }
  893. void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  894. {
  895. EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  896. }
  897. void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  898. {
  899. EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);
  900. }
  901. void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  902. {
  903. SUB(Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  904. }
  905. void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  906. {
  907. EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);
  908. }
  909. void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  910. {
  911. EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd, ShiftType::LSL, 0));
  912. }
  913. void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  914. {
  915. EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);
  916. }
  917. void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm)
  918. {
  919. CMN(Rn, Rm, ArithOption(Rn, ShiftType::LSL, 0));
  920. }
  921. void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  922. {
  923. EncodeArithmeticInst(0, true, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm, Option);
  924. }
  925. void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm)
  926. {
  927. CMP(Rn, Rm, ArithOption(Rn, ShiftType::LSL, 0));
  928. }
  929. void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, ArithOption Option)
  930. {
  931. EncodeArithmeticInst(1, true, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, Rm, Option);
  932. }
  933. // Add/Subtract (with carry)
  934. void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  935. {
  936. EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);
  937. }
  938. void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  939. {
  940. EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);
  941. }
  942. void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  943. {
  944. EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);
  945. }
  946. void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  947. {
  948. EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);
  949. }
  950. // Conditional Compare (immediate)
  951. void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
  952. {
  953. EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);
  954. }
  955. void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
  956. {
  957. EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);
  958. }
  959. // Conditional Compare (register)
  960. void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
  961. {
  962. EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);
  963. }
  964. void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
  965. {
  966. EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);
  967. }
  968. // Conditional Select
  969. void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
  970. {
  971. EncodeCondSelectInst(0, Rd, Rn, Rm, cond);
  972. }
  973. void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
  974. {
  975. EncodeCondSelectInst(1, Rd, Rn, Rm, cond);
  976. }
  977. void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
  978. {
  979. EncodeCondSelectInst(2, Rd, Rn, Rm, cond);
  980. }
  981. void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
  982. {
  983. EncodeCondSelectInst(3, Rd, Rn, Rm, cond);
  984. }
  985. // Data-Processing 1 source
  986. void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn)
  987. {
  988. EncodeData1SrcInst(0, Rd, Rn);
  989. }
  990. void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn)
  991. {
  992. EncodeData1SrcInst(1, Rd, Rn);
  993. }
  994. void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn)
  995. {
  996. EncodeData1SrcInst(2, Rd, Rn);
  997. }
  998. void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn)
  999. {
  1000. EncodeData1SrcInst(3, Rd, Rn);
  1001. }
  1002. void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn)
  1003. {
  1004. EncodeData1SrcInst(4, Rd, Rn);
  1005. }
  1006. void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn)
  1007. {
  1008. EncodeData1SrcInst(5, Rd, Rn);
  1009. }
  1010. // Data-Processing 2 source
  1011. void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1012. {
  1013. EncodeData2SrcInst(0, Rd, Rn, Rm);
  1014. }
  1015. void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1016. {
  1017. EncodeData2SrcInst(1, Rd, Rn, Rm);
  1018. }
  1019. void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1020. {
  1021. EncodeData2SrcInst(2, Rd, Rn, Rm);
  1022. }
  1023. void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1024. {
  1025. EncodeData2SrcInst(3, Rd, Rn, Rm);
  1026. }
  1027. void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1028. {
  1029. EncodeData2SrcInst(4, Rd, Rn, Rm);
  1030. }
  1031. void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1032. {
  1033. EncodeData2SrcInst(5, Rd, Rn, Rm);
  1034. }
  1035. void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1036. {
  1037. EncodeData2SrcInst(6, Rd, Rn, Rm);
  1038. }
  1039. void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1040. {
  1041. EncodeData2SrcInst(7, Rd, Rn, Rm);
  1042. }
  1043. void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1044. {
  1045. EncodeData2SrcInst(8, Rd, Rn, Rm);
  1046. }
  1047. void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1048. {
  1049. EncodeData2SrcInst(9, Rd, Rn, Rm);
  1050. }
  1051. void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1052. {
  1053. EncodeData2SrcInst(10, Rd, Rn, Rm);
  1054. }
  1055. void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1056. {
  1057. EncodeData2SrcInst(11, Rd, Rn, Rm);
  1058. }
  1059. void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1060. {
  1061. EncodeData2SrcInst(12, Rd, Rn, Rm);
  1062. }
  1063. void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1064. {
  1065. EncodeData2SrcInst(13, Rd, Rn, Rm);
  1066. }
  1067. // Data-Processing 3 source
  1068. void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1069. {
  1070. EncodeData3SrcInst(0, Rd, Rn, Rm, Ra);
  1071. }
  1072. void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1073. {
  1074. EncodeData3SrcInst(1, Rd, Rn, Rm, Ra);
  1075. }
  1076. void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1077. {
  1078. EncodeData3SrcInst(2, Rd, Rn, Rm, Ra);
  1079. }
  1080. void ARM64XEmitter::SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1081. {
  1082. SMADDL(Rd, Rn, Rm, ARM64Reg::SP);
  1083. }
  1084. void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1085. {
  1086. EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
  1087. }
  1088. void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1089. {
  1090. EncodeData3SrcInst(4, Rd, Rn, Rm, ARM64Reg::SP);
  1091. }
  1092. void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1093. {
  1094. EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
  1095. }
  1096. void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1097. {
  1098. UMADDL(Rd, Rn, Rm, ARM64Reg::SP);
  1099. }
  1100. void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  1101. {
  1102. EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
  1103. }
  1104. void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1105. {
  1106. EncodeData3SrcInst(7, Rd, Rn, Rm, ARM64Reg::SP);
  1107. }
  1108. void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1109. {
  1110. EncodeData3SrcInst(0, Rd, Rn, Rm, ARM64Reg::SP);
  1111. }
  1112. void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  1113. {
  1114. EncodeData3SrcInst(1, Rd, Rn, Rm, ARM64Reg::SP);
  1115. }
  1116. // Logical (shifted register)
  1117. void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1118. {
  1119. EncodeLogicalInst(0, Rd, Rn, Rm, Shift);
  1120. }
  1121. void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1122. {
  1123. EncodeLogicalInst(1, Rd, Rn, Rm, Shift);
  1124. }
  1125. void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1126. {
  1127. EncodeLogicalInst(2, Rd, Rn, Rm, Shift);
  1128. }
  1129. void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1130. {
  1131. EncodeLogicalInst(3, Rd, Rn, Rm, Shift);
  1132. }
  1133. void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1134. {
  1135. EncodeLogicalInst(4, Rd, Rn, Rm, Shift);
  1136. }
  1137. void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1138. {
  1139. EncodeLogicalInst(5, Rd, Rn, Rm, Shift);
  1140. }
  1141. void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1142. {
  1143. EncodeLogicalInst(6, Rd, Rn, Rm, Shift);
  1144. }
  1145. void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ArithOption Shift)
  1146. {
  1147. EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
  1148. }
  1149. void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, ArithOption Shift)
  1150. {
  1151. ORR(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rm, Shift);
  1152. }
  1153. void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm)
  1154. {
  1155. if (IsGPR(Rd) && IsGPR(Rm))
  1156. ORR(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rm, ArithOption(Rm, ShiftType::LSL, 0));
  1157. else
  1158. ASSERT_MSG(DYNA_REC, false, "Non-GPRs not supported in MOV");
  1159. }
  1160. void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
  1161. {
  1162. ORN(Rd, Is64Bit(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR, Rm, ArithOption(Rm, ShiftType::LSL, 0));
  1163. }
  1164. void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
  1165. {
  1166. int bits = Is64Bit(Rd) ? 64 : 32;
  1167. UBFM(Rd, Rm, (bits - shift) & (bits - 1), bits - shift - 1);
  1168. }
  1169. void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
  1170. {
  1171. int bits = Is64Bit(Rd) ? 64 : 32;
  1172. UBFM(Rd, Rm, shift, bits - 1);
  1173. }
  1174. void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
  1175. {
  1176. int bits = Is64Bit(Rd) ? 64 : 32;
  1177. SBFM(Rd, Rm, shift, bits - 1);
  1178. }
  1179. void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
  1180. {
  1181. EXTR(Rd, Rm, Rm, shift);
  1182. }
  1183. // Logical (immediate)
  1184. void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
  1185. {
  1186. EncodeLogicalImmInst(0, Rd, Rn, imm);
  1187. }
  1188. void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
  1189. {
  1190. EncodeLogicalImmInst(3, Rd, Rn, imm);
  1191. }
  1192. void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
  1193. {
  1194. EncodeLogicalImmInst(2, Rd, Rn, imm);
  1195. }
  1196. void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, LogicalImm imm)
  1197. {
  1198. EncodeLogicalImmInst(1, Rd, Rn, imm);
  1199. }
  1200. void ARM64XEmitter::TST(ARM64Reg Rn, LogicalImm imm)
  1201. {
  1202. EncodeLogicalImmInst(3, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm);
  1203. }
  1204. // Add/subtract (immediate)
  1205. void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
  1206. {
  1207. EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd);
  1208. }
  1209. void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
  1210. {
  1211. EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd);
  1212. }
  1213. void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
  1214. {
  1215. EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd);
  1216. }
  1217. void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
  1218. {
  1219. EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd);
  1220. }
  1221. void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
  1222. {
  1223. EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP);
  1224. }
  1225. void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)
  1226. {
  1227. EncodeAddSubImmInst(0, true, shift, imm, Rn, Is64Bit(Rn) ? ARM64Reg::SP : ARM64Reg::WSP);
  1228. }
  1229. // Data Processing (Immediate)
  1230. void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)
  1231. {
  1232. EncodeMOVWideInst(2, Rd, imm, pos);
  1233. }
  1234. void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos)
  1235. {
  1236. EncodeMOVWideInst(0, Rd, imm, pos);
  1237. }
  1238. void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos)
  1239. {
  1240. EncodeMOVWideInst(3, Rd, imm, pos);
  1241. }
  1242. // Bitfield move
  1243. void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
  1244. {
  1245. EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms);
  1246. }
  1247. void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
  1248. {
  1249. EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms);
  1250. }
  1251. void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
  1252. {
  1253. EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
  1254. }
  1255. void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
  1256. {
  1257. u32 size = Is64Bit(Rn) ? 64 : 32;
  1258. ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
  1259. "lsb {} and width {} is greater than the register size {}!", lsb, width, size);
  1260. BFM(Rd, Rn, (size - lsb) % size, width - 1);
  1261. }
  1262. void ARM64XEmitter::BFXIL(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
  1263. {
  1264. u32 size = Is64Bit(Rn) ? 64 : 32;
  1265. ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
  1266. "lsb {} and width {} is greater than the register size {}!", lsb, width, size);
  1267. BFM(Rd, Rn, lsb, lsb + width - 1);
  1268. }
  1269. void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
  1270. {
  1271. u32 size = Is64Bit(Rn) ? 64 : 32;
  1272. ASSERT_MSG(DYNA_REC, lsb < size && width >= 1 && width <= size - lsb,
  1273. "lsb {} and width {} is greater than the register size {}!", lsb, width, size);
  1274. UBFM(Rd, Rn, (size - lsb) % size, width - 1);
  1275. }
  1276. void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift)
  1277. {
  1278. bool sf = Is64Bit(Rd);
  1279. bool N = sf;
  1280. Write32((sf << 31) | (0x27 << 23) | (N << 22) | (DecodeReg(Rm) << 16) | (shift << 10) |
  1281. (DecodeReg(Rm) << 5) | DecodeReg(Rd));
  1282. }
  1283. void ARM64XEmitter::SXTB(ARM64Reg Rd, ARM64Reg Rn)
  1284. {
  1285. SBFM(Rd, Rn, 0, 7);
  1286. }
  1287. void ARM64XEmitter::SXTH(ARM64Reg Rd, ARM64Reg Rn)
  1288. {
  1289. SBFM(Rd, Rn, 0, 15);
  1290. }
  1291. void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn)
  1292. {
  1293. ASSERT_MSG(DYNA_REC, Is64Bit(Rd), "64bit register required as destination");
  1294. SBFM(Rd, Rn, 0, 31);
  1295. }
// UXTB: emitted as UBFM with immr = 0 and imms = 7 (bits 0..7 of Rn).
void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn)
{
  UBFM(Rd, Rn, 0, 7);
}
// UXTH: emitted as UBFM with immr = 0 and imms = 15 (bits 0..15 of Rn).
void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn)
{
  UBFM(Rd, Rn, 0, 15);
}
  1304. // Load Register (Literal)
// LDR (literal): forwards to the load-register-literal encoder with selector 0.
// NOTE(review): the unit/scaling expected for imm is defined by EncodeLoadRegisterInst.
void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm)
{
  EncodeLoadRegisterInst(0, Rt, imm);
}
// LDRSW (literal): forwards to the load-register-literal encoder with selector 2.
void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm)
{
  EncodeLoadRegisterInst(2, Rt, imm);
}
// PRFM (literal): forwards to the load-register-literal encoder with selector 3.
// Rt is passed through in the register slot of the encoding.
void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm)
{
  EncodeLoadRegisterInst(3, Rt, imm);
}
  1317. // Load/Store pair
// LDP: forwards to the load/store-pair encoder with (op, load) = (0, 1).
void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);
}
// LDPSW: forwards to the load/store-pair encoder with (op, load) = (1, 1).
void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);
}
// STP: forwards to the load/store-pair encoder with (op, load) = (0, 0).
void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);
}
  1330. // Load/Store Exclusive
// STXRB: load/store-exclusive encoding index 0. ARM64Reg::SP is passed in the Rt2 position,
// for which this instruction has no operand.
void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(0, Rs, ARM64Reg::SP, Rt, Rn);
}
// STLXRB: load/store-exclusive encoding index 1. ARM64Reg::SP is passed in the Rt2 position,
// for which this instruction has no operand.
void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(1, Rs, ARM64Reg::SP, Rt, Rn);
}
// LDXRB: load/store-exclusive encoding index 2. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(2, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDAXRB: load/store-exclusive encoding index 3. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(3, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// STLRB: load/store-exclusive encoder index 4. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(4, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDARB: load/store-exclusive encoder index 5. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(5, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// STXRH: load/store-exclusive encoding index 6. ARM64Reg::SP is passed in the Rt2 position,
// for which this instruction has no operand.
void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(6, Rs, ARM64Reg::SP, Rt, Rn);
}
// STLXRH: load/store-exclusive encoding index 7. ARM64Reg::SP is passed in the Rt2 position,
// for which this instruction has no operand.
void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(7, Rs, ARM64Reg::SP, Rt, Rn);
}
// LDXRH: load/store-exclusive encoding index 8. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(8, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDAXRH: load/store-exclusive encoding index 9. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(9, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// STLRH: load/store-exclusive encoder index 10. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(10, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDARH: load/store-exclusive encoder index 11. ARM64Reg::SP is passed in the Rs and Rt2
// positions, for which this instruction has no operands.
void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(11, ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// STXR: load/store-exclusive encoding index 12 for a 32-bit Rt, 13 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rt2 position, for which this instruction has no operand.
void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(12 + Is64Bit(Rt), Rs, ARM64Reg::SP, Rt, Rn);
}
// STLXR: load/store-exclusive encoding index 14 for a 32-bit Rt, 15 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rt2 position, for which this instruction has no operand.
void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(14 + Is64Bit(Rt), Rs, ARM64Reg::SP, Rt, Rn);
}
// STXP: load/store-exclusive encoding index 16 for a 32-bit Rt, 17 for a 64-bit Rt.
// Only Rt is inspected to pick the width.
void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(16 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
// STLXP: load/store-exclusive encoding index 18 for a 32-bit Rt, 19 for a 64-bit Rt.
// Only Rt is inspected to pick the width.
void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(18 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
}
// LDXR: load/store-exclusive encoding index 20 for a 32-bit Rt, 21 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs and Rt2 positions, for which this instruction has no
// operands.
void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(20 + Is64Bit(Rt), ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDAXR: load/store-exclusive encoding index 22 for a 32-bit Rt, 23 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs and Rt2 positions, for which this instruction has no
// operands.
void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(22 + Is64Bit(Rt), ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDXP: load/store-exclusive encoding index 24 for a 32-bit Rt, 25 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs position, for which this instruction has no operand.
void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(24 + Is64Bit(Rt), ARM64Reg::SP, Rt2, Rt, Rn);
}
// LDAXP: load/store-exclusive encoding index 26 for a 32-bit Rt, 27 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs position, for which this instruction has no operand.
void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(26 + Is64Bit(Rt), ARM64Reg::SP, Rt2, Rt, Rn);
}
// STLR: load/store-exclusive encoder index 28 for a 32-bit Rt, 29 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs and Rt2 positions, for which this instruction has no
// operands.
void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(28 + Is64Bit(Rt), ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
// LDAR: load/store-exclusive encoder index 30 for a 32-bit Rt, 31 for a 64-bit Rt.
// ARM64Reg::SP is passed in the Rs and Rt2 positions, for which this instruction has no
// operands.
void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn)
{
  EncodeLoadStoreExcInst(30 + Is64Bit(Rt), ARM64Reg::SP, ARM64Reg::SP, Rt, Rn);
}
  1419. // Load/Store no-allocate pair (offset)
// STNP: forwards to the paired load/store encoder with opcode 0xA0.
// NOTE(review): the unit/scaling expected for imm is defined by EncodeLoadStorePairedInst.
void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
{
  EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm);
}
// LDNP: forwards to the paired load/store encoder with opcode 0xA1.
// NOTE(review): the unit/scaling expected for imm is defined by EncodeLoadStorePairedInst.
void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
{
  EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm);
}
  1428. // Load/Store register (immediate post-indexed)
  1429. // XXX: Most of these support vectors
  1430. void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1431. {
  1432. if (type == IndexType::Unsigned)
  1433. EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);
  1434. else
  1435. EncodeLoadStoreIndexedInst(0x0E0, type == IndexType::Post ? 1 : 3, Rt, Rn, imm);
  1436. }
  1437. void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1438. {
  1439. if (type == IndexType::Unsigned)
  1440. EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);
  1441. else
  1442. EncodeLoadStoreIndexedInst(0x0E1, type == IndexType::Post ? 1 : 3, Rt, Rn, imm);
  1443. }
  1444. void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1445. {
  1446. if (type == IndexType::Unsigned)
  1447. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);
  1448. else
  1449. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3, type == IndexType::Post ? 1 : 3, Rt, Rn,
  1450. imm);
  1451. }
  1452. void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1453. {
  1454. if (type == IndexType::Unsigned)
  1455. EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);
  1456. else
  1457. EncodeLoadStoreIndexedInst(0x1E0, type == IndexType::Post ? 1 : 3, Rt, Rn, imm);
  1458. }
  1459. void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1460. {
  1461. if (type == IndexType::Unsigned)
  1462. EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);
  1463. else
  1464. EncodeLoadStoreIndexedInst(0x1E1, type == IndexType::Post ? 1 : 3, Rt, Rn, imm);
  1465. }
  1466. void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1467. {
  1468. if (type == IndexType::Unsigned)
  1469. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);
  1470. else
  1471. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3, type == IndexType::Post ? 1 : 3, Rt, Rn,
  1472. imm);
  1473. }
  1474. void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1475. {
  1476. if (type == IndexType::Unsigned)
  1477. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
  1478. else
  1479. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0, type == IndexType::Post ? 1 : 3, Rt, Rn,
  1480. imm);
  1481. }
  1482. void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1483. {
  1484. if (type == IndexType::Unsigned)
  1485. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
  1486. else
  1487. EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1, type == IndexType::Post ? 1 : 3, Rt, Rn,
  1488. imm);
  1489. }
  1490. void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1491. {
  1492. if (type == IndexType::Unsigned)
  1493. EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);
  1494. else
  1495. EncodeLoadStoreIndexedInst(0x2E2, type == IndexType::Post ? 1 : 3, Rt, Rn, imm);
  1496. }
  1497. // Load/Store register (register offset)
// STRB (register offset): register-offset encoder with (size, opc) = (0, 0).
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);
}
// LDRB (register offset): register-offset encoder with (size, opc) = (0, 1).
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);
}
  1506. void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  1507. {
  1508. bool b64Bit = Is64Bit(Rt);
  1509. EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm);
  1510. }
// STRH (register offset): register-offset encoder with (size, opc) = (1, 0).
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);
}
// LDRH (register offset): register-offset encoder with (size, opc) = (1, 1).
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);
}
  1519. void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  1520. {
  1521. bool b64Bit = Is64Bit(Rt);
  1522. EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm);
  1523. }
  1524. void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  1525. {
  1526. bool b64Bit = Is64Bit(Rt);
  1527. EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm);
  1528. }
  1529. void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  1530. {
  1531. bool b64Bit = Is64Bit(Rt);
  1532. EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm);
  1533. }
// LDRSW (register offset): register-offset encoder with (size, opc) = (2, 2).
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);
}
// PRFM (register offset): register-offset encoder with (size, opc) = (3, 2).
// Rt is passed through in the register slot of the encoding.
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
{
  EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);
}
  1542. // Load/Store register (unscaled offset)
// STURB: unscaled-offset encoder with (size, op) = (0, 0).
void ARM64XEmitter::STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(0, 0, Rt, Rn, imm);
}
// LDURB: unscaled-offset encoder with (size, op) = (0, 1).
void ARM64XEmitter::LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(0, 1, Rt, Rn, imm);
}
// LDURSB: unscaled-offset encoder with size 0; op is 2 for a 64-bit Rt and 3 for a 32-bit Rt.
void ARM64XEmitter::LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(0, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
}
// STURH: unscaled-offset encoder with (size, op) = (1, 0).
void ARM64XEmitter::STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(1, 0, Rt, Rn, imm);
}
// LDURH: unscaled-offset encoder with (size, op) = (1, 1).
void ARM64XEmitter::LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(1, 1, Rt, Rn, imm);
}
// LDURSH: unscaled-offset encoder with size 1; op is 2 for a 64-bit Rt and 3 for a 32-bit Rt.
void ARM64XEmitter::LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(1, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
}
// STUR: unscaled-offset encoder with op 0; size is 3 for a 64-bit Rt and 2 for a 32-bit Rt.
void ARM64XEmitter::STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 0, Rt, Rn, imm);
}
// LDUR: unscaled-offset encoder with op 1; size is 3 for a 64-bit Rt and 2 for a 32-bit Rt.
void ARM64XEmitter::LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
{
  EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 1, Rt, Rn, imm);
}
  1575. void ARM64XEmitter::LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  1576. {
  1577. ASSERT_MSG(DYNA_REC, !Is64Bit(Rt), "Must have a 64bit destination register!");
  1578. EncodeLoadStoreUnscaled(2, 2, Rt, Rn, imm);
  1579. }
  1580. // Address of label/page PC-relative
// ADR: forwards to the PC-relative address encoder with op = 0; imm is passed unchanged.
void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)
{
  EncodeAddressInst(0, Rd, imm);
}
// ADRP: forwards to the PC-relative address encoder with op = 1.
// imm is a byte offset; it is shifted right by 12 (4 KiB page units) and narrowed to s32
// before being handed to the encoder.
void ARM64XEmitter::ADRP(ARM64Reg Rd, s64 imm)
{
  EncodeAddressInst(1, Rd, static_cast<s32>(imm >> 12));
}
// Emits MOVs that perform all moves in [begin, end) as if they happened simultaneously.
//
// This is using a hand-rolled algorithm. The goal is zero memory allocations, not necessarily
// the best JIT-time time complexity. (The number of moves is usually very small.)
//
// source_gpr_usages is indexed by decoded register number and is read as "how many pending
// moves still use this register as a source". It is updated in place, and the move array is
// compacted in place as moves are emitted.
void ARM64XEmitter::ParallelMoves(RegisterMove* begin, RegisterMove* end,
                                  std::array<u8, 32>* source_gpr_usages)
{
  // X0-X7 are used for passing arguments.
  // X18-X31 are either callee saved or used for special purposes.
  constexpr size_t temp_reg_begin = 8;
  constexpr size_t temp_reg_end = 18;

  while (begin != end)
  {
    bool removed_moves_during_this_loop_iteration = false;

    // Walk the pending moves back to front, emitting every move whose destination is no
    // longer needed as a source by any other pending move.
    RegisterMove* current_move = end;
    while (current_move != begin)
    {
      RegisterMove* prev_move = current_move;
      --current_move;
      if ((*source_gpr_usages)[DecodeReg(current_move->dst)] == 0)
      {
        MOV(current_move->dst, current_move->src);
        (*source_gpr_usages)[DecodeReg(current_move->src)]--;
        // Erase the emitted move by shifting the tail [prev_move, end) down by one slot.
        std::move(prev_move, end, current_move);
        --end;
        removed_moves_during_this_loop_iteration = true;
      }
    }

    if (!removed_moves_during_this_loop_iteration)
    {
      // We need to break a cycle using a temporary register.

      // Pick the first register, starting at temp_reg_begin, that no pending move reads from.
      size_t temp_reg = temp_reg_begin;
      while ((*source_gpr_usages)[temp_reg] != 0)
      {
        ++temp_reg;
        ASSERT_MSG(DYNA_REC, temp_reg != temp_reg_end, "Out of registers");
      }

      // Copy the first pending move's source into the temporary, using the same width as src.
      const ARM64Reg src = begin->src;
      const ARM64Reg dst =
          (Is64Bit(src) ? EncodeRegTo64 : EncodeRegTo32)(static_cast<ARM64Reg>(temp_reg));

      MOV(dst, src);
      // The temporary inherits all of src's readers, which frees src to be overwritten.
      (*source_gpr_usages)[DecodeReg(dst)] = (*source_gpr_usages)[DecodeReg(src)];
      (*source_gpr_usages)[DecodeReg(src)] = 0;

      // Redirect every pending move that read src to read the temporary instead.
      std::for_each(begin, end, [src, dst](RegisterMove& move) {
        if (move.src == src)
          move.src = dst;
      });
    }
  }
}
// Loads the immediate imm into Rd, picking the candidate sequence that needs the fewest
// instructions. T is the immediate width (explicitly instantiated below for u64 and u32).
//
// Every candidate is a "base" value produced by the first instruction, plus one part for each
// 16-bit chunk of imm that differs from the base. Bases tried: all zeros (MOVZ), all ones
// (MOVN), PC-relative addresses (ADR/ADRP, 64-bit only) and logical immediates (ORR).
template <typename T>
void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, T imm)
{
  enum class Approach
  {
    MOVZBase,
    MOVNBase,
    ADRBase,
    ADRPBase,
    ORRBase,
  };

  // One 16-bit chunk of imm together with its position.
  struct Part
  {
    Part() = default;
    Part(u16 imm_, ShiftAmount shift_) : imm(imm_), shift(shift_) {}

    u16 imm;
    ShiftAmount shift;
  };

  constexpr size_t max_parts = sizeof(T) / 2;

  Common::SmallVector<Part, max_parts> best_parts;
  Approach best_approach;
  u64 best_base;

  // MOVZ/MOVN emit the first part themselves (see the switch below). Every other approach
  // costs one extra instruction to materialize the base.
  const auto instructions_required = [](const Common::SmallVector<Part, max_parts>& parts,
                                        Approach approach) {
    return parts.size() + (approach > Approach::MOVNBase);
  };

  // Collects the 16-bit chunks where imm differs from base, and keeps the candidate if it is
  // the first one tried or strictly cheaper than the best one so far.
  const auto try_base = [&](T base, Approach approach, bool first_time) {
    Common::SmallVector<Part, max_parts> parts;

    for (size_t i = 0; i < max_parts; ++i)
    {
      const size_t shift = i * 16;
      const u16 imm_shifted = static_cast<u16>(imm >> shift);
      const u16 base_shifted = static_cast<u16>(base >> shift);
      if (imm_shifted != base_shifted)
        parts.emplace_back(imm_shifted, static_cast<ShiftAmount>(i));
    }

    // first_time short-circuits the comparison, so best_parts/best_approach are never read
    // before the first candidate has been stored.
    if (first_time ||
        instructions_required(parts, approach) < instructions_required(best_parts, best_approach))
    {
      best_parts = std::move(parts);
      best_approach = approach;
      best_base = base;
    }
  };

  // Try MOVZ/MOVN
  try_base(T(0), Approach::MOVZBase, true);
  try_base(~T(0), Approach::MOVNBase, false);

  // Try PC-relative approaches
  // Sign-extends the low 21 bits of x.
  const auto sext_21_bit = [](u64 x) {
    return static_cast<s64>((x & 0x1FFFFF) | (x & 0x100000 ? ~0x1FFFFF : 0));
  };
  const u64 pc = reinterpret_cast<u64>(GetCodePtr());
  const s64 adrp_offset = sext_21_bit((imm >> 12) - (pc >> 12)) << 12;
  const s64 adr_offset = sext_21_bit(imm - pc);
  // The values ADRP/ADR would actually produce after the offsets were truncated to 21 bits.
  const u64 adrp_base = (pc & ~0xFFF) + adrp_offset;
  const u64 adr_base = pc + adr_offset;
  if constexpr (sizeof(T) == 8)
  {
    try_base(adrp_base, Approach::ADRPBase, false);
    try_base(adr_base, Approach::ADRBase, false);
  }

  // Try ORR (or skip it if we already have a 1-instruction encoding - these tests are non-trivial)
  if (instructions_required(best_parts, best_approach) > 1)
  {
    if constexpr (sizeof(T) == 8)
    {
      // Candidate bases built from pieces of imm: low 32 bits duplicated into the high half,
      // high 32 bits duplicated into the low half, and the middle 32 bits kept with the top
      // and bottom 16-bit chunks swapped.
      for (u64 orr_imm : {(imm << 32) | (imm & 0x0000'0000'FFFF'FFFF),
                          (imm & 0xFFFF'FFFF'0000'0000) | (imm >> 32),
                          (imm << 48) | (imm & 0x0000'FFFF'FFFF'0000) | (imm >> 48)})
      {
        if (LogicalImm(orr_imm, GPRSize::B64))
          try_base(orr_imm, Approach::ORRBase, false);
      }
    }
    else
    {
      if (LogicalImm(imm, GPRSize::B32))
        try_base(imm, Approach::ORRBase, false);
    }
  }

  size_t parts_uploaded = 0;

  // To kill any dependencies, we start with an instruction that overwrites the entire register
  switch (best_approach)
  {
  case Approach::MOVZBase:
    // imm equals the base (all zeros): still emit one MOVZ so Rd gets written.
    if (best_parts.empty())
      best_parts.emplace_back(u16(0), ShiftAmount::Shift0);

    MOVZ(Rd, best_parts[0].imm, best_parts[0].shift);
    ++parts_uploaded;
    break;

  case Approach::MOVNBase:
    // imm equals the base (all ones): still emit one MOVN so Rd gets written.
    if (best_parts.empty())
      best_parts.emplace_back(u16(0xFFFF), ShiftAmount::Shift0);

    // MOVN takes the bitwise inverse of the chunk.
    MOVN(Rd, static_cast<u16>(~best_parts[0].imm), best_parts[0].shift);
    ++parts_uploaded;
    break;

  case Approach::ADRBase:
    ADR(Rd, adr_offset);
    break;

  case Approach::ADRPBase:
    ADRP(Rd, adrp_offset);
    break;

  case Approach::ORRBase:
    constexpr ARM64Reg zero_reg = sizeof(T) == 8 ? ARM64Reg::ZR : ARM64Reg::WZR;
    const bool success = TryORRI2R(Rd, zero_reg, best_base);
    ASSERT(success);
    break;
  }

  // And then we use MOVK for the remaining parts
  for (; parts_uploaded < best_parts.size(); ++parts_uploaded)
  {
    const Part& part = best_parts[parts_uploaded];

    if (best_approach == Approach::ADRPBase && part.shift == ShiftAmount::Shift0)
    {
      // The combination of ADRP followed by ADD immediate is specifically optimized in hardware
      // The assert checks that this chunk differs from the ADRP base only in its low 12 bits,
      // so adding those bits yields the same result as a MOVK would.
      ASSERT(part.imm == (adrp_base & 0xF000) + (part.imm & 0xFFF));
      ADD(Rd, Rd, part.imm & 0xFFF);
    }
    else
    {
      MOVK(Rd, part.imm, part.shift);
    }
  }
}

template void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, u64 imm);
template void ARM64XEmitter::MOVI2RImpl(ARM64Reg Rd, u32 imm);
  1763. void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm)
  1764. {
  1765. if (Is64Bit(Rd))
  1766. MOVI2RImpl<u64>(Rd, imm);
  1767. else
  1768. MOVI2RImpl<u32>(Rd, static_cast<u32>(imm));
  1769. }
  1770. bool ARM64XEmitter::MOVI2R2(ARM64Reg Rd, u64 imm1, u64 imm2)
  1771. {
  1772. // TODO: Also optimize for performance, not just for code size.
  1773. u8* start_pointer = GetWritableCodePtr();
  1774. MOVI2R(Rd, imm1);
  1775. int size1 = GetCodePtr() - start_pointer;
  1776. m_code = start_pointer;
  1777. MOVI2R(Rd, imm2);
  1778. int size2 = GetCodePtr() - start_pointer;
  1779. m_code = start_pointer;
  1780. bool element = size1 > size2;
  1781. MOVI2R(Rd, element ? imm2 : imm1);
  1782. return element;
  1783. }
  1784. void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
  1785. {
  1786. int num_regs = registers.Count();
  1787. int stack_size = (num_regs + (num_regs & 1)) * 8;
  1788. auto it = registers.begin();
  1789. if (!num_regs)
  1790. return;
  1791. // 8 byte per register, but 16 byte alignment, so we may have to padd one register.
  1792. // Only update the SP on the last write to avoid the dependency between those stores.
  1793. // The first push must adjust the SP, else a context switch may invalidate everything below SP.
  1794. if (num_regs & 1)
  1795. {
  1796. STR(IndexType::Pre, ARM64Reg::X0 + *it++, ARM64Reg::SP, -stack_size);
  1797. }
  1798. else
  1799. {
  1800. ARM64Reg first_reg = ARM64Reg::X0 + *it++;
  1801. ARM64Reg second_reg = ARM64Reg::X0 + *it++;
  1802. STP(IndexType::Pre, first_reg, second_reg, ARM64Reg::SP, -stack_size);
  1803. }
  1804. // Fast store for all other registers, this is always an even number.
  1805. for (int i = 0; i < (num_regs - 1) / 2; i++)
  1806. {
  1807. ARM64Reg odd_reg = ARM64Reg::X0 + *it++;
  1808. ARM64Reg even_reg = ARM64Reg::X0 + *it++;
  1809. STP(IndexType::Signed, odd_reg, even_reg, ARM64Reg::SP, 16 * (i + 1));
  1810. }
  1811. ASSERT_MSG(DYNA_REC, it == registers.end(), "Registers don't match: {:b}", registers.m_val);
  1812. }
  1813. void ARM64XEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
  1814. {
  1815. int num_regs = registers.Count();
  1816. int stack_size = (num_regs + (num_regs & 1)) * 8;
  1817. auto it = registers.begin();
  1818. if (!num_regs)
  1819. return;
  1820. // We must adjust the SP in the end, so load the first (two) registers at least.
  1821. ARM64Reg first = ARM64Reg::X0 + *it++;
  1822. ARM64Reg second;
  1823. if (!(num_regs & 1))
  1824. second = ARM64Reg::X0 + *it++;
  1825. else
  1826. second = {};
  1827. // 8 byte per register, but 16 byte alignment, so we may have to padd one register.
  1828. // Only update the SP on the last load to avoid the dependency between those loads.
  1829. // Fast load for all but the first (two) registers, this is always an even number.
  1830. for (int i = 0; i < (num_regs - 1) / 2; i++)
  1831. {
  1832. ARM64Reg odd_reg = ARM64Reg::X0 + *it++;
  1833. ARM64Reg even_reg = ARM64Reg::X0 + *it++;
  1834. LDP(IndexType::Signed, odd_reg, even_reg, ARM64Reg::SP, 16 * (i + 1));
  1835. }
  1836. // Post loading the first (two) registers.
  1837. if (num_regs & 1)
  1838. LDR(IndexType::Post, first, ARM64Reg::SP, stack_size);
  1839. else
  1840. LDP(IndexType::Post, first, second, ARM64Reg::SP, stack_size);
  1841. ASSERT_MSG(DYNA_REC, it == registers.end(), "Registers don't match: {:b}", registers.m_val);
  1842. }
  1843. // Float Emitter
  1844. void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt,
  1845. ARM64Reg Rn, s32 imm)
  1846. {
  1847. u32 encoded_size = 0;
  1848. u32 encoded_imm = 0;
  1849. if (size == 8)
  1850. encoded_size = 0;
  1851. else if (size == 16)
  1852. encoded_size = 1;
  1853. else if (size == 32)
  1854. encoded_size = 2;
  1855. else if (size == 64)
  1856. encoded_size = 3;
  1857. else if (size == 128)
  1858. encoded_size = 0;
  1859. if (type == IndexType::Unsigned)
  1860. {
  1861. ASSERT_MSG(DYNA_REC, imm >= 0, "(IndexType::Unsigned) immediate offset must be positive! ({})",
  1862. imm);
  1863. if (size == 16)
  1864. {
  1865. ASSERT_MSG(DYNA_REC, (imm & 0x1) == 0, "16-bit load/store must use aligned offset: {}", imm);
  1866. imm >>= 1;
  1867. }
  1868. else if (size == 32)
  1869. {
  1870. ASSERT_MSG(DYNA_REC, (imm & 0x3) == 0, "32-bit load/store must use aligned offset: {}", imm);
  1871. imm >>= 2;
  1872. }
  1873. else if (size == 64)
  1874. {
  1875. ASSERT_MSG(DYNA_REC, (imm & 0x7) == 0, "64-bit load/store must use aligned offset: {}", imm);
  1876. imm >>= 3;
  1877. }
  1878. else if (size == 128)
  1879. {
  1880. ASSERT_MSG(DYNA_REC, (imm & 0xf) == 0, "128-bit load/store must use aligned offset: {}", imm);
  1881. imm >>= 4;
  1882. }
  1883. ASSERT_MSG(DYNA_REC, imm <= 0xFFF, "Immediate value is too big: {}", imm);
  1884. encoded_imm = (imm & 0xFFF);
  1885. }
  1886. else
  1887. {
  1888. ASSERT_MSG(DYNA_REC, !(imm < -256 || imm > 255),
  1889. "immediate offset must be within range of -256 to 256! {}", imm);
  1890. encoded_imm = (imm & 0x1FF) << 2;
  1891. if (type == IndexType::Post)
  1892. encoded_imm |= 1;
  1893. else
  1894. encoded_imm |= 3;
  1895. }
  1896. Write32((encoded_size << 30) | (0xF << 26) | (type == IndexType::Unsigned ? (1 << 24) : 0) |
  1897. (size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (DecodeReg(Rn) << 5) |
  1898. DecodeReg(Rt));
  1899. }
// Encodes a scalar two-source instruction. Quad (vector) Rd is rejected.
// Layout: M at bit 31, S at bit 29, fixed 0b11110001 at bit 21, type at bit 22, Rm at bit 16,
// opcode at bit 12, fixed 1 at bit 11, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd,
                                          ARM64Reg Rn, ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Only double and single registers are supported!");
  Write32((M << 31) | (S << 29) | (0b11110001 << 21) | (type << 22) | (DecodeReg(Rm) << 16) |
          (opcode << 12) | (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a scalar "three same" instruction. Quad (vector) Rd is rejected.
// Layout: fixed 1 at bit 30, U at bit 29, fixed 0b11110001 at bit 21, size at bit 22,
// Rm at bit 16, opcode at bit 11, fixed 1 at bit 10, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitScalarThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
                                            ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Only double and single registers are supported!");
  Write32((1 << 30) | (U << 29) | (0b11110001 << 21) | (size << 22) | (DecodeReg(Rm) << 16) |
          (opcode << 11) | (1 << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a vector "three same" instruction. Single-precision scalar Rd is rejected; bit 30
// is set when Rd is a quad register.
// Layout: quad at bit 30, U at bit 29, fixed 0b1110001 at bit 21, size at bit 22,
// Rm at bit 16, opcode at bit 11, fixed 1 at bit 10, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn,
                                      ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "Singles are not supported!");
  bool quad = IsQuad(Rd);
  Write32((quad << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (DecodeReg(Rm) << 16) |
          (opcode << 11) | (1 << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a copy-class instruction.
// Layout: Q at bit 30, op at bit 29, fixed 0b111 at bit 25, imm5 at bit 16, imm4 at bit 11,
// fixed 1 at bit 10, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)
{
  Write32((Q << 30) | (op << 29) | (0b111 << 25) | (imm5 << 16) | (imm4 << 11) | (1 << 10) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a scalar two-register miscellaneous instruction.
// Layout: fixed 1 at bit 30, U at bit 29, fixed 0b11110001 at bit 21, size at bit 22,
// opcode at bit 12, fixed 1 at bit 11, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitScalar2RegMisc(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{
  Write32((1 << 30) | (U << 29) | (0b11110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a scalar pairwise instruction.
// Layout: fixed 1 at bit 30, U at bit 29, fixed 0b111100011 at bit 20, size at bit 22,
// opcode at bit 12, fixed 1 at bit 11, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{
  Write32((1 << 30) | (U << 29) | (0b111100011 << 20) | (size << 22) | (opcode << 12) | (1 << 11) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a vector two-register miscellaneous instruction. Single-precision scalar Rd is
// rejected. Unlike the other vector encoders here, Q is supplied by the caller, not derived
// from Rd.
// Layout: Q at bit 30, U at bit 29, fixed 0b1110001 at bit 21, size at bit 22,
// opcode at bit 12, fixed 1 at bit 11, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{
  ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "Singles are not supported!");
  Write32((Q << 30) | (U << 29) | (0b1110001 << 21) | (size << 22) | (opcode << 12) | (1 << 11) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a single-structure load/store without an Rm operand. Single-precision scalar Rt is
// rejected; bit 30 is set when Rt is a quad register.
// Layout: quad at bit 30, fixed 0b1101 at bit 24, L at bit 22, R at bit 21, opcode at bit 13,
// S at bit 12, size at bit 10, Rn at bit 5, Rt at bit 0.
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size,
                                                     ARM64Reg Rt, ARM64Reg Rn)
{
  ASSERT_MSG(DYNA_REC, !IsSingle(Rt), "Singles are not supported!");
  bool quad = IsQuad(Rt);
  Write32((quad << 30) | (0b1101 << 24) | (L << 22) | (R << 21) | (opcode << 13) | (S << 12) |
          (size << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
}
// Encodes a single-structure load/store that also carries an Rm operand. Single-precision
// scalar Rt is rejected; bit 30 is set when Rt is a quad register.
// Layout: quad at bit 30, fixed 0x1B at bit 23, L at bit 22, R at bit 21, Rm at bit 16,
// opcode at bit 13, S at bit 12, size at bit 10, Rn at bit 5, Rt at bit 0.
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size,
                                                     ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsSingle(Rt), "Singles are not supported!");
  bool quad = IsQuad(Rt);
  Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (DecodeReg(Rm) << 16) |
          (opcode << 13) | (S << 12) | (size << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
}
// Encodes a scalar one-source instruction. Quad (vector) Rd is rejected.
// Layout: M at bit 31, S at bit 29, fixed 0xF1 at bit 21, type at bit 22, opcode at bit 15,
// fixed 1 at bit 14, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
{
  ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Vector is not supported!");
  Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a conversion whose source Rn must be a general-purpose register (asserted via
// Rn <= ARM64Reg::SP).
// Layout: sf at bit 31, S at bit 29, fixed 0xF1 at bit 21, type at bit 22, rmode at bit 19,
// opcode at bit 16, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode,
                                       ARM64Reg Rd, ARM64Reg Rn)
{
  ASSERT_MSG(DYNA_REC, Rn <= ARM64Reg::SP, "Only GPRs are supported as source!");
  Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | (opcode << 16) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Shared implementation of FCVTS and FCVTU: converts the scalar floating-point value in Rn
// to an integer using rounding mode `round`.
//
// If Rd is a general-purpose register the result goes there; otherwise the result stays in
// a floating-point register.
//
// NOTE: despite its name, `sign` == true selects the FCVTU path and false the FCVTS path
// (FCVTS passes false, FCVTU passes true).
void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round,
                                               bool sign)
{
  DEBUG_ASSERT_MSG(DYNA_REC, IsScalar(Rn), "fcvts: Rn must be floating point");
  if (IsGPR(Rd))
  {
    // Use the encoding that transfers the result to a GPR.
    const bool sf = Is64Bit(Rd);
    const int type = IsDouble(Rn) ? 1 : 0;
    // Bit 0 of opcode carries `sign`; mode A additionally sets bit 2 and shares rmode 0
    // with mode N.
    int opcode = (sign ? 1 : 0);
    int rmode = 0;
    switch (round)
    {
    case RoundingMode::A:
      rmode = 0;
      opcode |= 4;
      break;
    case RoundingMode::P:
      rmode = 1;
      break;
    case RoundingMode::M:
      rmode = 2;
      break;
    case RoundingMode::Z:
      rmode = 3;
      break;
    case RoundingMode::N:
      rmode = 0;
      break;
    }
    EmitConversion2(sf, 0, true, type, rmode, opcode, 0, Rd, Rn);
  }
  else
  {
    // Use the encoding (vector, single) that keeps the result in the fp register.
    // sz bit 0 is set for a double-precision Rn; modes P and Z reuse the opcodes of N and M
    // and are distinguished by setting sz bit 1.
    int sz = IsDouble(Rn);
    int opcode = 0;
    switch (round)
    {
    case RoundingMode::A:
      opcode = 0x1C;
      break;
    case RoundingMode::N:
      opcode = 0x1A;
      break;
    case RoundingMode::M:
      opcode = 0x1B;
      break;
    case RoundingMode::P:
      opcode = 0x1A;
      sz |= 2;
      break;
    case RoundingMode::Z:
      opcode = 0x1B;
      sz |= 2;
      break;
    }
    // Layout: fixed 0x5E at bit 24, sign at bit 29, sz at bit 22, fixed 1 at bit 21,
    // opcode at bit 12, fixed 2 at bit 10, Rn at bit 5, Rd at bit 0.
    Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) |
            (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  }
}
// FCVTS: float-to-integer conversion with the given rounding mode; calls the shared
// implementation with its `sign` argument set to false.
void ARM64FloatEmitter::FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round)
{
  EmitConvertScalarToInt(Rd, Rn, round, false);
}
// FCVTU: float-to-integer conversion with the given rounding mode; calls the shared
// implementation with its `sign` argument set to true.
void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round)
{
  EmitConvertScalarToInt(Rd, Rn, round, true);
}
// Encodes a conversion instruction with an explicit direction bit and scale field.
// Layout: sf at bit 31, S at bit 29, fixed 0xF0 at bit 21, direction at bit 21, type at
// bit 22, rmode at bit 19, opcode at bit 16, scale at bit 10, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode,
                                        u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
{
  Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) |
          (opcode << 16) | (scale << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
// Encodes a scalar floating-point compare. Quad (vector) Rn is rejected; bit 22 is set when
// Rn is a double. opcode2 occupies the low bits where other encoders place Rd.
// Layout: M at bit 31, S at bit 29, fixed 0xF1 at bit 21, is_double at bit 22, Rm at bit 16,
// op at bit 14, fixed 1 at bit 13, Rn at bit 5, opcode2 at bit 0.
void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsQuad(Rn), "Vector is not supported!");
  bool is_double = IsDouble(Rn);

  Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (DecodeReg(Rm) << 16) |
          (op << 14) | (1 << 13) | (DecodeReg(Rn) << 5) | opcode2);
}
// Encodes a scalar floating-point conditional select. Quad (vector) Rd is rejected; bit 22
// is set when Rd is a double.
// Layout: M at bit 31, S at bit 29, fixed 0xF1 at bit 21, is_double at bit 22, Rm at bit 16,
// cond at bit 12, fixed 3 at bit 10, Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn,
                                       ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Vector is not supported!");
  bool is_double = IsDouble(Rd);

  Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (DecodeReg(Rm) << 16) |
          (cond << 12) | (3 << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
  2062. void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2063. {
  2064. ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "Singles are not supported!");
  2065. bool quad = IsQuad(Rd);
  2066. u32 encoded_size = 0;
  2067. if (size == 16)
  2068. encoded_size = 1;
  2069. else if (size == 32)
  2070. encoded_size = 2;
  2071. else if (size == 64)
  2072. encoded_size = 3;
  2073. Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (DecodeReg(Rm) << 16) | (op << 12) |
  2074. (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2075. }
// Encodes a vector extract instruction. Single-precision scalar Rd is rejected; bit 30 is
// set when Rd is a quad register.
// Layout: quad at bit 30, fixed 23 at bit 25, op at bit 22, Rm at bit 16, imm4 at bit 11,
// Rn at bit 5, Rd at bit 0.
void ARM64FloatEmitter::EmitExtract(u32 imm4, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
  ASSERT_MSG(DYNA_REC, !IsSingle(Rd), "Singles are not supported!");
  bool quad = IsQuad(Rd);

  Write32((quad << 30) | (23 << 25) | (op << 22) | (DecodeReg(Rm) << 16) | (imm4 << 11) |
          (DecodeReg(Rn) << 5) | DecodeReg(Rd));
}
  2083. void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8)
  2084. {
  2085. ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Vector is not supported!");
  2086. bool is_double = !IsSingle(Rd);
  2087. Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | (imm8 << 13) |
  2088. (1 << 12) | (imm5 << 5) | DecodeReg(Rd));
  2089. }
  2090. void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
  2091. {
  2092. ASSERT_MSG(DYNA_REC, (imm & 0b1111000) != 0, "Can't have zero immh");
  2093. Write32((Q << 30) | (U << 29) | (0xF << 24) | (imm << 16) | (opcode << 11) | (1 << 10) |
  2094. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2095. }
  2096. void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 imm, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
  2097. {
  2098. Write32((1 << 30) | (U << 29) | (0x3E << 23) | (imm << 16) | (opcode << 11) | (1 << 10) |
  2099. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2100. }
  2101. void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt,
  2102. ARM64Reg Rn)
  2103. {
  2104. bool quad = IsQuad(Rt);
  2105. u32 encoded_size = 0;
  2106. if (size == 16)
  2107. encoded_size = 1;
  2108. else if (size == 32)
  2109. encoded_size = 2;
  2110. else if (size == 64)
  2111. encoded_size = 3;
  2112. Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | (encoded_size << 10) |
  2113. (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  2114. }
  2115. void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode,
  2116. ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
  2117. {
  2118. bool quad = IsQuad(Rt);
  2119. u32 encoded_size = 0;
  2120. if (size == 16)
  2121. encoded_size = 1;
  2122. else if (size == 32)
  2123. encoded_size = 2;
  2124. else if (size == 64)
  2125. encoded_size = 3;
  2126. Write32((quad << 30) | (0b11001 << 23) | (L << 22) | (DecodeReg(Rm) << 16) | (opcode << 12) |
  2127. (encoded_size << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  2128. }
  2129. void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd,
  2130. ARM64Reg Rn)
  2131. {
  2132. ASSERT_MSG(DYNA_REC, !IsQuad(Rd), "Vector is not supported!");
  2133. Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | (1 << 14) |
  2134. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2135. }
  2136. void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H,
  2137. ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2138. {
  2139. bool quad = IsQuad(Rd);
  2140. Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) |
  2141. (DecodeReg(Rm) << 16) | (opcode << 12) | (H << 11) | (DecodeReg(Rn) << 5) |
  2142. DecodeReg(Rd));
  2143. }
  2144. void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  2145. {
  2146. ASSERT_MSG(DYNA_REC, !(imm < -256 || imm > 255), "received too large offset: {}", imm);
  2147. Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (DecodeReg(Rn) << 5) |
  2148. DecodeReg(Rt));
  2149. }
  2150. void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt,
  2151. ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
  2152. {
  2153. u32 type_encode = 0;
  2154. u32 opc = 0;
  2155. switch (type)
  2156. {
  2157. case IndexType::Signed:
  2158. type_encode = 0b010;
  2159. break;
  2160. case IndexType::Post:
  2161. type_encode = 0b001;
  2162. break;
  2163. case IndexType::Pre:
  2164. type_encode = 0b011;
  2165. break;
  2166. case IndexType::Unsigned:
  2167. ASSERT_MSG(DYNA_REC, false, "IndexType::Unsigned is unsupported!");
  2168. break;
  2169. }
  2170. if (size == 128)
  2171. {
  2172. ASSERT_MSG(DYNA_REC, !(imm & 0xF), "Invalid offset {:#x}! (size {})", imm, size);
  2173. opc = 2;
  2174. imm >>= 4;
  2175. }
  2176. else if (size == 64)
  2177. {
  2178. ASSERT_MSG(DYNA_REC, !(imm & 0x7), "Invalid offset {:#x}! (size {})", imm, size);
  2179. opc = 1;
  2180. imm >>= 3;
  2181. }
  2182. else if (size == 32)
  2183. {
  2184. ASSERT_MSG(DYNA_REC, !(imm & 0x3), "Invalid offset {:#x}! (size {})", imm, size);
  2185. opc = 0;
  2186. imm >>= 2;
  2187. }
  2188. ASSERT_MSG(DYNA_REC, imm >= -64 && imm < 64, "imm too large for load/store pair! {}", imm);
  2189. Write32((opc << 30) | (0b1011 << 26) | (type_encode << 23) | (load << 22) | ((imm & 0x7F) << 15) |
  2190. (DecodeReg(Rt2) << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  2191. }
  2192. void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn,
  2193. ArithOption Rm)
  2194. {
  2195. ASSERT_MSG(DYNA_REC, Rm.IsExtended(), "Must contain an extended reg as Rm!");
  2196. u32 encoded_size = 0;
  2197. u32 encoded_op = 0;
  2198. if (size == 8)
  2199. {
  2200. encoded_size = 0;
  2201. encoded_op = 0;
  2202. }
  2203. else if (size == 16)
  2204. {
  2205. encoded_size = 1;
  2206. encoded_op = 0;
  2207. }
  2208. else if (size == 32)
  2209. {
  2210. encoded_size = 2;
  2211. encoded_op = 0;
  2212. }
  2213. else if (size == 64)
  2214. {
  2215. encoded_size = 3;
  2216. encoded_op = 0;
  2217. }
  2218. else if (size == 128)
  2219. {
  2220. encoded_size = 0;
  2221. encoded_op = 2;
  2222. }
  2223. if (load)
  2224. encoded_op |= 1;
  2225. const int decoded_Rm = DecodeReg(Rm.GetReg());
  2226. Write32((encoded_size << 30) | (encoded_op << 22) | (0b111100001 << 21) | (decoded_Rm << 16) |
  2227. Rm.GetData() | (1 << 11) | (DecodeReg(Rn) << 5) | DecodeReg(Rt));
  2228. }
  2229. void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh)
  2230. {
  2231. union
  2232. {
  2233. u8 hex;
  2234. struct
  2235. {
  2236. unsigned defgh : 5;
  2237. unsigned abc : 3;
  2238. };
  2239. } v;
  2240. v.hex = abcdefgh;
  2241. Write32((Q << 30) | (op << 29) | (0xF << 24) | (v.abc << 16) | (cmode << 12) | (o2 << 11) |
  2242. (1 << 10) | (v.defgh << 5) | DecodeReg(Rd));
  2243. }
  2244. void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  2245. {
  2246. EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
  2247. }
  2248. void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  2249. {
  2250. EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);
  2251. }
  2252. // Loadstore unscaled
  2253. void ARM64FloatEmitter::LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  2254. {
  2255. u32 encoded_size = 0;
  2256. u32 encoded_op = 0;
  2257. if (size == 8)
  2258. {
  2259. encoded_size = 0;
  2260. encoded_op = 1;
  2261. }
  2262. else if (size == 16)
  2263. {
  2264. encoded_size = 1;
  2265. encoded_op = 1;
  2266. }
  2267. else if (size == 32)
  2268. {
  2269. encoded_size = 2;
  2270. encoded_op = 1;
  2271. }
  2272. else if (size == 64)
  2273. {
  2274. encoded_size = 3;
  2275. encoded_op = 1;
  2276. }
  2277. else if (size == 128)
  2278. {
  2279. encoded_size = 0;
  2280. encoded_op = 3;
  2281. }
  2282. EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
  2283. }
  2284. void ARM64FloatEmitter::STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
  2285. {
  2286. u32 encoded_size = 0;
  2287. u32 encoded_op = 0;
  2288. if (size == 8)
  2289. {
  2290. encoded_size = 0;
  2291. encoded_op = 0;
  2292. }
  2293. else if (size == 16)
  2294. {
  2295. encoded_size = 1;
  2296. encoded_op = 0;
  2297. }
  2298. else if (size == 32)
  2299. {
  2300. encoded_size = 2;
  2301. encoded_op = 0;
  2302. }
  2303. else if (size == 64)
  2304. {
  2305. encoded_size = 3;
  2306. encoded_op = 0;
  2307. }
  2308. else if (size == 128)
  2309. {
  2310. encoded_size = 0;
  2311. encoded_op = 2;
  2312. }
  2313. EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
  2314. }
  2315. // Loadstore single structure
  2316. void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
  2317. {
  2318. bool S = 0;
  2319. u32 opcode = 0;
  2320. u32 encoded_size = 0;
  2321. ARM64Reg encoded_reg = ARM64Reg::INVALID_REG;
  2322. if (size == 8)
  2323. {
  2324. S = (index & 4) != 0;
  2325. opcode = 0;
  2326. encoded_size = index & 3;
  2327. if (index & 8)
  2328. encoded_reg = EncodeRegToQuad(Rt);
  2329. else
  2330. encoded_reg = EncodeRegToDouble(Rt);
  2331. }
  2332. else if (size == 16)
  2333. {
  2334. S = (index & 2) != 0;
  2335. opcode = 2;
  2336. encoded_size = (index & 1) << 1;
  2337. if (index & 4)
  2338. encoded_reg = EncodeRegToQuad(Rt);
  2339. else
  2340. encoded_reg = EncodeRegToDouble(Rt);
  2341. }
  2342. else if (size == 32)
  2343. {
  2344. S = (index & 1) != 0;
  2345. opcode = 4;
  2346. encoded_size = 0;
  2347. if (index & 2)
  2348. encoded_reg = EncodeRegToQuad(Rt);
  2349. else
  2350. encoded_reg = EncodeRegToDouble(Rt);
  2351. }
  2352. else if (size == 64)
  2353. {
  2354. S = 0;
  2355. opcode = 4;
  2356. encoded_size = 1;
  2357. if (index == 1)
  2358. encoded_reg = EncodeRegToQuad(Rt);
  2359. else
  2360. encoded_reg = EncodeRegToDouble(Rt);
  2361. }
  2362. EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn);
  2363. }
  2364. void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
  2365. {
  2366. bool S = 0;
  2367. u32 opcode = 0;
  2368. u32 encoded_size = 0;
  2369. ARM64Reg encoded_reg = ARM64Reg::INVALID_REG;
  2370. if (size == 8)
  2371. {
  2372. S = (index & 4) != 0;
  2373. opcode = 0;
  2374. encoded_size = index & 3;
  2375. if (index & 8)
  2376. encoded_reg = EncodeRegToQuad(Rt);
  2377. else
  2378. encoded_reg = EncodeRegToDouble(Rt);
  2379. }
  2380. else if (size == 16)
  2381. {
  2382. S = (index & 2) != 0;
  2383. opcode = 2;
  2384. encoded_size = (index & 1) << 1;
  2385. if (index & 4)
  2386. encoded_reg = EncodeRegToQuad(Rt);
  2387. else
  2388. encoded_reg = EncodeRegToDouble(Rt);
  2389. }
  2390. else if (size == 32)
  2391. {
  2392. S = (index & 1) != 0;
  2393. opcode = 4;
  2394. encoded_size = 0;
  2395. if (index & 2)
  2396. encoded_reg = EncodeRegToQuad(Rt);
  2397. else
  2398. encoded_reg = EncodeRegToDouble(Rt);
  2399. }
  2400. else if (size == 64)
  2401. {
  2402. S = 0;
  2403. opcode = 4;
  2404. encoded_size = 1;
  2405. if (index == 1)
  2406. encoded_reg = EncodeRegToQuad(Rt);
  2407. else
  2408. encoded_reg = EncodeRegToDouble(Rt);
  2409. }
  2410. EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
  2411. }
  2412. void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
  2413. {
  2414. EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);
  2415. }
  2416. void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
  2417. {
  2418. EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);
  2419. }
  2420. void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
  2421. {
  2422. EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);
  2423. }
  2424. void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
  2425. {
  2426. EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);
  2427. }
  2428. void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
  2429. {
  2430. bool S = 0;
  2431. u32 opcode = 0;
  2432. u32 encoded_size = 0;
  2433. ARM64Reg encoded_reg = ARM64Reg::INVALID_REG;
  2434. if (size == 8)
  2435. {
  2436. S = (index & 4) != 0;
  2437. opcode = 0;
  2438. encoded_size = index & 3;
  2439. if (index & 8)
  2440. encoded_reg = EncodeRegToQuad(Rt);
  2441. else
  2442. encoded_reg = EncodeRegToDouble(Rt);
  2443. }
  2444. else if (size == 16)
  2445. {
  2446. S = (index & 2) != 0;
  2447. opcode = 2;
  2448. encoded_size = (index & 1) << 1;
  2449. if (index & 4)
  2450. encoded_reg = EncodeRegToQuad(Rt);
  2451. else
  2452. encoded_reg = EncodeRegToDouble(Rt);
  2453. }
  2454. else if (size == 32)
  2455. {
  2456. S = (index & 1) != 0;
  2457. opcode = 4;
  2458. encoded_size = 0;
  2459. if (index & 2)
  2460. encoded_reg = EncodeRegToQuad(Rt);
  2461. else
  2462. encoded_reg = EncodeRegToDouble(Rt);
  2463. }
  2464. else if (size == 64)
  2465. {
  2466. S = 0;
  2467. opcode = 4;
  2468. encoded_size = 1;
  2469. if (index == 1)
  2470. encoded_reg = EncodeRegToQuad(Rt);
  2471. else
  2472. encoded_reg = EncodeRegToDouble(Rt);
  2473. }
  2474. EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn);
  2475. }
  2476. void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
  2477. {
  2478. bool S = 0;
  2479. u32 opcode = 0;
  2480. u32 encoded_size = 0;
  2481. ARM64Reg encoded_reg = ARM64Reg::INVALID_REG;
  2482. if (size == 8)
  2483. {
  2484. S = (index & 4) != 0;
  2485. opcode = 0;
  2486. encoded_size = index & 3;
  2487. if (index & 8)
  2488. encoded_reg = EncodeRegToQuad(Rt);
  2489. else
  2490. encoded_reg = EncodeRegToDouble(Rt);
  2491. }
  2492. else if (size == 16)
  2493. {
  2494. S = (index & 2) != 0;
  2495. opcode = 2;
  2496. encoded_size = (index & 1) << 1;
  2497. if (index & 4)
  2498. encoded_reg = EncodeRegToQuad(Rt);
  2499. else
  2500. encoded_reg = EncodeRegToDouble(Rt);
  2501. }
  2502. else if (size == 32)
  2503. {
  2504. S = (index & 1) != 0;
  2505. opcode = 4;
  2506. encoded_size = 0;
  2507. if (index & 2)
  2508. encoded_reg = EncodeRegToQuad(Rt);
  2509. else
  2510. encoded_reg = EncodeRegToDouble(Rt);
  2511. }
  2512. else if (size == 64)
  2513. {
  2514. S = 0;
  2515. opcode = 4;
  2516. encoded_size = 1;
  2517. if (index == 1)
  2518. encoded_reg = EncodeRegToQuad(Rt);
  2519. else
  2520. encoded_reg = EncodeRegToDouble(Rt);
  2521. }
  2522. EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
  2523. }
  2524. // Loadstore multiple structure
  2525. void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
  2526. {
  2527. ASSERT_MSG(DYNA_REC, !(count == 0 || count > 4), "Must have a count of 1 to 4 registers! ({})",
  2528. count);
  2529. u32 opcode = 0;
  2530. if (count == 1)
  2531. opcode = 0b111;
  2532. else if (count == 2)
  2533. opcode = 0b1010;
  2534. else if (count == 3)
  2535. opcode = 0b0110;
  2536. else if (count == 4)
  2537. opcode = 0b0010;
  2538. EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
  2539. }
  2540. void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn,
  2541. ARM64Reg Rm)
  2542. {
  2543. ASSERT_MSG(DYNA_REC, !(count == 0 || count > 4), "Must have a count of 1 to 4 registers! ({})",
  2544. count);
  2545. ASSERT_MSG(DYNA_REC, type == IndexType::Post, "Only post indexing is supported!");
  2546. u32 opcode = 0;
  2547. if (count == 1)
  2548. opcode = 0b111;
  2549. else if (count == 2)
  2550. opcode = 0b1010;
  2551. else if (count == 3)
  2552. opcode = 0b0110;
  2553. else if (count == 4)
  2554. opcode = 0b0010;
  2555. EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
  2556. }
  2557. void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
  2558. {
  2559. ASSERT_MSG(DYNA_REC, !(count == 0 || count > 4), "Must have a count of 1 to 4 registers! ({})",
  2560. count);
  2561. u32 opcode = 0;
  2562. if (count == 1)
  2563. opcode = 0b111;
  2564. else if (count == 2)
  2565. opcode = 0b1010;
  2566. else if (count == 3)
  2567. opcode = 0b0110;
  2568. else if (count == 4)
  2569. opcode = 0b0010;
  2570. EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
  2571. }
  2572. void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn,
  2573. ARM64Reg Rm)
  2574. {
  2575. ASSERT_MSG(DYNA_REC, !(count == 0 || count > 4), "Must have a count of 1 to 4 registers! ({})",
  2576. count);
  2577. ASSERT_MSG(DYNA_REC, type == IndexType::Post, "Only post indexing is supporte!");
  2578. u32 opcode = 0;
  2579. if (count == 1)
  2580. opcode = 0b111;
  2581. else if (count == 2)
  2582. opcode = 0b1010;
  2583. else if (count == 3)
  2584. opcode = 0b0110;
  2585. else if (count == 4)
  2586. opcode = 0b0010;
  2587. EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
  2588. }
  2589. // Scalar - 1 Source
  2590. void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
  2591. {
  2592. if (IsScalar(Rd) && IsScalar(Rn))
  2593. {
  2594. EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
  2595. }
  2596. else if (IsGPR(Rd) != IsGPR(Rn))
  2597. {
  2598. const ARM64Reg gpr = IsGPR(Rn) ? Rn : Rd;
  2599. const ARM64Reg fpr = IsGPR(Rn) ? Rd : Rn;
  2600. const int sf = Is64Bit(gpr) ? 1 : 0;
  2601. const int type = Is64Bit(gpr) ? (top ? 2 : 1) : 0;
  2602. const int rmode = top ? 1 : 0;
  2603. const int opcode = IsGPR(Rn) ? 7 : 6;
  2604. ASSERT_MSG(DYNA_REC, !top || IsQuad(fpr), "FMOV: top can only be used with quads");
  2605. // TODO: Should this check be more lenient? Sometimes you do want to do things like
  2606. // read the lower 32 bits of a double
  2607. ASSERT_MSG(DYNA_REC,
  2608. (!Is64Bit(gpr) && IsSingle(fpr)) ||
  2609. (Is64Bit(gpr) && ((IsDouble(fpr) && !top) || (IsQuad(fpr) && top))),
  2610. "FMOV: Mismatched sizes");
  2611. Write32((sf << 31) | (0x1e << 24) | (type << 22) | (1 << 21) | (rmode << 19) | (opcode << 16) |
  2612. (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2613. }
  2614. else
  2615. {
  2616. ASSERT_MSG(DYNA_REC, 0, "FMOV: Unsupported case");
  2617. }
  2618. }
  2619. // Loadstore paired
  2620. void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
  2621. s32 imm)
  2622. {
  2623. EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
  2624. }
  2625. void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn,
  2626. s32 imm)
  2627. {
  2628. EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
  2629. }
  2630. // Loadstore register offset
  2631. void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  2632. {
  2633. EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
  2634. }
  2635. void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, ArithOption Rm)
  2636. {
  2637. EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
  2638. }
  2639. void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
  2640. {
  2641. EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
  2642. }
  2643. void ARM64FloatEmitter::FNEG(ARM64Reg Rd, ARM64Reg Rn)
  2644. {
  2645. EmitScalar1Source(0, 0, IsDouble(Rd), 2, Rd, Rn);
  2646. }
  2647. void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
  2648. {
  2649. EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
  2650. }
  2651. void ARM64FloatEmitter::FRINTI(ARM64Reg Rd, ARM64Reg Rn)
  2652. {
  2653. EmitScalar1Source(0, 0, IsDouble(Rd), 15, Rd, Rn);
  2654. }
  2655. void ARM64FloatEmitter::FRECPE(ARM64Reg Rd, ARM64Reg Rn)
  2656. {
  2657. EmitScalar2RegMisc(0, IsDouble(Rd) ? 3 : 2, 0x1D, Rd, Rn);
  2658. }
  2659. void ARM64FloatEmitter::FRSQRTE(ARM64Reg Rd, ARM64Reg Rn)
  2660. {
  2661. EmitScalar2RegMisc(1, IsDouble(Rd) ? 3 : 2, 0x1D, Rd, Rn);
  2662. }
  2663. // Scalar - pairwise
  2664. void ARM64FloatEmitter::FADDP(ARM64Reg Rd, ARM64Reg Rn)
  2665. {
  2666. EmitScalarPairwise(1, IsDouble(Rd), 0b01101, Rd, Rn);
  2667. }
  2668. void ARM64FloatEmitter::FMAXP(ARM64Reg Rd, ARM64Reg Rn)
  2669. {
  2670. EmitScalarPairwise(1, IsDouble(Rd), 0b01111, Rd, Rn);
  2671. }
  2672. void ARM64FloatEmitter::FMINP(ARM64Reg Rd, ARM64Reg Rn)
  2673. {
  2674. EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01111, Rd, Rn);
  2675. }
  2676. void ARM64FloatEmitter::FMAXNMP(ARM64Reg Rd, ARM64Reg Rn)
  2677. {
  2678. EmitScalarPairwise(1, IsDouble(Rd), 0b01100, Rd, Rn);
  2679. }
  2680. void ARM64FloatEmitter::FMINNMP(ARM64Reg Rd, ARM64Reg Rn)
  2681. {
  2682. EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01100, Rd, Rn);
  2683. }
  2684. // Scalar - 2 Source
  2685. void ARM64FloatEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2686. {
  2687. ASSERT_MSG(DYNA_REC, IsDouble(Rd), "Only double registers are supported!");
  2688. EmitScalarThreeSame(0, 3, 0b10000, Rd, Rn, Rm);
  2689. }
  2690. void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2691. {
  2692. EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
  2693. }
  2694. void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2695. {
  2696. EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
  2697. }
  2698. void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2699. {
  2700. EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
  2701. }
  2702. void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2703. {
  2704. EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
  2705. }
  2706. void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2707. {
  2708. EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
  2709. }
  2710. void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2711. {
  2712. EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
  2713. }
  2714. void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2715. {
  2716. EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
  2717. }
  2718. void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2719. {
  2720. EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
  2721. }
  2722. void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2723. {
  2724. EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
  2725. }
  2726. void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  2727. {
  2728. EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
  2729. }
  2730. void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  2731. {
  2732. EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
  2733. }
  2734. void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  2735. {
  2736. EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
  2737. }
  2738. void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
  2739. {
  2740. EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
  2741. }
  2742. void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm,
  2743. ARM64Reg Ra, int opcode)
  2744. {
  2745. int type = isDouble ? 1 : 0;
  2746. int o1 = opcode >> 1;
  2747. int o0 = opcode & 1;
  2748. m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (DecodeReg(Rm) << 16) | (o0 << 15) |
  2749. (DecodeReg(Ra) << 10) | (DecodeReg(Rn) << 5) | DecodeReg(Rd));
  2750. }
  2751. // Scalar floating point immediate
  2752. void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8)
  2753. {
  2754. EmitScalarImm(0, 0, 0, 0, Rd, imm8);
  2755. }
  2756. // Vector
  2757. void ARM64FloatEmitter::ADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2758. {
  2759. EmitThreeSame(0, MathUtil::IntLog2(size) - 3, 0b10000, Rd, Rn, Rm);
  2760. }
  2761. void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2762. {
  2763. EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
  2764. }
  2765. void ARM64FloatEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2766. {
  2767. EmitThreeSame(0, 1, 3, Rd, Rn, Rm);
  2768. }
  2769. void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2770. {
  2771. EmitThreeSame(1, 3, 3, Rd, Rn, Rm);
  2772. }
  2773. void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2774. {
  2775. EmitThreeSame(1, 2, 3, Rd, Rn, Rm);
  2776. }
  2777. void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2778. {
  2779. EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
  2780. }
  2781. void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
  2782. {
  2783. u32 imm5 = 0;
  2784. if (size == 8)
  2785. {
  2786. imm5 = 1;
  2787. imm5 |= index << 1;
  2788. }
  2789. else if (size == 16)
  2790. {
  2791. imm5 = 2;
  2792. imm5 |= index << 2;
  2793. }
  2794. else if (size == 32)
  2795. {
  2796. imm5 = 4;
  2797. imm5 |= index << 3;
  2798. }
  2799. else if (size == 64)
  2800. {
  2801. imm5 = 8;
  2802. imm5 |= index << 4;
  2803. }
  2804. EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);
  2805. }
  2806. void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2807. {
  2808. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);
  2809. }
  2810. void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2811. {
  2812. EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
  2813. }
  2814. void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2815. {
  2816. EmitThreeSame(0, size >> 6, 0b11110, Rd, Rn, Rm);
  2817. }
  2818. void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2819. {
  2820. EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);
  2821. }
  2822. void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2823. {
  2824. EmitThreeSame(0, 2 | size >> 6, 0b11110, Rd, Rn, Rm);
  2825. }
  2826. void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2827. {
  2828. Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);
  2829. }
  2830. void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2831. {
  2832. Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);
  2833. }
  2834. void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2835. {
  2836. Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);
  2837. }
  2838. void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2839. {
  2840. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);
  2841. }
  2842. void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2843. {
  2844. Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
  2845. }
  2846. void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2847. {
  2848. EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
  2849. }
  2850. void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2851. {
  2852. EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);
  2853. }
  2854. void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2855. {
  2856. Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
  2857. }
  2858. void ARM64FloatEmitter::FRECPE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2859. {
  2860. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1D, Rd, Rn);
  2861. }
  2862. void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2863. {
  2864. Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);
  2865. }
  2866. void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2867. {
  2868. EmitThreeSame(0, 2 | (size >> 6), 0x1A, Rd, Rn, Rm);
  2869. }
  2870. void ARM64FloatEmitter::FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2871. {
  2872. EmitThreeSame(0, 2 | (size >> 6), 0x19, Rd, Rn, Rm);
  2873. }
  2874. void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)
  2875. {
  2876. Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);
  2877. }
  2878. void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2879. {
  2880. EmitThreeSame(0, 2, 3, Rd, Rn, Rm);
  2881. }
  2882. void ARM64FloatEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  2883. {
  2884. EmitThreeSame(0, 3, 3, Rd, Rn, Rm);
  2885. }
  2886. void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2887. {
  2888. Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
  2889. }
  2890. void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2891. {
  2892. Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);
  2893. }
  2894. void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2895. {
  2896. Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
  2897. }
  2898. void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2899. {
  2900. Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
  2901. }
  2902. void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2903. {
  2904. Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);
  2905. }
  2906. void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
  2907. {
  2908. EmitShiftImm(IsQuad(Rd), 0, size * 2 - scale, 0x1C, Rd, Rn);
  2909. }
  2910. void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
  2911. {
  2912. EmitShiftImm(IsQuad(Rd), 1, size * 2 - scale, 0x1C, Rd, Rn);
  2913. }
  2914. void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2915. {
  2916. Emit2RegMisc(false, 0, dest_size >> 4, 0b10100, Rd, Rn);
  2917. }
  2918. void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2919. {
  2920. Emit2RegMisc(true, 0, dest_size >> 4, 0b10100, Rd, Rn);
  2921. }
  2922. void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2923. {
  2924. Emit2RegMisc(false, 1, dest_size >> 4, 0b10100, Rd, Rn);
  2925. }
  2926. void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2927. {
  2928. Emit2RegMisc(true, 1, dest_size >> 4, 0b10100, Rd, Rn);
  2929. }
  2930. void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2931. {
  2932. Emit2RegMisc(false, 0, dest_size >> 4, 0b10010, Rd, Rn);
  2933. }
  2934. void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
  2935. {
  2936. Emit2RegMisc(true, 0, dest_size >> 4, 0b10010, Rd, Rn);
  2937. }
  2938. // Move
  2939. void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  2940. {
  2941. u32 imm5 = 0;
  2942. if (size == 8)
  2943. imm5 = 1;
  2944. else if (size == 16)
  2945. imm5 = 2;
  2946. else if (size == 32)
  2947. imm5 = 4;
  2948. else if (size == 64)
  2949. imm5 = 8;
  2950. EmitCopy(IsQuad(Rd), 0, imm5, 1, Rd, Rn);
  2951. }
  2952. void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn)
  2953. {
  2954. u32 imm5 = 0;
  2955. if (size == 8)
  2956. {
  2957. imm5 = 1;
  2958. imm5 |= index << 1;
  2959. }
  2960. else if (size == 16)
  2961. {
  2962. imm5 = 2;
  2963. imm5 |= index << 2;
  2964. }
  2965. else if (size == 32)
  2966. {
  2967. imm5 = 4;
  2968. imm5 |= index << 3;
  2969. }
  2970. else if (size == 64)
  2971. {
  2972. imm5 = 8;
  2973. imm5 |= index << 4;
  2974. }
  2975. EmitCopy(1, 0, imm5, 3, Rd, Rn);
  2976. }
  2977. void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2)
  2978. {
  2979. u32 imm5 = 0, imm4 = 0;
  2980. if (size == 8)
  2981. {
  2982. imm5 = 1;
  2983. imm5 |= index1 << 1;
  2984. imm4 = index2;
  2985. }
  2986. else if (size == 16)
  2987. {
  2988. imm5 = 2;
  2989. imm5 |= index1 << 2;
  2990. imm4 = index2 << 1;
  2991. }
  2992. else if (size == 32)
  2993. {
  2994. imm5 = 4;
  2995. imm5 |= index1 << 3;
  2996. imm4 = index2 << 2;
  2997. }
  2998. else if (size == 64)
  2999. {
  3000. imm5 = 8;
  3001. imm5 |= index1 << 4;
  3002. imm4 = index2 << 3;
  3003. }
  3004. EmitCopy(1, 1, imm5, imm4, Rd, Rn);
  3005. }
  3006. void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
  3007. {
  3008. bool b64Bit = Is64Bit(Rd);
  3009. ASSERT_MSG(DYNA_REC, Rd < ARM64Reg::SP, "Destination must be a GPR!");
  3010. ASSERT_MSG(DYNA_REC, !(b64Bit && size != 64),
  3011. "Must have a size of 64 when destination is 64bit!");
  3012. u32 imm5 = 0;
  3013. if (size == 8)
  3014. {
  3015. imm5 = 1;
  3016. imm5 |= index << 1;
  3017. }
  3018. else if (size == 16)
  3019. {
  3020. imm5 = 2;
  3021. imm5 |= index << 2;
  3022. }
  3023. else if (size == 32)
  3024. {
  3025. imm5 = 4;
  3026. imm5 |= index << 3;
  3027. }
  3028. else if (size == 64)
  3029. {
  3030. imm5 = 8;
  3031. imm5 |= index << 4;
  3032. }
  3033. EmitCopy(b64Bit, 0, imm5, 7, Rd, Rn);
  3034. }
  3035. void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
  3036. {
  3037. bool b64Bit = Is64Bit(Rd);
  3038. ASSERT_MSG(DYNA_REC, Rd < ARM64Reg::SP, "Destination must be a GPR!");
  3039. ASSERT_MSG(DYNA_REC, size != 64, "SMOV doesn't support 64bit destination. Use UMOV!");
  3040. u32 imm5 = 0;
  3041. if (size == 8)
  3042. {
  3043. imm5 = 1;
  3044. imm5 |= index << 1;
  3045. }
  3046. else if (size == 16)
  3047. {
  3048. imm5 = 2;
  3049. imm5 |= index << 2;
  3050. }
  3051. else if (size == 32)
  3052. {
  3053. imm5 = 4;
  3054. imm5 |= index << 3;
  3055. }
  3056. EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);
  3057. }
  3058. // One source
  3059. void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
  3060. {
  3061. u32 dst_encoding = 0;
  3062. u32 src_encoding = 0;
  3063. if (size_to == 16)
  3064. dst_encoding = 3;
  3065. else if (size_to == 32)
  3066. dst_encoding = 0;
  3067. else if (size_to == 64)
  3068. dst_encoding = 1;
  3069. if (size_from == 16)
  3070. src_encoding = 3;
  3071. else if (size_from == 32)
  3072. src_encoding = 0;
  3073. else if (size_from == 64)
  3074. src_encoding = 1;
  3075. Emit1Source(0, 0, src_encoding, 4 | dst_encoding, Rd, Rn);
  3076. }
  3077. void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn)
  3078. {
  3079. if (IsScalar(Rn))
  3080. {
  3081. // Source is in FP register (like destination!). We must use a vector encoding.
  3082. bool sign = false;
  3083. int sz = IsDouble(Rn);
  3084. Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (DecodeReg(Rn) << 5) |
  3085. DecodeReg(Rd));
  3086. }
  3087. else
  3088. {
  3089. bool sf = Is64Bit(Rn);
  3090. u32 type = 0;
  3091. if (IsDouble(Rd))
  3092. type = 1;
  3093. EmitConversion(sf, 0, type, 0, 2, Rd, Rn);
  3094. }
  3095. }
  3096. void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
  3097. {
  3098. if (IsScalar(Rn))
  3099. {
  3100. // Source is in FP register (like destination!). We must use a vector encoding.
  3101. bool sign = true;
  3102. int sz = IsDouble(Rn);
  3103. Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (DecodeReg(Rn) << 5) |
  3104. DecodeReg(Rd));
  3105. }
  3106. else
  3107. {
  3108. bool sf = Is64Bit(Rn);
  3109. u32 type = 0;
  3110. if (IsDouble(Rd))
  3111. type = 1;
  3112. EmitConversion(sf, 0, type, 0, 3, Rd, Rn);
  3113. }
  3114. }
  3115. void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
  3116. {
  3117. bool sf = Is64Bit(Rn);
  3118. u32 type = 0;
  3119. if (IsDouble(Rd))
  3120. type = 1;
  3121. EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
  3122. }
  3123. void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
  3124. {
  3125. bool sf = Is64Bit(Rn);
  3126. u32 type = 0;
  3127. if (IsDouble(Rd))
  3128. type = 1;
  3129. EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
  3130. }
  3131. void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
  3132. {
  3133. EmitCompare(0, 0, 0, 0, Rn, Rm);
  3134. }
  3135. void ARM64FloatEmitter::FCMP(ARM64Reg Rn)
  3136. {
  3137. EmitCompare(0, 0, 0, 8, Rn, (ARM64Reg)0);
  3138. }
  3139. void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm)
  3140. {
  3141. EmitCompare(0, 0, 0, 0x10, Rn, Rm);
  3142. }
  3143. void ARM64FloatEmitter::FCMPE(ARM64Reg Rn)
  3144. {
  3145. EmitCompare(0, 0, 0, 0x18, Rn, (ARM64Reg)0);
  3146. }
  3147. void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3148. {
  3149. EmitThreeSame(0, size >> 6, 0x1C, Rd, Rn, Rm);
  3150. }
  3151. void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  3152. {
  3153. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xD, Rd, Rn);
  3154. }
  3155. void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3156. {
  3157. EmitThreeSame(1, size >> 6, 0x1C, Rd, Rn, Rm);
  3158. }
  3159. void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  3160. {
  3161. Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x0C, Rd, Rn);
  3162. }
  3163. void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3164. {
  3165. EmitThreeSame(1, 2 | (size >> 6), 0x1C, Rd, Rn, Rm);
  3166. }
  3167. void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  3168. {
  3169. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);
  3170. }
  3171. void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  3172. {
  3173. Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);
  3174. }
  3175. void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
  3176. {
  3177. Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
  3178. }
  3179. void ARM64FloatEmitter::FACGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3180. {
  3181. EmitThreeSame(1, size >> 6, 0x1D, Rd, Rn, Rm);
  3182. }
  3183. void ARM64FloatEmitter::FACGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3184. {
  3185. EmitThreeSame(1, 2 | (size >> 6), 0x1D, Rd, Rn, Rm);
  3186. }
  3187. void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
  3188. {
  3189. EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
  3190. }
  3191. // Permute
  3192. void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3193. {
  3194. EmitPermute(size, 0b001, Rd, Rn, Rm);
  3195. }
  3196. void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3197. {
  3198. EmitPermute(size, 0b010, Rd, Rn, Rm);
  3199. }
  3200. void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3201. {
  3202. EmitPermute(size, 0b011, Rd, Rn, Rm);
  3203. }
  3204. void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3205. {
  3206. EmitPermute(size, 0b101, Rd, Rn, Rm);
  3207. }
  3208. void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3209. {
  3210. EmitPermute(size, 0b110, Rd, Rn, Rm);
  3211. }
  3212. void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
  3213. {
  3214. EmitPermute(size, 0b111, Rd, Rn, Rm);
  3215. }
  3216. // Extract
  3217. void ARM64FloatEmitter::EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 index)
  3218. {
  3219. EmitExtract(index, 0, Rd, Rn, Rm);
  3220. }
  3221. // Scalar shift by immediate
  3222. void ARM64FloatEmitter::SHL(ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3223. {
  3224. constexpr size_t src_size = 64;
  3225. ASSERT_MSG(DYNA_REC, IsDouble(Rd), "Only double registers are supported!");
  3226. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3227. shift, src_size);
  3228. EmitScalarShiftImm(0, src_size | shift, 0b01010, Rd, Rn);
  3229. }
  3230. void ARM64FloatEmitter::URSHR(ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3231. {
  3232. constexpr size_t src_size = 64;
  3233. ASSERT_MSG(DYNA_REC, IsDouble(Rd), "Only double registers are supported!");
  3234. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3235. shift, src_size);
  3236. EmitScalarShiftImm(1, src_size * 2 - shift, 0b00100, Rd, Rn);
  3237. }
  3238. // Vector shift by immediate
  3239. void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3240. {
  3241. SSHLL(src_size, Rd, Rn, shift, false);
  3242. }
  3243. void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3244. {
  3245. SSHLL(src_size, Rd, Rn, shift, true);
  3246. }
  3247. void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3248. {
  3249. SHRN(dest_size, Rd, Rn, shift, false);
  3250. }
  3251. void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3252. {
  3253. SHRN(dest_size, Rd, Rn, shift, true);
  3254. }
  3255. void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3256. {
  3257. USHLL(src_size, Rd, Rn, shift, false);
  3258. }
  3259. void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3260. {
  3261. USHLL(src_size, Rd, Rn, shift, true);
  3262. }
  3263. void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
  3264. {
  3265. SXTL(src_size, Rd, Rn, false);
  3266. }
  3267. void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
  3268. {
  3269. SXTL(src_size, Rd, Rn, true);
  3270. }
  3271. void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
  3272. {
  3273. UXTL(src_size, Rd, Rn, false);
  3274. }
  3275. void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
  3276. {
  3277. UXTL(src_size, Rd, Rn, true);
  3278. }
  3279. void ARM64FloatEmitter::SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3280. {
  3281. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3282. shift, src_size);
  3283. EmitShiftImm(1, 0, src_size | shift, 0b01010, Rd, Rn);
  3284. }
  3285. void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
  3286. {
  3287. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3288. shift, src_size);
  3289. EmitShiftImm(upper, 0, src_size | shift, 0b10100, Rd, Rn);
  3290. }
  3291. void ARM64FloatEmitter::URSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
  3292. {
  3293. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3294. shift, src_size);
  3295. EmitShiftImm(1, 1, src_size * 2 - shift, 0b00100, Rd, Rn);
  3296. }
  3297. void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
  3298. {
  3299. ASSERT_MSG(DYNA_REC, shift < src_size, "Shift amount must be less than the element size! {} {}",
  3300. shift, src_size);
  3301. EmitShiftImm(upper, 1, src_size | shift, 0b10100, Rd, Rn);
  3302. }
  3303. void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
  3304. {
  3305. ASSERT_MSG(DYNA_REC, shift < dest_size, "Shift amount must be less than the element size! {} {}",
  3306. shift, dest_size);
  3307. EmitShiftImm(upper, 1, dest_size * 2 - shift, 0b10000, Rd, Rn);
  3308. }
  3309. void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
  3310. {
  3311. SSHLL(src_size, Rd, Rn, 0, upper);
  3312. }
  3313. void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
  3314. {
  3315. USHLL(src_size, Rd, Rn, 0, upper);
  3316. }
  3317. // vector x indexed element
  3318. void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
  3319. {
  3320. ASSERT_MSG(DYNA_REC, size == 32 || size == 64, "Only 32bit or 64bit sizes are supported! {}",
  3321. size);
  3322. bool L = false;
  3323. bool H = false;
  3324. if (size == 32)
  3325. {
  3326. L = index & 1;
  3327. H = (index >> 1) & 1;
  3328. }
  3329. else if (size == 64)
  3330. {
  3331. H = index == 1;
  3332. }
  3333. EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
  3334. }
  3335. void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
  3336. {
  3337. ASSERT_MSG(DYNA_REC, size == 32 || size == 64, "Only 32bit or 64bit sizes are supported! {}",
  3338. size);
  3339. bool L = false;
  3340. bool H = false;
  3341. if (size == 32)
  3342. {
  3343. L = index & 1;
  3344. H = (index >> 1) & 1;
  3345. }
  3346. else if (size == 64)
  3347. {
  3348. H = index == 1;
  3349. }
  3350. EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
  3351. }
  3352. // Modified Immediate
  3353. void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u64 imm, u8 shift)
  3354. {
  3355. bool Q = IsQuad(Rd);
  3356. u8 cmode = 0;
  3357. u8 op = 0;
  3358. u8 abcdefgh = imm & 0xFF;
  3359. if (size == 8)
  3360. {
  3361. ASSERT_MSG(DYNA_REC, shift == 0, "size8 doesn't support shift! ({})", shift);
  3362. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFULL), "size8 only supports 8bit values! ({})", imm);
  3363. }
  3364. else if (size == 16)
  3365. {
  3366. ASSERT_MSG(DYNA_REC, shift == 0 || shift == 8, "size16 only supports shift of 0 or 8! ({})",
  3367. shift);
  3368. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFULL), "size16 only supports 8bit values! ({})", imm);
  3369. if (shift == 8)
  3370. cmode |= 2;
  3371. }
  3372. else if (size == 32)
  3373. {
  3374. ASSERT_MSG(DYNA_REC, shift == 0 || shift == 8 || shift == 16 || shift == 24,
  3375. "size32 only supports shift of 0, 8, 16, or 24! ({})", shift);
  3376. // XXX: Implement support for MOVI - shifting ones variant
  3377. ASSERT_MSG(DYNA_REC, !(imm & ~0xFFULL), "size32 only supports 8bit values! ({})", imm);
  3378. switch (shift)
  3379. {
  3380. case 8:
  3381. cmode |= 2;
  3382. break;
  3383. case 16:
  3384. cmode |= 4;
  3385. break;
  3386. case 24:
  3387. cmode |= 6;
  3388. break;
  3389. default:
  3390. break;
  3391. }
  3392. }
  3393. else // 64
  3394. {
  3395. ASSERT_MSG(DYNA_REC, shift == 0, "size64 doesn't support shift! ({})", shift);
  3396. op = 1;
  3397. cmode = 0xE;
  3398. abcdefgh = 0;
  3399. for (int i = 0; i < 8; ++i)
  3400. {
  3401. u8 tmp = (imm >> (i << 3)) & 0xFF;
  3402. ASSERT_MSG(DYNA_REC, tmp == 0xFF || tmp == 0, "size64 Invalid immediate! ({} -> {})", imm,
  3403. tmp);
  3404. if (tmp == 0xFF)
  3405. abcdefgh |= (1 << i);
  3406. }
  3407. }
  3408. EncodeModImm(Q, op, cmode, 0, Rd, abcdefgh);
  3409. }
  3410. void ARM64FloatEmitter::ORR_BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift, u8 op)
  3411. {
  3412. bool Q = IsQuad(Rd);
  3413. u8 cmode = 1;
  3414. if (size == 16)
  3415. {
  3416. ASSERT_MSG(DYNA_REC, shift == 0 || shift == 8, "size16 only supports shift of 0 or 8! {}",
  3417. shift);
  3418. if (shift == 8)
  3419. cmode |= 2;
  3420. }
  3421. else if (size == 32)
  3422. {
  3423. ASSERT_MSG(DYNA_REC, shift == 0 || shift == 8 || shift == 16 || shift == 24,
  3424. "size32 only supports shift of 0, 8, 16, or 24! ({})", shift);
  3425. // XXX: Implement support for MOVI - shifting ones variant
  3426. switch (shift)
  3427. {
  3428. case 8:
  3429. cmode |= 2;
  3430. break;
  3431. case 16:
  3432. cmode |= 4;
  3433. break;
  3434. case 24:
  3435. cmode |= 6;
  3436. break;
  3437. default:
  3438. break;
  3439. }
  3440. }
  3441. else
  3442. {
  3443. ASSERT_MSG(DYNA_REC, false, "Only size of 16 or 32 is supported! ({})", size);
  3444. }
  3445. EncodeModImm(Q, op, cmode, 0, Rd, imm);
  3446. }
  3447. void ARM64FloatEmitter::ORR(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
  3448. {
  3449. ORR_BIC(size, Rd, imm, shift, 0);
  3450. }
  3451. void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm, u8 shift)
  3452. {
  3453. ORR_BIC(size, Rd, imm, shift, 1);
  3454. }
  3455. void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers, ARM64Reg tmp)
  3456. {
  3457. bool bundled_loadstore = false;
  3458. for (int i = 0; i < 32; ++i)
  3459. {
  3460. if (!registers[i])
  3461. continue;
  3462. int count = 0;
  3463. while (++count < 4 && (i + count) < 32 && registers[i + count])
  3464. {
  3465. }
  3466. if (count > 1)
  3467. {
  3468. bundled_loadstore = true;
  3469. break;
  3470. }
  3471. }
  3472. if (bundled_loadstore && tmp != ARM64Reg::INVALID_REG)
  3473. {
  3474. DEBUG_ASSERT_MSG(DYNA_REC, Is64Bit(tmp), "Expected a 64-bit temporary register!");
  3475. int num_regs = registers.Count();
  3476. m_emit->SUB(ARM64Reg::SP, ARM64Reg::SP, num_regs * 16);
  3477. m_emit->ADD(tmp, ARM64Reg::SP, 0);
  3478. std::vector<ARM64Reg> island_regs;
  3479. for (int i = 0; i < 32; ++i)
  3480. {
  3481. if (!registers[i])
  3482. continue;
  3483. int count = 0;
  3484. // 0 = true
  3485. // 1 < 4 && registers[i + 1] true!
  3486. // 2 < 4 && registers[i + 2] true!
  3487. // 3 < 4 && registers[i + 3] true!
  3488. // 4 < 4 && registers[i + 4] false!
  3489. while (++count < 4 && (i + count) < 32 && registers[i + count])
  3490. {
  3491. }
  3492. if (count == 1)
  3493. island_regs.push_back(ARM64Reg::Q0 + i);
  3494. else
  3495. ST1(64, count, IndexType::Post, ARM64Reg::Q0 + i, tmp);
  3496. i += count - 1;
  3497. }
  3498. // Handle island registers
  3499. std::vector<ARM64Reg> pair_regs;
  3500. for (auto& it : island_regs)
  3501. {
  3502. pair_regs.push_back(it);
  3503. if (pair_regs.size() == 2)
  3504. {
  3505. STP(128, IndexType::Post, pair_regs[0], pair_regs[1], tmp, 32);
  3506. pair_regs.clear();
  3507. }
  3508. }
  3509. if (pair_regs.size())
  3510. STR(128, IndexType::Post, pair_regs[0], tmp, 16);
  3511. }
  3512. else
  3513. {
  3514. std::vector<ARM64Reg> pair_regs;
  3515. for (auto it : registers)
  3516. {
  3517. pair_regs.push_back(ARM64Reg::Q0 + it);
  3518. if (pair_regs.size() == 2)
  3519. {
  3520. STP(128, IndexType::Pre, pair_regs[0], pair_regs[1], ARM64Reg::SP, -32);
  3521. pair_regs.clear();
  3522. }
  3523. }
  3524. if (pair_regs.size())
  3525. STR(128, IndexType::Pre, pair_regs[0], ARM64Reg::SP, -16);
  3526. }
  3527. }
  3528. void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, ARM64Reg tmp)
  3529. {
  3530. bool bundled_loadstore = false;
  3531. int num_regs = registers.Count();
  3532. for (int i = 0; i < 32; ++i)
  3533. {
  3534. if (!registers[i])
  3535. continue;
  3536. int count = 0;
  3537. while (++count < 4 && (i + count) < 32 && registers[i + count])
  3538. {
  3539. }
  3540. if (count > 1)
  3541. {
  3542. bundled_loadstore = true;
  3543. break;
  3544. }
  3545. }
  3546. if (bundled_loadstore && tmp != ARM64Reg::INVALID_REG)
  3547. {
  3548. // The temporary register is only used to indicate that we can use this code path
  3549. std::vector<ARM64Reg> island_regs;
  3550. for (int i = 0; i < 32; ++i)
  3551. {
  3552. if (!registers[i])
  3553. continue;
  3554. int count = 0;
  3555. while (++count < 4 && (i + count) < 32 && registers[i + count])
  3556. {
  3557. }
  3558. if (count == 1)
  3559. island_regs.push_back(ARM64Reg::Q0 + i);
  3560. else
  3561. LD1(64, count, IndexType::Post, ARM64Reg::Q0 + i, ARM64Reg::SP);
  3562. i += count - 1;
  3563. }
  3564. // Handle island registers
  3565. std::vector<ARM64Reg> pair_regs;
  3566. for (auto& it : island_regs)
  3567. {
  3568. pair_regs.push_back(it);
  3569. if (pair_regs.size() == 2)
  3570. {
  3571. LDP(128, IndexType::Post, pair_regs[0], pair_regs[1], ARM64Reg::SP, 32);
  3572. pair_regs.clear();
  3573. }
  3574. }
  3575. if (pair_regs.size())
  3576. LDR(128, IndexType::Post, pair_regs[0], ARM64Reg::SP, 16);
  3577. }
  3578. else
  3579. {
  3580. bool odd = num_regs % 2;
  3581. std::vector<ARM64Reg> pair_regs;
  3582. for (int i = 31; i >= 0; --i)
  3583. {
  3584. if (!registers[i])
  3585. continue;
  3586. if (odd)
  3587. {
  3588. // First load must be a regular LDR if odd
  3589. odd = false;
  3590. LDR(128, IndexType::Post, ARM64Reg::Q0 + i, ARM64Reg::SP, 16);
  3591. }
  3592. else
  3593. {
  3594. pair_regs.push_back(ARM64Reg::Q0 + i);
  3595. if (pair_regs.size() == 2)
  3596. {
  3597. LDP(128, IndexType::Post, pair_regs[1], pair_regs[0], ARM64Reg::SP, 32);
  3598. pair_regs.clear();
  3599. }
  3600. }
  3601. }
  3602. }
  3603. }
  3604. void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3605. {
  3606. if (!Is64Bit(Rn))
  3607. {
  3608. // To handle 32-bit logical immediates, the very easiest thing is to repeat
  3609. // the input value twice to make a 64-bit word. The correct encoding of that
  3610. // as a logical immediate will also be the correct encoding of the 32-bit
  3611. // value.
  3612. //
  3613. // Doing this here instead of in the LogicalImm constructor makes it easier
  3614. // to check if the input is all ones.
  3615. imm = (imm << 32) | (imm & 0xFFFFFFFF);
  3616. }
  3617. if (imm == 0)
  3618. {
  3619. MOVZ(Rd, 0);
  3620. }
  3621. else if ((~imm) == 0)
  3622. {
  3623. if (Rd != Rn)
  3624. MOV(Rd, Rn);
  3625. }
  3626. else if (const auto result = LogicalImm(imm, GPRSize::B64))
  3627. {
  3628. AND(Rd, Rn, result);
  3629. }
  3630. else
  3631. {
  3632. ASSERT_MSG(DYNA_REC, scratch != ARM64Reg::INVALID_REG,
  3633. "ANDI2R - failed to construct logical immediate value from {:#10x}, need scratch",
  3634. imm);
  3635. MOVI2R(scratch, imm);
  3636. AND(Rd, Rn, scratch);
  3637. }
  3638. }
  3639. void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3640. {
  3641. if (!Is64Bit(Rn))
  3642. {
  3643. // To handle 32-bit logical immediates, the very easiest thing is to repeat
  3644. // the input value twice to make a 64-bit word. The correct encoding of that
  3645. // as a logical immediate will also be the correct encoding of the 32-bit
  3646. // value.
  3647. //
  3648. // Doing this here instead of in the LogicalImm constructor makes it easier
  3649. // to check if the input is all ones.
  3650. imm = (imm << 32) | (imm & 0xFFFFFFFF);
  3651. }
  3652. if (imm == 0)
  3653. {
  3654. if (Rd != Rn)
  3655. MOV(Rd, Rn);
  3656. }
  3657. else if ((~imm) == 0)
  3658. {
  3659. MOVN(Rd, 0);
  3660. }
  3661. else if (const auto result = LogicalImm(imm, GPRSize::B64))
  3662. {
  3663. ORR(Rd, Rn, result);
  3664. }
  3665. else
  3666. {
  3667. ASSERT_MSG(DYNA_REC, scratch != ARM64Reg::INVALID_REG,
  3668. "ORRI2R - failed to construct logical immediate value from {:#10x}, need scratch",
  3669. imm);
  3670. MOVI2R(scratch, imm);
  3671. ORR(Rd, Rn, scratch);
  3672. }
  3673. }
  3674. void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3675. {
  3676. if (!Is64Bit(Rn))
  3677. {
  3678. // To handle 32-bit logical immediates, the very easiest thing is to repeat
  3679. // the input value twice to make a 64-bit word. The correct encoding of that
  3680. // as a logical immediate will also be the correct encoding of the 32-bit
  3681. // value.
  3682. //
  3683. // Doing this here instead of in the LogicalImm constructor makes it easier
  3684. // to check if the input is all ones.
  3685. imm = (imm << 32) | (imm & 0xFFFFFFFF);
  3686. }
  3687. if (imm == 0)
  3688. {
  3689. if (Rd != Rn)
  3690. MOV(Rd, Rn);
  3691. }
  3692. else if ((~imm) == 0)
  3693. {
  3694. MVN(Rd, Rn);
  3695. }
  3696. else if (const auto result = LogicalImm(imm, GPRSize::B64))
  3697. {
  3698. EOR(Rd, Rn, result);
  3699. }
  3700. else
  3701. {
  3702. ASSERT_MSG(DYNA_REC, scratch != ARM64Reg::INVALID_REG,
  3703. "EORI2R - failed to construct logical immediate value from {:#10x}, need scratch",
  3704. imm);
  3705. MOVI2R(scratch, imm);
  3706. EOR(Rd, Rn, scratch);
  3707. }
  3708. }
  3709. void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3710. {
  3711. if (!Is64Bit(Rn))
  3712. {
  3713. // To handle 32-bit logical immediates, the very easiest thing is to repeat
  3714. // the input value twice to make a 64-bit word. The correct encoding of that
  3715. // as a logical immediate will also be the correct encoding of the 32-bit
  3716. // value.
  3717. //
  3718. // Doing this here instead of in the LogicalImm constructor makes it easier
  3719. // to check if the input is all ones.
  3720. imm = (imm << 32) | (imm & 0xFFFFFFFF);
  3721. }
  3722. if (imm == 0)
  3723. {
  3724. ANDS(Rd, Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR,
  3725. Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR);
  3726. }
  3727. else if ((~imm) == 0)
  3728. {
  3729. ANDS(Rd, Rn, Rn);
  3730. }
  3731. else if (const auto result = LogicalImm(imm, GPRSize::B64))
  3732. {
  3733. ANDS(Rd, Rn, result);
  3734. }
  3735. else
  3736. {
  3737. ASSERT_MSG(DYNA_REC, scratch != ARM64Reg::INVALID_REG,
  3738. "ANDSI2R - failed to construct logical immediate value from {:#10x}, need scratch",
  3739. imm);
  3740. MOVI2R(scratch, imm);
  3741. ANDS(Rd, Rn, scratch);
  3742. }
  3743. }
  3744. void ARM64XEmitter::AddImmediate(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool shift, bool negative,
  3745. bool flags)
  3746. {
  3747. if (!negative)
  3748. {
  3749. if (!flags)
  3750. ADD(Rd, Rn, imm, shift);
  3751. else
  3752. ADDS(Rd, Rn, imm, shift);
  3753. }
  3754. else
  3755. {
  3756. if (!flags)
  3757. SUB(Rd, Rn, imm, shift);
  3758. else
  3759. SUBS(Rd, Rn, imm, shift);
  3760. }
  3761. }
  3762. void ARM64XEmitter::ADDI2R_internal(ARM64Reg Rd, ARM64Reg Rn, u64 imm, bool negative, bool flags,
  3763. ARM64Reg scratch)
  3764. {
  3765. DEBUG_ASSERT(Is64Bit(Rd) == Is64Bit(Rn));
  3766. if (!Is64Bit(Rd))
  3767. imm &= 0xFFFFFFFFULL;
  3768. bool has_scratch = scratch != ARM64Reg::INVALID_REG;
  3769. u64 imm_neg = Is64Bit(Rd) ? u64(-s64(imm)) : u64(-s64(imm)) & 0xFFFFFFFFuLL;
  3770. bool neg_neg = negative ? false : true;
  3771. // Special path for zeroes
  3772. if (imm == 0 && !flags)
  3773. {
  3774. if (Rd == Rn)
  3775. {
  3776. return;
  3777. }
  3778. else if (DecodeReg(Rd) != DecodeReg(ARM64Reg::SP) && DecodeReg(Rn) != DecodeReg(ARM64Reg::SP))
  3779. {
  3780. MOV(Rd, Rn);
  3781. return;
  3782. }
  3783. }
  3784. // Regular fast paths, aarch64 immediate instructions
  3785. // Try them all first
  3786. if (imm <= 0xFFF)
  3787. {
  3788. AddImmediate(Rd, Rn, imm, false, negative, flags);
  3789. return;
  3790. }
  3791. if (imm <= 0xFFFFFF && (imm & 0xFFF) == 0)
  3792. {
  3793. AddImmediate(Rd, Rn, imm >> 12, true, negative, flags);
  3794. return;
  3795. }
  3796. if (imm_neg <= 0xFFF)
  3797. {
  3798. AddImmediate(Rd, Rn, imm_neg, false, neg_neg, flags);
  3799. return;
  3800. }
  3801. if (imm_neg <= 0xFFFFFF && (imm_neg & 0xFFF) == 0)
  3802. {
  3803. AddImmediate(Rd, Rn, imm_neg >> 12, true, neg_neg, flags);
  3804. return;
  3805. }
  3806. // ADD+ADD is slower than MOVK+ADD, but inplace.
  3807. // But it supports a few more bits, so use it to avoid MOVK+MOVK+ADD.
  3808. // As this splits the addition in two parts, this must not be done on setting flags.
  3809. if (!flags && (imm >= 0x10000u || !has_scratch) && imm < 0x1000000u)
  3810. {
  3811. AddImmediate(Rd, Rn, imm & 0xFFF, false, negative, false);
  3812. AddImmediate(Rd, Rd, imm >> 12, true, negative, false);
  3813. return;
  3814. }
  3815. if (!flags && (imm_neg >= 0x10000u || !has_scratch) && imm_neg < 0x1000000u)
  3816. {
  3817. AddImmediate(Rd, Rn, imm_neg & 0xFFF, false, neg_neg, false);
  3818. AddImmediate(Rd, Rd, imm_neg >> 12, true, neg_neg, false);
  3819. return;
  3820. }
  3821. ASSERT_MSG(DYNA_REC, has_scratch,
  3822. "ADDI2R - failed to construct arithmetic immediate value from {:#10x}, need scratch",
  3823. imm);
  3824. negative ^= MOVI2R2(scratch, imm, imm_neg);
  3825. if (!negative)
  3826. {
  3827. if (!flags)
  3828. ADD(Rd, Rn, scratch);
  3829. else
  3830. ADDS(Rd, Rn, scratch);
  3831. }
  3832. else
  3833. {
  3834. if (!flags)
  3835. SUB(Rd, Rn, scratch);
  3836. else
  3837. SUBS(Rd, Rn, scratch);
  3838. }
  3839. }
  3840. void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3841. {
  3842. ADDI2R_internal(Rd, Rn, imm, false, false, scratch);
  3843. }
  3844. void ARM64XEmitter::ADDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3845. {
  3846. ADDI2R_internal(Rd, Rn, imm, false, true, scratch);
  3847. }
  3848. void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3849. {
  3850. ADDI2R_internal(Rd, Rn, imm, true, false, scratch);
  3851. }
  3852. void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3853. {
  3854. ADDI2R_internal(Rd, Rn, imm, true, true, scratch);
  3855. }
  3856. void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch)
  3857. {
  3858. ADDI2R_internal(Is64Bit(Rn) ? ARM64Reg::ZR : ARM64Reg::WZR, Rn, imm, true, true, scratch);
  3859. }
  3860. bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
  3861. {
  3862. if (const auto result = IsImmArithmetic(imm))
  3863. {
  3864. const auto [val, shift] = *result;
  3865. ADD(Rd, Rn, val, shift);
  3866. return true;
  3867. }
  3868. return false;
  3869. }
  3870. bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
  3871. {
  3872. if (const auto result = IsImmArithmetic(imm))
  3873. {
  3874. const auto [val, shift] = *result;
  3875. SUB(Rd, Rn, val, shift);
  3876. return true;
  3877. }
  3878. return false;
  3879. }
  3880. bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm)
  3881. {
  3882. if (const auto result = IsImmArithmetic(imm))
  3883. {
  3884. const auto [val, shift] = *result;
  3885. CMP(Rn, val, shift);
  3886. return true;
  3887. }
  3888. return false;
  3889. }
  3890. bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
  3891. {
  3892. if (const auto result = LogicalImm(imm, Is64Bit(Rd) ? GPRSize::B64 : GPRSize::B32))
  3893. {
  3894. AND(Rd, Rn, result);
  3895. return true;
  3896. }
  3897. return false;
  3898. }
  3899. bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
  3900. {
  3901. if (const auto result = LogicalImm(imm, Is64Bit(Rd) ? GPRSize::B64 : GPRSize::B32))
  3902. {
  3903. ORR(Rd, Rn, result);
  3904. return true;
  3905. }
  3906. return false;
  3907. }
  3908. bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm)
  3909. {
  3910. if (const auto result = LogicalImm(imm, Is64Bit(Rd) ? GPRSize::B64 : GPRSize::B32))
  3911. {
  3912. EOR(Rd, Rn, result);
  3913. return true;
  3914. }
  3915. return false;
  3916. }
  3917. void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate)
  3918. {
  3919. ASSERT_MSG(DYNA_REC, !IsDouble(Rd), "MOVI2F does not yet support double precision");
  3920. if (value == 0.0f)
  3921. {
  3922. FMOV(Rd, IsDouble(Rd) ? ARM64Reg::ZR : ARM64Reg::WZR);
  3923. if (negate)
  3924. FNEG(Rd, Rd);
  3925. // TODO: There are some other values we could generate with the float-imm instruction, like
  3926. // 1.0...
  3927. }
  3928. else if (const auto imm = FPImm8FromFloat(value))
  3929. {
  3930. FMOV(Rd, *imm);
  3931. }
  3932. else
  3933. {
  3934. ASSERT_MSG(DYNA_REC, scratch != ARM64Reg::INVALID_REG,
  3935. "Failed to find a way to generate FP immediate {} without scratch", value);
  3936. if (negate)
  3937. value = -value;
  3938. const u32 ival = std::bit_cast<u32>(value);
  3939. m_emit->MOVI2R(scratch, ival);
  3940. FMOV(Rd, scratch);
  3941. }
  3942. }
  3943. // TODO: Quite a few values could be generated easily using the MOVI instruction and friends.
  3944. void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch)
  3945. {
  3946. // TODO: Make it work with more element sizes
  3947. // TODO: Optimize - there are shorter solution for many values
  3948. ARM64Reg s = ARM64Reg::S0 + DecodeReg(Rd);
  3949. MOVI2F(s, value, scratch);
  3950. DUP(32, Rd, Rd, 0);
  3951. }
  3952. } // namespace Arm64Gen