core.cc

#include "core.h"
#include "network.h"
#include "syscall_model.h"
#include "branch_predictor.h"
#include "memory_manager_base.h"
#include "performance_model.h"
#include "instruction.h"
#include "clock_skew_minimization_object.h"
#include "core_manager.h"
#include "dvfs_manager.h"
#include "hooks_manager.h"
#include "trace_manager.h"
#include "simulator.h"
#include "log.h"
#include "config.hpp"
#include "stats.h"
#include "topology_info.h"
#include "cheetah_manager.h"

#include <cstring>

#if 0
extern Lock iolock;
# define MYLOG(...) { ScopedLock l(iolock); fflush(stderr); fprintf(stderr, "[%8lu] %dcor %-25s@%03u: ", getPerformanceModel()->getCycleCount(ShmemPerfModel::_USER_THREAD), m_core_id, __FUNCTION__, __LINE__); fprintf(stderr, __VA_ARGS__); fprintf(stderr, "\n"); fflush(stderr); }
#else
# define MYLOG(...) {}
#endif

#define VERBOSE 0
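
// Return a short human-readable name for a MemModeled value (used by the MYLOG tracing macro above).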
const char * ModeledString(Core::MemModeled modeled) {
   switch(modeled)
   {
      case Core::MEM_MODELED_NONE:          return "none";
      case Core::MEM_MODELED_COUNT:         return "count";
      case Core::MEM_MODELED_COUNT_TLBTIME: return "count/tlb";
      case Core::MEM_MODELED_TIME:          return "time";
      case Core::MEM_MODELED_FENCED:        return "fenced";
      case Core::MEM_MODELED_RETURN:        return "return";
   }
   return "?";
}

const char * core_state_names[] = {
   "running",
   "initializing",
   "stalled",
   "sleeping",
   "waking_up",
   "idle",
   "broken",
};
static_assert(Core::NUM_STATES == sizeof(core_state_names) / sizeof(core_state_names[0]),
              "Not enough values in core_state_names");

const char * Core::CoreStateString(Core::State state)
{
   LOG_ASSERT_ERROR(state < Core::NUM_STATES, "Invalid core state %d", state);
   return core_state_names[state];
}

Lock Core::m_global_core_lock;
UInt64 Core::g_instructions_hpi_global = 0;
UInt64 Core::g_instructions_hpi_global_callback = 0;

Core::Core(SInt32 id)
   : m_core_id(id)
   , m_dvfs_domain(Sim()->getDvfsManager()->getCoreDomain(id))
   , m_thread(NULL)
   , m_bbv(id)
   , m_topology_info(new TopologyInfo(id))
   , m_cheetah_manager(Sim()->getCfg()->getBool("core/cheetah/enabled") ? new CheetahManager(id) : NULL)
   , m_core_state(Core::IDLE)
   , m_icache_last_block(-1)
   , m_spin_loops(0)
   , m_spin_instructions(0)
   , m_spin_elapsed_time(SubsecondTime::Zero())
   , m_instructions(0)
   , m_instructions_callback(UINT64_MAX)
   , m_instructions_hpi_callback(0)
   , m_instructions_hpi_last(0)
{
   LOG_PRINT("Core ctor for: %d", id);

   registerStatsMetric("core", id, "instructions", &m_instructions);
   registerStatsMetric("core", id, "spin_loops", &m_spin_loops);
   registerStatsMetric("core", id, "spin_instructions", &m_spin_instructions);
   registerStatsMetric("core", id, "spin_elapsed_time", &m_spin_elapsed_time);

   Sim()->getStatsManager()->logTopology("hwcontext", id, id);

   m_network = new Network(this);
   m_clock_skew_minimization_client = ClockSkewMinimizationClient::create(this);
   m_shmem_perf_model = new ShmemPerfModel();

   LOG_PRINT("instantiated memory manager model");
   m_memory_manager = MemoryManagerBase::createMMU(
         Sim()->getCfg()->getString("caching_protocol/type"),
         this, m_network, m_shmem_perf_model);

   m_performance_model = PerformanceModel::create(this);
}

Core::~Core()
{
   if (m_cheetah_manager)
      delete m_cheetah_manager;
   delete m_topology_info;
   delete m_memory_manager;
   delete m_shmem_perf_model;
   delete m_performance_model;
   if (m_clock_skew_minimization_client)
      delete m_clock_skew_minimization_client;
   delete m_network;
}

void Core::enablePerformanceModels()
{
   getShmemPerfModel()->enable();
   getMemoryManager()->enableModels();
   getNetwork()->enableModels();
   getPerformanceModel()->enable();
}

void Core::disablePerformanceModels()
{
   getShmemPerfModel()->disable();
   getMemoryManager()->disableModels();
   getNetwork()->disableModels();
   getPerformanceModel()->disable();
}
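
// Account for 'count' instructions committed at 'address': update the per-core instruction
// counter, the BBV sampler and the performance model, fire HOOK_INSTR_COUNT when the per-core
// callback threshold is reached, and run the periodic-instructions (HPI) check.
// Returns true when HOOK_INSTR_COUNT fired, since that hook may have rescheduled the thread.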
bool
Core::countInstructions(IntPtr address, UInt32 count)
{
   bool check_rescheduled = false;

   m_instructions += count;
   if (m_bbv.sample())
      m_bbv.count(address, count);
   m_performance_model->countInstructions(address, count);

   if (isEnabledInstructionsCallback())
   {
      if (m_instructions >= m_instructions_callback)
      {
         disableInstructionsCallback();
         Sim()->getHooksManager()->callHooks(HookType::HOOK_INSTR_COUNT, m_core_id);
         // When using the fast-forward performance model, HOOK_INSTR_COUNT may cause a rescheduling
         // of the current thread, so let the caller know that it should make the appropriate checks
         check_rescheduled = true;
      }
   }

   hookPeriodicInsCheck();

   return check_rescheduled;
}

void
Core::hookPeriodicInsCheck()
{
   if (m_instructions > m_instructions_hpi_callback)
   {
      __sync_fetch_and_add(&g_instructions_hpi_global, m_instructions - m_instructions_hpi_last);
      m_instructions_hpi_callback += Sim()->getConfig()->getHPIInstructionsPerCore();
      m_instructions_hpi_last = m_instructions;

      // Quick, unlocked check if we should do the HOOK_PERIODIC_INS callback
      if (g_instructions_hpi_global > g_instructions_hpi_global_callback)
         hookPeriodicInsCall();
   }
}

void
Core::hookPeriodicInsCall()
{
   // Take the Thread lock, to make sure no other core calls us at the same time
   // and that the hook callback is also serialized w.r.t. other global events
   ScopedLock sl(Sim()->getThreadManager()->getLock());

   // Definitive, locked check if we should do the HOOK_PERIODIC_INS callback
   if (g_instructions_hpi_global > g_instructions_hpi_global_callback)
   {
      Sim()->getHooksManager()->callHooks(HookType::HOOK_PERIODIC_INS, g_instructions_hpi_global);
      g_instructions_hpi_global_callback += Sim()->getConfig()->getHPIInstructionsGlobal();
   }
}
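
// Query the branch predictor (if the performance model has one) for 'eip' and train it with the
// actual outcome. Returns true on a misprediction, false on a correct prediction or when no
// branch predictor is configured.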
bool
Core::accessBranchPredictor(IntPtr eip, bool taken, IntPtr target)
{
   PerformanceModel *prfmdl = getPerformanceModel();
   BranchPredictor *bp = prfmdl->getBranchPredictor();

   if (bp)
   {
      bool prediction = bp->predict(eip, target);
      bp->update(prediction, taken, eip, target);
      return (prediction != taken);
   }
   else
   {
      return false;
   }
}

MemoryResult
makeMemoryResult(HitWhere::where_t _hit_where, SubsecondTime _latency)
{
   LOG_ASSERT_ERROR(_hit_where < HitWhere::NUM_HITWHERES, "Invalid HitWhere %u", (long)_hit_where);
   MemoryResult res;
   res.hit_where = _hit_where;
   res.latency = _latency;
   return res;
}

void
Core::logMemoryHit(bool icache, mem_op_t mem_op_type, IntPtr address, MemModeled modeled, IntPtr eip)
{
   getMemoryManager()->addL1Hits(icache, mem_op_type, 1);
}
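
// Model an instruction fetch of 'instruction_size' bytes at 'address'. Fetches that stay within
// the most recently accessed cache line are reported as L1-I hits without going into the memory
// hierarchy; otherwise the full cache line is requested through initiateMemoryAccess().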
MemoryResult
Core::readInstructionMemory(IntPtr address, UInt32 instruction_size)
{
   LOG_PRINT("Instruction: Address(0x%x), Size(%u), Start READ",
             address, instruction_size);

   UInt64 blockmask = ~(getMemoryManager()->getCacheBlockSize() - 1);
   bool single_cache_line = ((address & blockmask) == ((address + instruction_size - 1) & blockmask));

   // Assume the core reads full instruction cache lines and caches them internally for subsequent instructions.
   // This reduces L1-I accesses and power to more realistic levels.
   // For Nehalem, it's in fact only 16 bytes; other architectures (Sandy Bridge) have a micro-op cache,
   // so this is just an approximation.

   // When accessing the same cache line as last time, don't access the L1-I
   if ((address & blockmask) == m_icache_last_block)
   {
      if (single_cache_line)
      {
         return makeMemoryResult(HitWhere::L1I, getMemoryManager()->getL1HitLatency());
      }
      else
      {
         // Instruction spanning cache lines: drop the first line, do access the second one
         address = (address & blockmask) + getMemoryManager()->getCacheBlockSize();
      }
   }

   // Update the most recent cache line accessed
   m_icache_last_block = address & blockmask;

   // For accesses spanning multiple cache lines, or when we are not sure that it will be a hit, call into the caches
   return initiateMemoryAccess(MemComponent::L1_ICACHE,
         Core::NONE, Core::READ, address & blockmask, NULL, getMemoryManager()->getCacheBlockSize(), MEM_MODELED_COUNT_TLBTIME, 0, SubsecondTime::MaxTime());
}

void Core::accessMemoryFast(bool icache, mem_op_t mem_op_type, IntPtr address)
{
   if (m_cheetah_manager && icache == false)
      m_cheetah_manager->access(mem_op_type, address);

   SubsecondTime latency = getMemoryManager()->coreInitiateMemoryAccessFast(icache, mem_op_type, address);

   if (latency > SubsecondTime::Zero())
      m_performance_model->handleMemoryLatency(latency, HitWhere::MISS);
}
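
// Perform a modeled memory access: split the [address, address + data_size) range into
// cache-line-sized chunks, send each chunk to the memory manager, and account for the resulting
// latency according to 'modeled' (queued as a pseudo-instruction for MEM_MODELED_TIME/FENCED,
// or passed directly to handleMemoryLatency() for MEM_MODELED_COUNT*). Returns the deepest hit
// location seen across all chunks and the total round-trip latency.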
MemoryResult
Core::initiateMemoryAccess(MemComponent::component_t mem_component,
      lock_signal_t lock_signal,
      mem_op_t mem_op_type,
      IntPtr address,
      Byte* data_buf, UInt32 data_size,
      MemModeled modeled,
      IntPtr eip,
      SubsecondTime now)
{
   MYLOG("access %lx+%u %c%c modeled(%s)", address, data_size, mem_op_type == Core::WRITE ? 'W' : 'R', mem_op_type == Core::READ_EX ? 'X' : ' ', ModeledString(modeled));

   if (data_size <= 0)
   {
      return makeMemoryResult((HitWhere::where_t)mem_component, SubsecondTime::Zero());
   }

   // Setting the initial time
   SubsecondTime initial_time = (now == SubsecondTime::MaxTime()) ? getPerformanceModel()->getElapsedTime() : now;

   // Protect from concurrent access by user thread (doing rewritten memops) and core thread (doing icache lookups)
   if (lock_signal != Core::UNLOCK)
      m_mem_lock.acquire();

#if 0
   static int i = 0;
   static Lock iolock;
   if ((i++) % 1000 == 0) {
      ScopedLock slio(iolock);
      printf("[TIME],%lu,", (Timer::now() / 100000) % 10000000);
      for(int i = 0; i < Sim()->getConfig()->getApplicationCores(); ++i)
         if (i == m_core_id)
            printf("%lu,%lu,%lu,", initial_time, getShmemPerfModel()->getCycleCount(ShmemPerfModel::_USER_THREAD), getShmemPerfModel()->getCycleCount(ShmemPerfModel::_SIM_THREAD));
         else
            printf(",,,");
      printf("\n");
   }
#endif

   getShmemPerfModel()->setElapsedTime(ShmemPerfModel::_USER_THREAD, initial_time);

   LOG_PRINT("Time(%s), %s - ADDR(0x%x), data_size(%u), START",
             itostr(initial_time).c_str(),
             ((mem_op_type == READ) ? "READ" : "WRITE"),
             address, data_size);

   UInt32 num_misses = 0;
   HitWhere::where_t hit_where = HitWhere::UNKNOWN;
   UInt32 cache_block_size = getMemoryManager()->getCacheBlockSize();

   IntPtr begin_addr = address;
   IntPtr end_addr = address + data_size;
   IntPtr begin_addr_aligned = begin_addr - (begin_addr % cache_block_size);
   IntPtr end_addr_aligned = end_addr - (end_addr % cache_block_size);
   Byte *curr_data_buffer_head = (Byte*) data_buf;

   for (IntPtr curr_addr_aligned = begin_addr_aligned; curr_addr_aligned <= end_addr_aligned; curr_addr_aligned += cache_block_size)
   {
      // Access the cache one line at a time
      UInt32 curr_offset;
      UInt32 curr_size;

      // Determine the offset
      if (curr_addr_aligned == begin_addr_aligned)
      {
         curr_offset = begin_addr % cache_block_size;
      }
      else
      {
         curr_offset = 0;
      }

      // Determine the size
      if (curr_addr_aligned == end_addr_aligned)
      {
         curr_size = (end_addr % cache_block_size) - (curr_offset);
         if (curr_size == 0)
         {
            continue;
         }
      }
      else
      {
         curr_size = cache_block_size - (curr_offset);
      }

      LOG_PRINT("Start InitiateSharedMemReq: ADDR(0x%x), offset(%u), curr_size(%u)", curr_addr_aligned, curr_offset, curr_size);

      if (m_cheetah_manager)
         m_cheetah_manager->access(mem_op_type, curr_addr_aligned);

      HitWhere::where_t this_hit_where = getMemoryManager()->coreInitiateMemoryAccess(
            mem_component,
            lock_signal,
            mem_op_type,
            curr_addr_aligned, curr_offset,
            data_buf ? curr_data_buffer_head : NULL, curr_size,
            modeled);

      if (hit_where != (HitWhere::where_t)mem_component)
      {
         // If it is a READ or READ_EX operation,
         // 'initiateSharedMemReq' causes curr_data_buffer_head
         // to be automatically filled in
         // If it is a WRITE operation,
         // 'initiateSharedMemReq' reads the data
         // from curr_data_buffer_head
         num_misses ++;
      }
      if (hit_where == HitWhere::UNKNOWN || (this_hit_where != HitWhere::UNKNOWN && this_hit_where > hit_where))
         hit_where = this_hit_where;

      LOG_PRINT("End InitiateSharedMemReq: ADDR(0x%x), offset(%u), curr_size(%u)", curr_addr_aligned, curr_offset, curr_size);

      // Increment the buffer head
      curr_data_buffer_head += curr_size;
   }

   // Get the final cycle time
   SubsecondTime final_time = getShmemPerfModel()->getElapsedTime(ShmemPerfModel::_USER_THREAD);
   LOG_ASSERT_ERROR(final_time >= initial_time,
                    "final_time(%s) < initial_time(%s)",
                    itostr(final_time).c_str(),
                    itostr(initial_time).c_str());

   LOG_PRINT("Time(%s), %s - ADDR(0x%x), data_size(%u), END\n",
             itostr(final_time).c_str(),
             ((mem_op_type == READ) ? "READ" : "WRITE"),
             address, data_size);

   if (lock_signal != Core::LOCK)
      m_mem_lock.release();

   // Calculate the round-trip time
   SubsecondTime shmem_time = final_time - initial_time;

   switch(modeled)
   {
#if 0
      case MEM_MODELED_DYNINFO:
      {
         DynamicInstructionInfo info = DynamicInstructionInfo::createMemoryInfo(eip, true, shmem_time, address, data_size, (mem_op_type == WRITE) ? Operand::WRITE : Operand::READ, num_misses, hit_where);
         m_performance_model->pushDynamicInstructionInfo(info);
#endif
      case MEM_MODELED_TIME:
      case MEM_MODELED_FENCED:
         if (m_performance_model->isEnabled())
         {
            /* queue a fake instruction that will account for the access latency */
            PseudoInstruction *i = new MemAccessInstruction(shmem_time, address, data_size, modeled == MEM_MODELED_FENCED);
            m_performance_model->queuePseudoInstruction(i);
         }
         break;
      case MEM_MODELED_COUNT:
      case MEM_MODELED_COUNT_TLBTIME:
         if (shmem_time > SubsecondTime::Zero())
            m_performance_model->handleMemoryLatency(shmem_time, hit_where);
         break;
      case MEM_MODELED_NONE:
      case MEM_MODELED_RETURN:
         break;
   }

   if (modeled != MEM_MODELED_NONE)
   {
      getShmemPerfModel()->incrTotalMemoryAccessLatency(shmem_time);
   }

   LOG_ASSERT_ERROR(hit_where != HitWhere::UNKNOWN, "HitWhere == UNKNOWN");

   return makeMemoryResult(hit_where, shmem_time);
}

// FIXME: This should actually be 'accessDataMemory()'
/*
 * accessMemory (lock_signal_t lock_signal, mem_op_t mem_op_type, IntPtr d_addr, char* data_buffer, UInt32 data_size)
 *
 * Arguments:
 *   lock_signal :: NONE, LOCK, or UNLOCK
 *   mem_op_type :: READ, READ_EX, or WRITE
 *   d_addr :: address of location we want to access (read or write)
 *   data_buffer :: buffer holding data for WRITE or buffer which must be written on a READ
 *   data_size :: size of data we must read/write
 *
 * Return Value:
 *   MemoryResult :: where the access hit (HitWhere) and its latency
 */
MemoryResult
Core::accessMemory(lock_signal_t lock_signal, mem_op_t mem_op_type, IntPtr d_addr, char* data_buffer, UInt32 data_size, MemModeled modeled, IntPtr eip, SubsecondTime now, bool is_fault_mask)
{
   // In PINTOOL mode, if the data is requested, copy it to/from real memory
   if (data_buffer && !is_fault_mask)
   {
      if (Sim()->getConfig()->getSimulationMode() == Config::PINTOOL)
      {
         nativeMemOp(NONE, mem_op_type, d_addr, data_buffer, data_size);
      }
      else if (Sim()->getConfig()->getSimulationMode() == Config::STANDALONE)
      {
         Sim()->getTraceManager()->accessMemory(m_core_id, lock_signal, mem_op_type, d_addr, data_buffer, data_size);
      }
      data_buffer = NULL; // initiateMemoryAccess's data is not used
   }

   if (modeled == MEM_MODELED_NONE)
      return makeMemoryResult(HitWhere::UNKNOWN, SubsecondTime::Zero());
   else
      return initiateMemoryAccess(MemComponent::L1_DCACHE, lock_signal, mem_op_type, d_addr, (Byte*) data_buffer, data_size, modeled, eip, now);
}
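
// Perform the operation on real (application) memory, copying data between 'data_buffer' and
// 'd_addr' with applicationMemCopy(). A LOCK (READ_EX) / UNLOCK (WRITE) pair is serialized
// through the global core lock, so locked read-modify-write sequences execute atomically.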
MemoryResult
Core::nativeMemOp(lock_signal_t lock_signal, mem_op_t mem_op_type, IntPtr d_addr, char* data_buffer, UInt32 data_size)
{
   if (data_size <= 0)
   {
      return makeMemoryResult(HitWhere::UNKNOWN, SubsecondTime::Zero());
   }

   if (lock_signal == LOCK)
   {
      assert(mem_op_type == READ_EX);
      m_global_core_lock.acquire();
   }

   if ( (mem_op_type == READ) || (mem_op_type == READ_EX) )
   {
      applicationMemCopy((void*) data_buffer, (void*) d_addr, (size_t) data_size);
   }
   else if (mem_op_type == WRITE)
   {
      applicationMemCopy((void*) d_addr, (void*) data_buffer, (size_t) data_size);
   }

   if (lock_signal == UNLOCK)
   {
      assert(mem_op_type == WRITE);
      m_global_core_lock.release();
   }

   return makeMemoryResult(HitWhere::UNKNOWN, SubsecondTime::Zero());
}

__attribute__((weak)) void
applicationMemCopy(void *dest, const void *src, size_t n)
{
   memcpy(dest, src, n);
}
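
// Emulate the CPUID instruction for the simulated core: leaf 0x0 raises the reported maximum
// leaf to at least 0xb, leaf 0x1 patches the core id and logical processor count into EBX, and
// leaf 0xb reports the simulated SMT/core topology. All other leaves return the native results.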
void
Core::emulateCpuid(UInt32 eax, UInt32 ecx, cpuid_result_t &res) const
{
   switch(eax)
   {
      case 0x0:
      {
         cpuid(0, 0, res);
         res.eax = std::max(UInt32(0xb), res.eax); // Maximum input eax: make sure 0xb is included
         break;
      }
      case 0x1:
      {
         // Return native results, except for CPU id
         cpuid(eax, ecx, res);
         res.ebx = (m_core_id << 24) | (Sim()->getConfig()->getApplicationCores() << 16) | (res.ebx & 0xffff);
         break;
      }
      case 0xb:
      {
         // Extended Topology Enumeration Leaf
         switch(ecx)
         {
            case 0:
               // Level 0: SMT
               res.eax = TopologyInfo::SMT_SHIFT_BITS;
               res.ebx = m_topology_info->smt_count; // SMT threads / core
               res.ecx = ecx | (1 << 8); // Level type = SMT
               break;
            case 1:
               // Level 1: cores
               res.eax = TopologyInfo::PACKAGE_SHIFT_BITS;
               res.ebx = m_topology_info->smt_count * m_topology_info->core_count; // HW contexts / package
               res.ecx = ecx | (2 << 8); // Level type = Core
               break;
            default:
               // Invalid level
               res.eax = 0;
               res.ebx = 0;
               res.ecx = ecx;
               break;
         }
         res.edx = m_topology_info->apic_id;
         break;
      }
      default:
      {
         // Return native results (original cpuid instruction is deleted)
         cpuid(eax, ecx, res);
         break;
      }
   }

#if VERBOSE
   printf("CPUID[%d]: %08x %08x => ", m_core_id, eax, ecx);
   printf("%08x %08x %08x %08x\n", res.eax, res.ebx, res.ecx, res.edx);
#endif
}