r800test.txt 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. The test program, in short:
  2. Execute the same instruction (or short instruction sequence) a number of
  3. times and use the E6 timer to measure how long it took.
  4. The E6 timer has a resolution of 255682Hz (3579545Hz / 14). R800 runs at
  5. 7159090Hz (2 * 3579545Hz). So the timer ticks once every 28 R800 ticks.
  6. -----------------------------------------------
  7. org #c000
  8. start equ #0100
  9. ld de,start
  10. ld hl,begin
  11. ld bc,begin1-begin
  12. ldir
  13. push de
  14. ld hl,loop
  15. ld bc,loop1-loop
  16. ldir
  17. pop hl
  18. ld bc,(20000-1)*(loop1-loop) ; number varies depending on
  19. ldir ; length of the test-sequence
  20. ld hl,end
  21. ld bc,end1-end
  22. ldir
  23. jp start
  24. begin di
  25. out (#e6),a
  26. begin1
  27. loop muluw hl,bc ; test sequence
  28. loop1
  29. end in a,(#e6)
  30. ld l,a
  31. in a,(#e7)
  32. ld h,a
  33. ei
  34. ret
  35. end1
  36. -----------------------------------------------
  37. Below are the results, it's formatted like this
  38. ----------------------------------------------
  39. [ prologue ]
  40. instruction-sequence [ repeat-count ]
  41. result: <raw-result>
  42. cycles: <cycles> [ instruction break-down ]
  43. notes:
  44. ----------------------------------------------
  45. The prologue consists of extra setup instructions that are executed only
  46. once; only a few tests have them.
  47. Instruction-sequence is the actual instruction that's been tested. Mostly this
  48. is a single instruction, but sometimes there is a pair of instructions.
  49. repeat-count indicates how many times the instruction-sequence is repeated.
  50. I tried to repeat the instruction as much as possible. But to fit longer
  51. instruction sequences (longer in number of opcode bytes) in memory, the
  52. repeat count is sometimes reduced.
  53. raw-result: this is the value of the E6 timer at the end of the test. Note
  54. that you cannot directly compare this number to other tests because other
  55. tests might have a different repeat-count.
  56. cycles: here I calculated the number of cycles required per
  57. 'instruction-sequence'-iteration. I calculated this by comparing the result
  58. to the 'nop' instruction (a nop takes 1 cycle). Note that the duration
  59. of most instructions is almost never exactly an integer multiple of the
  60. duration of the nop instruction. A possible explanation might be the
  61. refresh (only happens at the end of an instruction, so for longer instructions
  62. the chance of a refresh is relatively less). But we need additional tests to
  63. fully explain the refresh behaviour.
  64. Instruction break-down: Here I tried to split the instruction into micro-ops.
  65. I used the following notation:
  66. f opcode fetch, no page-break, takes 1 cycle
  67. r data read, no page-break, takes 1 cycle
  68. w data write, no page-break, takes 1 cycle
  69. F opcode fetch, page-break, takes 2 cycles
  70. R data read, page-break, takes 2 cycles
  71. W data write, page-break, takes 2 cycles
  72. x extra cycle (for various stuff)
  73. i IO
  74. ---------------------------------------------------------------
  75. * Instructions that don't do additional bus transactions (read, write, IO).
  76. nop [x 40000]
  77. result: 1629-1645
  78. openmsx-rev11643: 1637-1638
  79. cycles: 1 [f]
  80. add hl,bc [x 40000]
  81. result: 1633-1645
  82. openmsx-rev11643: 1637-1638
  83. cycles: 1 [f]
  84. cpl [x 40000]
  85. result: varies between 1628-1647, most often around 1638
  86. openmsx-rev11643: 1637-1638
  87. cycles: 1 [f]
  88. note: same result for cpl, daa, ccf, scf, exx, ex af,af', ld sp,hl, rrca, ...
  89. neg [x 20000]
  90. result: 1637 (most often 1637, sometimes 1638)
  91. openmsx-rev11643: 1643-1645
  92. cycles: 2 [ff]
  93. note: - measurement on real HW is MUCH more stable for this test compared to
  94. previous tests
  95. - same result for ld {a,i,r},{a,i,r}
  96. - all results below for the real HW probably vary as well, though in
  97. my initial tests on the real HW I only noted the most often
  98. occurring value. I should re-test, but that's a lot of work.
  99. im 0/1/2 [x 20000]
  100. result: 2453-2460, most often 2456
  101. openmsx-rev11643: 2459-2460
  102. cycles: 3 [ffx]
  103. di [x 40000]
  104. result: 3267
  105. openmsx-old: 3267
  106. openmsx-rev11643: 3273-3274
  107. cycles: 2 [fx]
  108. note: I added 'openmsx-old' tags where older openmsx revisions were better
  109. (possibly by accident) than the current revision.
  110. ei [x 40000] (VDP irq disabled)
  111. result: 1435 (!)
  112. openmsx-rev11643: 1434-1436
  113. cycles: 1 [f]
  114. note: This is faster than nop!!
  115. Both instructions are one cycle, but it seems there's no refresh delay
  116. after an 'ei' instruction. At least with this assumption the theory
  117. matches the measurement very closely. Implemented like this in openMSX
  118. and it matches the measurement very well.
  119. I verified that 'VDP irq disabled' has no influence on speed: I mean
  120. even with VDP irq disabled, the speed of 'nop' is the same as before.
  121. ld a,ixh [x 20000]
  122. result: 1637
  123. openmsx-old: 1636
  124. openmsx-rev11643: 1643-1645
  125. cycles: 2 [ff]
  126. muluw hl,bc [x 20000]
  127. result: 29354
  128. openmsx-rev11643: 29360-29361
  129. cycles: 36 [ffx34]
  130. mulub a,b [x 20000]
  131. result: 11419
  132. openmsx-rev11643: 11425-11427
  133. cycles: 14 [ffx12]
  134. * Instructions that do a single read/write operation
  135. ld (#d000),a [x 10000]
  136. result: 2449
  137. openmsx-rev11643: 2446-2447
  138. cycles: 6 [FffW]
  139. note: page-break when switching from opcode fetching to data writes
  140. and again when switching from data write to opcode fetching
  141. ld a,(#d000) [x 10000]
  142. result: 2449
  143. openmsx-rev11643: 2445-2447
  144. cycles: 6 [FffR]
  145. note: same as above but for read
  146. ld a,(#0100) [x 80(!)]
  147. result: 20
  148. openmsx-rev11643: 20
  149. cycles: 6 [FffR]
  150. note: opcode fetch and read are in the same 256-byte page,
  151. but still there is a page-break when switching from
  152. opcode reading to data reading
  153. [ ld hl,#d000 ]
  154. ld a,(hl) [x 40000]
  155. result: 6522
  156. openmsx-rev11643: 6522-6523
  157. cycles: 4 [FR]
  158. [ ld hl,#d000 ; ld bc,0 ]
  159. ld a,(hl) ; add hl,bc [x 20000]
  160. result: 4054
  161. openmsx-rev11643: 4054-4055
  162. cycles: 3 + 2 [fR F]
  163. [ ld hl,start+XX ; ld bc,2 ]
  164. ld a,(hl) ; add hl,bc [x 20000]
  165. result: 4055
  166. openmsx-rev11643: 4054-4055
  167. cycles: 3 + 2 [fR F]
  168. note: same test as above, but now the data is in the same page as
  169. the instructions. There is still a page-break.
  170. [ ld hl,start+XX ; ld bc,2 ]
  171. ld (hl),a ; add hl,bc [x 20000]
  172. result: 4054
  173. openmsx-rev11643: 4054-4055
  174. cycles: 3 + 2 [fW F]
  175. note: same as above but for writes
  176. ld r,(ix+o) [x 10000]
  177. result: 2854
  178. openmsx-rev11643: 2848-2849
  179. cycles: 7 [FffxR]
  180. note: between opcode fetching (3 bytes) and read there is one additional
  181. cycle to calculate the result of IX+o
  182. ld (ix+o),r [x 10000]
  183. result: 2854
  184. openmsx-rev11643: 2848-2849
  185. cycles: 7 [FffxW]
  186. note: same as above, but for write
  187. ld (ix+o),n [x 10000]
  188. result: 2861
  189. openmsx-rev11643: 2848-2849
  190. cycles: 7 [FfffW]
  191. note: Has the same number of cycles as 'ld (ix+o),r' even though this
  192. instruction is one byte longer. The IX+o calculation is done
  193. in parallel with fetching the last opcode byte.
  194. Z80 shows the same parallelism.
  195. note: Even though it has the same number of cycles as 'ld (ix+o),r', it
  196. is consistently slightly slower. A possible explanation is that there
  197. are page-breaks on opcode fetches in this test, since this instruction
  198. has length 4 instead of 3.
  199. bit 0,a [x 20000]
  200. result: 1637
  201. openmsx-old: 1636
  202. openmsx-rev11643: 1643-1645
  203. cycles: 2 [ff]
  204. bit 0,(hl) [x 20000]
  205. result: 4055
  206. openmsx-rev11643: 4065-4066
  207. cycles: 5 [FfR]
  208. bit 0,(ix+o) [x 10000]
  209. result: 2861
  210. openmsx-rev11643: 2848-2849
  211. cycles: 7 [FfffR]
  212. note: I always found it strange that the format of these instructions is
  213. <0xDD> <0xCB> <offset> <function> (offset before function)
  214. But this allows to hide the latency of calculating IX+o. The same
  215. happens on a Z80.
  216. * Instructions that read/write 2 bytes
  217. ld (#d000),hl [x 10000]
  218. result: 2854
  219. openmsx-rev11643: 2848-2849
  220. cycles: 7 [FffWw]
  221. note: no page-break for the second data write
  222. ld (#d0ff),hl [x 10000]
  223. result: 3264
  224. openmsx-rev11643: 3260-3262
  225. cycles: 8 [FffWW]
  226. note: of course here there must be a page-break
  227. ld hl,(#d000) [x 10000]
  228. result: 2854
  229. openmsx-rev11643: 2848-2849
  230. cycles: 7 [FffRr]
  231. note: similar for read
  232. ld hl,(#d0ff) [x 10000]
  233. result: 3264
  234. openmsx-rev11643: 3260-3262
  235. cycles: 8 [FffRR]
  236. push hl ; pop hl [x 20000]
  237. result: 8956
  238. openmsx-old: 8950
  239. openmsx-rev11643: 8928-8930
  240. cycles: 6 + 5 [FxWw FRr]
  241. note: push is one cycle slower than pop, probably because push first has
  242. to decrease SP, while pop can directly put the value of SP on the bus
  243. * Read-modify-write instructions (single byte)
  244. inc a [x 40000]
  245. result: 1628-1642
  246. openmsx-rev11643: 1637-1638
  247. cycles: 1 [f]
  248. inc (hl) [x 40000]
  249. result: 11393
  250. openmsx-rev11643: 11392-11393
  251. cycles: 7 [FRxW]
  252. note: This instruction is slower than I expected, but it can be explained
  253. like this:
  254. - there is one cycle between data read and data write to get the
  255. time to actually increase the value
  256. - when switching from data-read to data-write there is a page-break
  257. (even though the address is identical)
  258. But see also the 'ex (sp),hl' instruction below
  259. inc (ix+o) [x 10000]
  260. result: 4080
  261. openmsx-rev11643: 4076-4077
  262. cycles: 10 [FffxRxW]
  263. note: similar as above, but 3 cycles longer:
  264. - need to fetch 2 more opcode bytes
  265. - need to calculate IX+o before data read can be executed
  266. res 0,a [x 20000]
  267. result: 1637
  268. openmsx-old: 1637
  269. openmsx-rev11643: 1643-1645
  270. cycles: 2 [ff]
  271. res 0,(hl) [x 20000]
  272. result: 6523
  273. openmsx-rev11643: 6522-6523
  274. cycles: 8 [FfRxW]
  275. note: similar to 'inc (hl)' but opcode is one byte longer
  276. res 0,(ix+o) [x 10000]
  277. result: 4083
  278. openmsx-rev11643: 4076-4077
  279. cycles: 10 [FfffRxW]
  280. note: similar to 'inc (ix+o)'. Opcode is one byte longer, but cost
  281. of IX+o can be hidden
  282. rld [x 20000]
  283. result: 6529
  284. openmsx-old: 6528
  285. openmsx-rev11643: 6521-6523
  286. cycles: 8 [FfRxW]
  287. note: similar to 'res 0,(hl)'
  288. ld hl,#0000 ; ld de,#0100 ; ld bc,40000 ; ldir [no repeat] (src != dst)
  289. result: 11393
  290. openmsx-rev11643: 11393-11394
  291. cycles: 7 [FfRW]
  292. note: - as expected, 3 page-breaks are needed
  293. - there's no extra cycle needed to repeat the instruction
  294. TODO make a specific test for this ldi <-> ldir
  295. ld hl,#0100 ; ld de,#0100 ; ld bc,40000 ; ldir [no repeat] (src == dst)
  296. result: 11393
  297. openmsx-rev11643: 11393-11394
  298. cycles: 7 [FfRW]
  299. note: same timing as above, even though the read and write addresses are
  300. the same, there is a page-break in between
  301. * Read-modify-write 2 bytes
  302. ex (sp),hl [x 40000]
  303. result: 11393
  304. openmsx-rev11643: 11392-11393
  305. cycles: 7 [FRrww]
  306. note: - This result was unexpected to me: based on the results in the previous
  307. section, I expected a page-break between the read and writes.
  308. - 7 cycles is really the minimum. There are 5 bus transactions (1 opcode
  309. fetch, 2 data reads and 2 data writes). Stack and instructions are in
  310. different pages so there must be at least 2 page-breaks.
  311. - I've confirmed the [frrww] order, see exsphl.txt for details.
  312. [ ld sp,#xxFF ]
  313. ex (sp),hl [x 40000]
  314. result: 14660
  315. openmsx-rev11643: 14634-14635
  316. cycles: 9 [FRRwW]
  317. note: - Now we forced a page-break between reading/writing of the word. So two
  318. additional cycles.
  319. - This confirms that first two bytes are read and only then two bytes are
  320. written: the order [FRwRw] would only require 8 cycles
  321. [ ld sp,#01F0 ]
  322. ex (sp),hl [x 200(!)]
  323. result: 58
  324. openmsx-rev11643: 56-58
  325. cycles: 7 [FRrww]
  326. note: Stack pointer in same page as instructions. But there still seems to be
  327. a pagebreak between instructions and data.
  328. ex (sp),ix [x 20000]
  329. result: 6529
  330. openmsx-rev11643: 6521-6523
  331. cycles: 8 [FfRrww]
  332. [ ld sp,#xxFF ]
  333. ex (sp),ix [x 20000]
  334. result: 8153
  335. openmsx-rev11643: 8152-8153
  336. cycles: 10 [FfRRwW]
  337. * Jump instructions
  338. jr $+2 [x 20000]
  339. result: 2455
  340. openmsx-rev11643: 2459-2460
  341. cycles: 3 [ffx]
  342. note: one additional cycle to calculate PC+offset
  343. jr $+3 ; nop [x 10000] <- nop is skipped
  344. result: 1229
  345. openmsx-rev11643: 1229-1230
  346. cycles: 3 [ffx]
  347. note: - even if opcode fetching is not contiguous there is no extra
  348. page-break
  349. - slightly slower than test above, because code is less dense
  350. and thus overall there are more opcode-fetch page-breaks
  351. jr $+2 ; nop [x 10000]
  352. result: 1636
  353. openmsx-rev11643: 1637-1638
  354. cycles: 3 + 1 [ffx f]
  355. [ ld a,0 ; or a ]
  356. jr z,$+2 [x 20000]
  357. result: 2454
  358. openmsx-rev11643: 2459-2460
  359. cycles: 3 [ffx]
  360. note: condition is true, jump is taken
  361. [ ld a,1 ; or a ]
  362. jr z,$+2 [x 20000]
  363. result: 1637
  364. openmsx-old: 1636
  365. openmsx-rev11643: 1641-1642
  366. cycles: 2 [ff]
  367. note: condition is false, jump not taken
  368. PC+offset does not need to be calculated, so no extra cycle
  369. djnz $+2 [x 20000]
  370. result: 2454
  371. openmsx-rev11643: 2457-2459
  372. cycles: 3 [ffx]
  373. note: - whether the jump is taken or not does not matter for the program flow
  374. - 1/256 of the time the jump is not taken, this can explain why this
  375. test is slightly faster than 'jr $+2' (but it's well within the
  376. measurement error margin)
  377. [ ld b,0 ]
  378. djnz $+0 [x 2000(!)]
  379. result: 63014
  380. openmsx-rev11643: 62707-62708 (1)
  381. openmsx-rev11643: 63013-63014 (2)
  382. cycles: 3 [ffx]
  383. note: - Only repeated instruction 2000 times (instead of 20000) because otherwise
  384. timer would overflow.
  385. - The difference between (1) and (2) is that in (1) the djnz instruction
  386. is 2-bytes aligned, while in (2) it's mis-aligned. This matters
  387. because it will cause a lot more page-breaks. Maybe some of the
  388. regression in the other tests could also be explained like this?
  389. Till now I didn't pay much attention to instruction alignment.
  390. [ ld b,1 ]
  391. djnz $ ; inc b [x 10000]
  392. result: 1228
  393. openmsx-rev11643: 1229-1230
  394. cycles: 3 [ff f]
  395. note: djnz only takes 2 cycles when it doesn't jump, same result as for jr
  396. jp $+3 [x 10000]
  397. result: 2028
  398. openmsx-old: 2040
  399. openmsx-rev11643: not yet retested
  400. cycles: 5 [Fffx]
  401. note: Two additional cycles are unexpected. It can either be explained as
  402. two 'x' cycles or as one 'x' cycle plus a forced page-break.
  403. See also tests below.
  404. [ ld a,0 ; or a ]
  405. jp z,$+3 [x 10000]
  406. result: 2029
  407. openmsx: 2041
  408. openmsx-rev11643: not yet retested
  409. cycles: 5 [Fffx]
  410. note: condition is true, jump is taken
  411. [ ld a,1 ; or a ]
  412. jp z,$+3 [x 10000]
  413. result: 1229
  414. openmsx: 1227
  415. openmsx-rev11643: not yet retested
  416. cycles: 3 [fff]
  417. note: condition is false, jump not taken
  418. extra 2 cycles are gone
  419. [ ld hl,start+XX ; ld bc,2 ]
  420. add hl,bc ; jp (hl) [x 20000]
  421. result: 3260
  422. openmsx-rev11643: 3266-3268
  423. cycles: 4 [F fx]
  424. note: Similar as above, 2 extra cycles
  425. [ ld hl,start+XX ; ld bc,#0102 ; ld de,#FF02 ]
  426. add hl,bc ; jp (hl) ; add hl,de ; jp (hl) [x 10000]
  427. result: 3241
  428. openmsx-old: 3240
  429. openmsx-rev11643: 3201 TODO retest on real machine, I have the feeling
  430. I'm not doing the exact same test now and before
  431. cycles: 8 [F fx F fx]
  432. note: In this test every jump has a destination in a different 256-byte page.
  433. A single 'jp (hl)' instruction is still responsible for 3 cycles
  434. (though the cost of the forced page-break is pushed to the next
  435. instruction). This confirms that the two extra cycles in a jump are
  436. one 'x' cycle plus one forced page-break (in case of two 'x' cycles,
  437. this test would show yet an additional cost for the page-break).
  438. note: The 64 backward jumps in the first 256-byte page and the 64 forward
  439. jumps in the last page in this test are not executed.
  440. call $+3 ; pop hl [x 10000]
  441. result: 4880
  442. openmsx-old: 4891
  443. openmsx-rev11643: not yet retested
  444. cycles: 7 + 5 [FffWw FRr]
  445. note: This is again as expected, though the two extra cycles can be hidden
  446. by the data-writes.
  447. [ ld a,0 ; or a ]
  448. call z,$+3 ; pop hl [x 10000]
  449. result: 4880
  450. openmsx-old: 4891
  451. openmsx-rev11643: not yet retested
  452. cycles: 7 + 5 [FffWw FRr]
  453. note: condition is true, call is executed
  454. as expected, same result as unconditional call
  455. [ ld a,1 ; or a ]
  456. call z,$+3 [x 10000]
  457. result: 1229
  458. openmsx-old: 1228
  459. openmsx-rev11643: not yet retested
  460. cycles: 3 [fff]
  461. note: condition is false, call not executed
  462. [ ld hl,start+XX ; ld bc,3 ]
  463. add hl,bc ; push hl ; ret [x 10000]
  464. result: 4884-4887
  465. openmsx-rev11643: 4885-4886
  466. cycles: 12 [F fxWw FRr]
  467. note: 'RET' probably behaves as a 'JP (HL)' (two extra cycles), but these
  468. cycles can be hidden by the two data reads
  469. [ ld hl,start+XX ; ld bc,3 ; ld a,0 ; or a ]
  470. add hl,bc ; push hl ; ret z [x 10000]
  471. result: 4884-4887
  472. openmsx-rev11643: 4885-4886
  473. cycles: 12 [F fxWw FRr]
  474. note: Same test as above, but with a conditional ret that is always executed.
  475. As expected same result as above.
  476. [ ld a,1 ; or a ]
  477. ret z [x 40000]
  478. result: 1644
  479. openmsx-rev11643: 1637-1638
  480. cycles: 1 [f]
  481. * IO instructions
  482. in a,(0) [x 20000]
  483. result: 8066
  484. openmsx-old: 8126
  485. openmsx-rev11643: 8152-8153
  486. cycles: 10 [ffI8]
  487. note: - 8 cycles for IO seems a lot, but this makes it the same speed as Z80
  488. Z80: 4 cycles @ 3.5MHz
  489. R800: 8 cycles @ 7MHz
  490. This is needed to keep the timing on the external cartridge slots
  491. the same.
  492. - Besides IO port 0, I checked a lot of other ports, but not all.
  493. I did check some random unused port numbers and at least one input
  494. port from each IO-device in the turboR. Only the VDP ports are
  495. different (see below)
  496. in a,(#98) [x 20000]
  497. result: 44286
  498. openmsx-rev11643: 40713
  499. cycles: +/- 54
  500. note: - Can be explained by the 'intelligent' VDP-delay added by the S1990
  501. Need additional tests to accurately measure this.
  502. - Only IO ports 0x98-0x9B show this behaviour
  503. - TODO this still differs a lot between emulation and real HW
  504. [ ld b,0 ]
  505. in a,(0) ; djnz $-2 [x 1] [no page-break between in/djnz]
  506. result: 126
  507. openmsx-old: 132 136
  508. openmsx-rev11643: not yet retested
  509. cycles: 13 [ffI8 ffx]
  510. [ ld b,0 ]
  511. in a,(0) ; djnz $-2 [x 1] [page-break between in/djnz]
  512. result: 145
  513. openmsx-old: 156
  514. openmsx-rev11643: not yet retested
  515. cycles: 15 [FfI8 Ffx]
  516. note: - we forced a page-break between the in and the djnz instruction by
  517. properly aligning the start address
  518. - 2 extra cycles compared to test above, this confirms the decomposition
  519. of the in instruction is [ffI8] (as opposed to [ffI7 + page-break])
  520. out (0),a [x 20000]
  521. result: 8065
  522. openmsx-old: 8125
  523. openmsx-rev11643: 8152-8153
  524. cycles: 10 [ffI8]
  525. note: similar to 'in a,(0)'
  526. out (#98),a [x 20000]
  527. result: 44285
  528. openmsx-rev11643: 40713
  529. cycles: +/- 54
  530. note: similar to 'in a,(#98)'
  531. Theory: R800 is connected to a 7MHz bus, but IO is done over a 3.5MHz bus. If
  532. the start of IO is at an odd clock cycle (so not at a clock edge at the
  533. slower bus) there is an extra wait cycle required.
  534. in a,(0) ; [nop] x A [x 8000]
  535. result: A=0 -> 3226
  536. A=1 -> 3274
  537. A=2 -> 3872
  538. A=3 -> 3932
  539. A=4 -> 4525
  540. openmsx-old: A=0 -> 3248
  541. A=1 -> 3255
  542. A=2 -> 3893
  543. A=3 -> 3913
  544. A=4 -> 4557
  545. openmsx-rev11643: A=0 -> 3261
  546. A=1 -> 3269
  547. A=2 -> 3914
  548. A=3 -> 3926
  549. A=4 -> 4565
  550. cycles: A=0 -> 10 [ffxoI6]
  551. A=1 -> 10 [f ffxI6]
  552. A=2 -> 12 [f f ffxoI6]
  553. A=3 -> 12 [f f f ffxI6]
  554. A=4 -> 14 [f f f f ffxoI6]
  555. note: When there are an odd number of NOP instructions between two OUT
  556. instructions, one of the NOPs can be executed seemingly 'for free'.
  557. Or in other words when there's an even number of NOP instructions, the
  558. OUT instruction has an extra penalty cycle.
  559. These results could be explained by assuming the following structure for
  560. the out command: 3 cycles [ffx..] followed by possibly one extra
  561. cycle to align to the slow 3.5MHz bus, followed by 6 cycles (= 3 cycles
  562. on the slow bus) for the actual IO.
  563. * Internal-ROM
  564. ld hl,(#xxyy) [x 10000]
  565. with yy=#00 -> no page break
  566. with xx in normal RAM
  567. result: 2855
  568. cycles: 7 [FffRr]
  569. with xx in ROM, DRAM mode
  570. result: 2855
  571. cycles: 7 [FffRr]
  572. with xx in ROM
  573. result: 4080
  574. cycles: 10 [FffRmRm] (m -> memory wait cycle)
  575. with yy=#ff -> page break during read
  576. with xx in normal RAM
  577. result: 3265
  578. cycles: 8 [FffRR]
  579. with xx in ROM, DRAM mode
  580. result: 3264
  581. cycles: 8 [FffRR]
  582. with xx in ROM
  583. result: 4080
  584. cycles: 10 [FffRmRm]
  585. note: - RAM or ROM in DRAM mode seems to have the same speed, including
  586. page break optimization
  587. - reads from ROM take 3 cycles, they don't become slower in case
  588. of a page break. So it seems there are always 2 cycles to set the
  589. address plus one wait cycle.
  590. - I did the same test with the 'ld (#xxyy),hl' instruction (even
  591. though writing to ROM doesn't make sense). I got the same results.
  592. [ ld a,128 ; or a ]
  593. [ BIOS ROM contains nop (x 11) ; ret m at address #1EB2 ]
  594. call #1EB2 [x 10000]
  595. result: [(D)RAM] 9763-9770
  596. [ROM] 18757
  597. cycles: [(D)RAM] 24 [FffWw F f f f f f f f f f f fRr ] (<- 23 !!)
  598. [ROM] 46 [FffWw Fm Fm Fm Fm Fm Fm Fm Fm Fm Fm Fm FmRr ]
  599. note: - this confirms 3 cycles per memory read from ROM
  600. - we measured 24 cycles for (D)RAM while there should only be 23,
  601. this might be a measurement error though: to calculate the number
  602. of cycles (in long code sequences) I compare it with the duration
  603. of a NOP instruction; for long durations this comparison may be
  604. inaccurate
  605. * External-slot
  606. ld hl,(#4000) [x 5000]
  607. result: 2857
  608. cycles: 14 [FffRmmmRmmm] ??
  609. note: 5 cycles per memory access, close to 3 cycles (@ 3.5MHz) on Z80
  610. but I'd have expected it to match exactly. It's strange to have half
  611. cycles on the external 3.5MHz cartridge slots.
  612. ld (#4000),hl [x 5000]
  613. result: 2857
  614. cycles: 14 [FffWmmmWmmm] ??
  615. ld a,(#4000) [x 5000]
  616. result: 1634
  617. cycles: 8 [FffRmm] ??
  618. ld (#4000),a [x 5000]
  619. result: 1634
  620. cycles: 8 [FffWmm] ??
  621. [ ld hl,#4000 ]
  622. ld a,(hl) [x 15000]
  623. result: 3669
  624. cycles: 6 [FRmm] ??
  625. [ ld hl,#4000 ]
  626. ld (hl),a [x 15000]
  627. result: 3669
  628. cycles: 6 [FWmm] ??
  629. [ ld hl,#4000 ]
  630. inc (hl) [x 15000]
  631. result: 7337
  632. cycles: 12 [FRmmxWmmm] ??
  633. TODO: RETN, RST
  634. CPI(R), LDI(R), INI(R), OUTI(R)
  635. HALT (how to test?)
  636. * refresh
  637. org #c000
  638. di
  639. ld b,0
  640. ld c,0
  641. out (#e6),a
  642. loop nop ; variable number of nops
  643. djnz loop
  644. dec c
  645. jr nz,loop
  646. in a,(#e6)
  647. ld l,a
  648. in a,(#e7)
  649. ld h,a
  650. ret
  651. nops | real | openmsx-old
  652. -----+-------+---------
  653. 0 | 8088 | 8036
  654. 1 | 10717 | 10716
  655. 2 | 13336 | 13382
  656. 3 | 16079 | 16042
  657. 4 | 18740 | 18726
  658. 10 | 34778 | 34753
  659. cycles (without refresh) = 65536 * nops + 197375