; test-vram-timing.code.asm
;
; Second battery of tests: test the timings for VRAM access, relative to the
; vertical interrupt (INT).
  2. ; Fills the first 16K of VRAM with the given byte
  3. ; Input: E = value for first byte of every other address
  4. ; D = Value for second byte of every other address
  5. ; e.g. if DE = 0305h, the values are 5, 3, 5, 3, 5, 3, ...
  6. ; Trashes: nothing, but fiddles with VDP registers
  7. FillVRAM proc
  8. ; We support up to 80,000 cycles/frame. With a 49 cycle loop, at
  9. ; 2 bytes per loop, that takes up to 3266 bytes. Therefore we fill
  10. ; 4095 bytes starting at 3000h.
  11. push bc
  12. push af
  13. xor a ; A14-A16 set to 0
  14. out (99h),a
  15. ld a,80h+14 ; register 14 in V9938; 6 in earlier ones
  16. out (99h),a
  17. ld (RG00SAV+14),a ; save new value of register 14
  18. ld a,(RG0SAV+6)
  19. out (99h),a
  20. ld a,80h+6
  21. out (99h),a ; restore register 6 in case it was overwritten
  22. xor a
  23. out (99h),a ; A0-A7 set to 0
  24. ld a,70h
  25. out (99h),a ; A8-A13 = 30h (3000h), write mode
  26. ld a,d
  27. xor e
  28. ld d,a ; Prepare value to xor with
  29. ld a,e
  30. ld bc,10FFh ; total VRAM to fill: 4095
  31. ; (prevents incrementing into A14,
  32. ; allowing us to avoid setting A14-A16 later)
  33. _FillVRAMloop: out (98h),a ; 12T
  34. xor d ; 5T
  35. dec c ; 5T
  36. jp nz,_FillVRAMloop; 12T ; inner loop: 29T exactly
  37. djnz _FillVRAMloop ; 14T ; We're not under fixed-time constraints
  38. ; -5T
  39. pop af
  40. pop bc
  41. ret
  42. endp
  43. ; Set VDP blank mode
  44. BlankVideo proc
  45. push af
  46. ld a,(RG0SAV+1)
  47. and 10111111b ; clear /BLANK bit
  48. out (99h),a
  49. ld a,81h ; reg 1
  50. out (99h),a
  51. pop af
  52. ret
  53. endp
  54. ; Unset VDP blank mode
  55. UnblankVideo proc
  56. push af
  57. ld a,(RG0SAV+1)
  58. or 01000000b ; set /BLANK bit (no blanking)
  59. out (99h),a
  60. ld a,81h ; reg 1
  61. out (99h),a
  62. pop af
  63. ret
  64. endp
  65. ; Test times
  66. TestVRAMTiming proc
  67. di
  68. ; As a first precaution, we're filling all VRAM with a known pattern
  69. ld de,66EEh
  70. call FillVRAM
  71. ; Sanity check: verify VRAM contents
  72. xor a
  73. out (99h),a ; A0-A7 set to 0
  74. ld a,30h
  75. out (99h),a ; A8-A13 = 30h (3000h), read mode
  76. ld bc,0FF10h ; 4095 bytes
  77. ld de,66EEh xor 0EE00h
  78. _VRAMverify: in a,(98h)
  79. cp e
  80. jp nz,_VerifyError
  81. xor d
  82. ld e,a
  83. djnz _VRAMverify
  84. dec c
  85. jp nz,_VRAMverify
  86. ; Find all cycles after the vertical interrupt for which a 12T separation
  87. ; between writes is not sufficient.
  88. ;
  89. ; Method: Perform two consecutive writes, the second 12T away from the first,
  90. ; then leave enough time for processing. Later, compare the expectations with
  91. ; the actual values present in VRAM.
  92. ;
  93. ; Do this at every possible phase of the total loop length with respect to the
  94. ; vertical interrupt, storing the results in a bit array.
  95. ;
  96. ; We assume that the value written by the CPU goes to a latch, and that when
  97. ; the VDP has time to service the transfer, the last value written to the latch
  98. ; is the one that gets written to VRAM. The VDP always has time to service the
  99. ; second byte written, so we assume it never fails. The first one, on the other
  100. ; hand, may be overwritten.
  101. ;
  102. ; Claims have been heard, that on occasion, only some bits are written. Since
  103. ; we're going to check the stored values, that claim will be tested too.
  104. ; We could check all 71364 (or whatever) cycles, one per frame, but
  105. ; that would take about 20 minutes. Instead, we parallelize it and
  106. ; check multiple cycles in the same frame. We later read back the
  107. ; written bytes to find out which writes failed and how. Then we
  108. ; shift the phase to test the next batch, until all cycles have
  109. ; been tested.
  110. ld c,49 ; Number of cycles in the write loop
  111. call DivCycFrmByC
  112. ; We want ceiling division, so if remainder was nonzero, increment HL
  113. ld a,c
  114. ex af,af' ; Save remainder in A'
  115. xor a
  116. cp c
  117. ld de,-1 ; because HL reaches -1 when counting down, not 0
  118. adc hl,de
  119. ld (CycDivByLoop),hl
  120. xor a
  121. ld (VRAMW_Phase),a ; Init phase
  122. ld hl,FirstBad1
  123. ld (hl),80h
  124. inc hl
  125. ld (hl),38h
  126. inc hl
  127. ld (hl),01h ; 13880h = 80000
  128. _NextPhase: ; Fill VRAM with 01h
  129. ld de,0101h
  130. call FillVRAM
  131. ld de,-1 ; Loop increment
  132. xor a
  133. out (99h),a ; A0-A7 set to 0
  134. ld a,70h
  135. out (99h),a ; A8-A13 = 30h (3000h), write mode
  136. ld bc,0FC98h ; C = VRAM R/W port; B = byte to write to even addresses
  137. call SyncVInt
  138. ; di, IntVec trashed, int not acked, 9T into the interrupt
  139. ; 9T ; from SyncVInt
  140. ; Start a fresh frame at the correct cycle
  141. ; We could handle wraparound instead, but this is much easier.
  142. ld a,(VRAMW_Phase) ; 14T ; Delay by current phase (0..48)
  143. sub 100 ; 8T ; 9+14+8+5+5+5+18+17+8+11 = 100
  144. ld l,a ; 5T
  145. sbc a,a ; 5T
  146. ld h,a ; 5T
  147. call WaitFrmPlusHL ; 18T
  148. ld hl,(CycDivByLoop);17T
  149. ld a,0FEh ; 8T ; Determine the value that goes to odd addresses
  150. ; Write loop (49T long). This is the "master length" that affects
  151. ; many other parts of the code.
  152. _WriteLoop: out (c),b ; 11T ; before output
  153. ; actual output of 0FCh; distance: 37T from previous write
  154. ; 3T ; after output
  155. out (98h),a ; 9T ; before output
  156. ; actual write of 0FEh; distance: 12T from previous write!
  157. ; 3T ; after output
  158. add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
  159. jp c,_WriteLoop ; 11T ; loop: (11+3)+(9+3)+12+11 = 49T; spacing: 3+12+11+11 = 37T
  160. ; Any violation of alternance is a failed write.
  161. ; We hope (and there are reasons behind it) that we don't get
  162. ; exactly the same pattern from a failed write as for a successful write.
  163. ; Find the first position where the alternance fails and determine
  164. ; the corresponding cycle number. Store the minimum.
  165. ld hl,VRAMW_Phase
  166. inc (hl) ; Increment phase for next loop
  167. ld l,(hl) ; Fetch incremented value. We need to take the
  168. ; incremented value instead of the original value,
  169. ; because it's used for a comparison which is done
  170. ; in reverse order of how it should be done, causing
  171. ; an off-by-one.
  172. ; Set up address 3000h for read in VDP
  173. ; Let's try writing to the address register as fast as possible
  174. ld bc,99h
  175. ld a,30h
  176. out (c),b ; A0-A7 set to 0
  177. out (99h),a ; A8-A11 = 0, A12-A13 = 1 (3000h), read mode
  178. ld iy,3000h-1 ; IY tracks VRAM address for error reporting
  179. ; E:H:L tracks cycle number of current VRAM position
  180. ld h,b
  181. ld e,b
  182. ld bc,49
  183. _AltCheck: in a,(98h) ; 9T+3T
  184. cp 0FCh ; 8T
  185. jp nz,_BadAlt ; 11T ; 3+8+11+9=31, enough
  186. in a,(98h) ; 9T+3T
  187. cp 0FEh
  188. ; WRONG: "If the fast write has succeeded, the slow write MUST succeed."
  189. ; The V9938 begs to disagree.
  190. ;jp nz,_CompareError1
  191. jp nz,_BadAlt
  192. inc iy
  193. inc iy
  194. add hl,bc
  195. ld a,e
  196. adc a,b
  197. ld e,a
  198. ld a,(CycFrm1)
  199. sub l
  200. ld a,(CycFrm2)
  201. sbc a,h
  202. ld a,(CycFrm3)
  203. sbc a,e
  204. jp nc,_AltCheck ; The subtraction is reversed, so this check is off by one,
  205. ; but given the instruction set, it's faster in this direction.
  206. ; That's why we took the incremented value of the phase
  207. ; instead of the direct one.
  208. _BadAlt: ld a,(FirstBad1)
  209. sub l
  210. ld a,(FirstBad2)
  211. sbc a,h
  212. ld a,(FirstBad3)
  213. sbc a,e
  214. jr c,_NoRecord
  215. ; We're still one above the real value
  216. ld bc,-1
  217. add hl,bc
  218. ld (FirstBad1),hl
  219. ld a,e
  220. adc a,b
  221. ld (FirstBad3),a
  222. _NoRecord:
  223. ; Check other phases
  224. ld a,(VRAMW_Phase)
  225. cp 49
  226. jp nz,_NextPhase
  227. ret
  228. _CompareError1: ld l,a
  229. ld h,0FEh
  230. ld (ErrParams),hl
  231. ld (ErrParams+2),iy; VRAM address with error
  232. ld a,5 ; Error code 5: Unexpected VRAM contents during analysis
  233. jp Finish
  234. _CompareError2: ld l,a
  235. ld h,0FCh
  236. ld (ErrParams),hl
  237. ld (ErrParams+2),iy
  238. ld a,5 ; Error code 5: Unexpected VRAM contents during analysis
  239. jp Finish
  240. _VerifyError: dec b ; Calc failure address
  241. dec c
  242. ld d,c ; swap bytes
  243. ld e,b
  244. ld hl,4000h
  245. scf
  246. sbc hl,de
  247. ld (ErrParams),hl
  248. ld a,4 ; Error code 4: VRAM verification error
  249. jp Finish
  250. endp
  251. ; Code adapted from multiple sources on the internet.
  252. ; Divide cycles per frame by C.
  253. ; Input: C = divisor (assumes C > [CycFrm3] so that the result fits in 16 bits)
  254. ; Output: Quotient in HL, remainder in C.
  255. ; Trashes: AF
  256. ; Uses exactly 729 T-states regardless of input (on MSX, running on Z80)
  257. ; Note CycFrm3 is typically < 2 so any divisor > 1 will probably do.
  258. ;
  259. DivCycFrmByC proc
  260. ld hl,(CycFrm1) ; 17T
  261. ld a,(CycFrm3) ; 14T
  262. add hl,hl ; 12T ; First bit
  263. rept 16 ; 16 * (
  264. adc a,a ; 5T
  265. sub c ; 5T
  266. jr nc,$+3 ; 13T ; rept-local labels are not working for us
  267. ; -5T ; for false branch
  268. add a,c ; 5T ; Subtracted once too much, adjust back; compensates timing
  269. ; Jump destination
  270. adc hl,hl ; 17T ; Shift in the inverted next bit of the quotient
  271. endm ; )
  272. ld c,a ; 5T ; save remainder
  273. ld a,l ; 5T ; Complement HL
  274. cpl ; 5T
  275. ld l,a ; 5T
  276. ld a,h ; 5T
  277. cpl ; 5T
  278. ld h,a ; 5T ; total 7 * 5T for complement. Using ccf in the loop would be 16 * 5T.
  279. ret ; 11T
  280. ; 17+14+12+16*(5+5+13-5+5+17)+5+5+5+5+5+5+5+11 = 729
  281. endp
  282. ; Used for unit testing of the division routine
  283. UnitTestDiv:
  284. ld a,(DAC+2)
  285. call DivCycFrmByC
  286. ld (DAC+2),hl
  287. ret