123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350 |
- ; Second battery of tests: Test the timings for VRAM access, relative to the INT
- ; FillVRAM: fill 4095 bytes of VRAM starting at 3000h with an alternating
- ; two-byte pattern.
- ; Input: E = value for first byte of every other address
- ; D = Value for second byte of every other address
- ; e.g. if DE = 0305h, the values are 5, 3, 5, 3, 5, 3, ...
- ; Output: none. The VDP address pointer is left after the filled area.
- ; Trashes: nothing (AF, BC and DE are saved and restored), but fiddles with
- ; VDP registers 14 and 6 and with the VDP address pointer.
- ; NOTE(review): the two-byte writes to port 99h are not protected by DI here;
- ; the visible callers run with interrupts disabled.
- FillVRAM proc
- ; We support up to 80,000 cycles/frame. With a 49 cycle loop, at
- ; 2 bytes per loop, that takes up to 3266 bytes. Therefore we fill
- ; 4095 bytes starting at 3000h.
- push de ; D is reused below as the toggle mask
- push bc
- push af
- xor a ; A14-A16 set to 0
- out (99h),a
- ld (RG00SAV+14),a ; save new value of register 14 (0, i.e. the data
- ; byte just sent, not the 8Eh command byte)
- ; NOTE(review): RG00SAV is defined outside this
- ; view; RG0SAV is the name used everywhere else.
- ; Confirm it maps the register 14 save slot.
- ld a,80h+14 ; register 14 in V9938; 6 in earlier ones
- out (99h),a
- ld a,(RG0SAV+6)
- out (99h),a
- ld a,80h+6
- out (99h),a ; restore register 6 in case it was overwritten
- xor a
- out (99h),a ; A0-A7 set to 0
- ld a,70h
- out (99h),a ; A8-A13 = 30h (3000h), write mode
- ld a,d
- xor e
- ld d,a ; D = D xor E: xoring A with it flips between the two values
- ld a,e ; first byte written is E
- ld bc,10FFh ; total VRAM to fill: 4095
- ; (C runs 255 times on the first pass and 256 on each
- ; of the 15 following passes counted by B)
- ; (prevents incrementing into A14,
- ; allowing us to avoid setting A14-A16 later)
- _FillVRAMloop: out (98h),a ; 12T
- xor d ; 5T ; switch to the other pattern byte
- dec c ; 5T
- jp nz,_FillVRAMloop; 11T ; inner loop: 12+5+5+11 = 33T per byte
- djnz _FillVRAMloop ; 14T ; We're not under fixed-time constraints
- ; -5T ; when not taken
- pop af
- pop bc
- pop de
- ret
- endp
- ; BlankVideo: blank the display by writing VDP register 1 with bit 6 cleared.
- ; The value is derived from the saved copy at RG0SAV+1; that copy is not
- ; modified. Preserves AF.
- BlankVideo proc
- push af
- ld a,(RG0SAV+1) ; start from the saved register 1 value
- and 0BFh ; drop bit 6 (/BLANK) -> display blanked
- out (99h),a ; data byte first...
- ld a,80h+1 ; ...then the "write register 1" command
- out (99h),a
- pop af
- ret
- endp
- ; UnblankVideo: re-enable the display by writing VDP register 1 with bit 6 set.
- ; The value is derived from the saved copy at RG0SAV+1; that copy is not
- ; modified. Preserves AF.
- UnblankVideo proc
- push af
- ld a,(RG0SAV+1) ; start from the saved register 1 value
- or 40h ; raise bit 6 (/BLANK) -> no blanking
- out (99h),a ; data byte first...
- ld a,80h+1 ; ...then the "write register 1" command
- out (99h),a
- pop af
- ret
- endp
- ; TestVRAMTiming: for each phase 0..48 of a 49T write loop relative to the
- ; vertical interrupt, write FCh/FEh byte pairs 12T apart, read them back, and
- ; keep in FirstBad1..3 the lowest cycle number at which the FCh,FEh
- ; alternation was found broken.
- ; Input: none in registers; reads CycFrm1..3.
- ; Output: FirstBad1..3 (24-bit, initialised to 80000). Also writes
- ; CycDivByLoop and VRAMW_Phase.
- ; Errors: if the initial fill does not read back correctly, jumps to Finish
- ; with A = 4 and the failing VRAM address in ErrParams.
- ; Trashes: at least AF, AF', BC, DE, HL, IY. Executes DI on entry and no EI
- ; in this routine.
- TestVRAMTiming proc
- di
- ; As a first precaution, we're filling the VRAM test area (4095 bytes at
- ; 3000h) with a known pattern
- ld de,66EEh
- call FillVRAM
- ; Sanity check: verify VRAM contents
- xor a
- out (99h),a ; A0-A7 set to 0
- ld a,30h
- out (99h),a ; A8-A13 = 30h (3000h), read mode
- ld bc,0FF10h ; 4095 bytes: B = inner count (255, then 256), C = 16 passes
- ld de,66EEh xor 0EE00h ; E = first expected byte (EEh), D = toggle mask
- _VRAMverify: in a,(98h)
- cp e
- jp nz,_VerifyError
- xor d ; compute the next expected byte
- ld e,a
- djnz _VRAMverify
- dec c
- jp nz,_VRAMverify
- ; Find all cycles after the vertical interrupt for which a 12T separation
- ; between writes is not sufficient.
- ;
- ; Method: Perform two consecutive writes, the second 12T away from the first,
- ; then leave enough time for processing. Later, compare the expectations with
- ; the actual values present in VRAM.
- ;
- ; Do this at every possible phase of the total loop length with respect to the
- ; vertical interrupt, storing the results in a bit array.
- ;
- ; We assume that the value written by the CPU goes to a latch, and that when
- ; the VDP has time to service the transfer, the last value written to the latch
- ; is the one that gets written to VRAM. The VDP always has time to service the
- ; second byte written, so we assume it never fails. The first one, on the other
- ; hand, may be overwritten.
- ;
- ; Claims have been heard, that on occasion, only some bits are written. Since
- ; we're going to check the stored values, that claim will be tested too.
- ; We could check all 71364 (or whatever) cycles, one per frame, but
- ; that would take about 20 minutes. Instead, we parallelize it and
- ; check multiple cycles in the same frame. We later read back the
- ; written bytes to find out which writes failed and how. Then we
- ; shift the phase to test the next batch, until all cycles have
- ; been tested.
- ld c,49 ; Number of cycles in the write loop
- call DivCycFrmByC
- ; We want ceiling division, so if remainder was nonzero, increment HL
- ld a,c
- ex af,af' ; Save remainder in A'
- xor a
- cp c ; carry set iff remainder is nonzero
- ld de,-1 ; because HL reaches -1 when counting down, not 0
- adc hl,de ; HL = ceil(cycles/49) - 1
- ld (CycDivByLoop),hl
- xor a
- ld (VRAMW_Phase),a ; Init phase
- ld hl,FirstBad1
- ld (hl),80h
- inc hl
- ld (hl),38h
- inc hl
- ld (hl),01h ; 13880h = 80000
- _NextPhase: ; Fill VRAM with 01h
- ld de,0101h
- call FillVRAM
- ld de,-1 ; Loop increment
- xor a
- out (99h),a ; A0-A7 set to 0
- ld a,70h
- out (99h),a ; A8-A13 = 30h (3000h), write mode
- ld bc,0FC98h ; C = VRAM R/W port; B = byte to write to even addresses
- call SyncVInt
- ; di, IntVec trashed, int not acked, 9T into the interrupt
- ; 9T ; from SyncVInt
- ; Start a fresh frame at the correct cycle
- ; We could handle wraparound instead, but this is much easier.
- ld a,(VRAMW_Phase) ; 14T ; Delay by current phase (0..48)
- sub 100 ; 8T ; 9+14+8+5+5+5+18+17+8+11 = 100
- ld l,a ; 5T
- sbc a,a ; 5T ; sign-extend (phase - 100) into H
- ld h,a ; 5T
- call WaitFrmPlusHL ; 18T
- ld hl,(CycDivByLoop);17T
- ld a,0FEh ; 8T ; Determine the value that goes to odd addresses
- ; Write loop (49T long). This is the "master length" that affects
- ; many other parts of the code.
- _WriteLoop: out (c),b ; 11T ; before output
- ; actual output of 0FCh; distance: 37T from previous write
- ; 3T ; after output
- out (98h),a ; 9T ; before output
- ; actual write of 0FEh; distance: 12T from previous write!
- ; 3T ; after output
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- jp c,_WriteLoop ; 11T ; loop: (11+3)+(9+3)+12+11 = 49T; spacing: 3+12+11+11 = 37T
- ; Any violation of alternance is a failed write.
- ; We hope (and there are reasons behind it) that we don't get
- ; exactly the same pattern from a failed write as for a successful write.
- ; Find the first position where the alternance fails and determine
- ; the corresponding cycle number. Store the minimum.
- ld hl,VRAMW_Phase
- inc (hl) ; Increment phase for next loop
- ld l,(hl) ; Fetch incremented value. We need to take the
- ; incremented value instead of the original value,
- ; because it's used for a comparison which is done
- ; in reverse order of how it should be done, causing
- ; an off-by-one.
- ; Set up address 3000h for read in VDP
- ; Let's try writing to the address register as fast as possible
- ld bc,99h
- ld a,30h
- out (c),b ; A0-A7 set to 0
- out (99h),a ; A8-A11 = 0, A12-A13 = 1 (3000h), read mode
- ld iy,3000h-1 ; IY tracks VRAM address for error reporting
- ; E:H:L tracks cycle number of current VRAM position
- ld h,b
- ld e,b
- ld bc,49 ; cycles per byte pair; B = 0 is used by the adc below
- _AltCheck: in a,(98h) ; 9T+3T
- cp 0FCh ; 8T
- jp nz,_BadAlt ; 11T ; 3+8+11+9=31, enough
- in a,(98h) ; 9T+3T
- cp 0FEh
- ; WRONG: "If the fast write has succeeded, the slow write MUST succeed."
- ; The V9938 begs to disagree.
- ;jp nz,_CompareError1
- jp nz,_BadAlt
- inc iy
- inc iy
- add hl,bc
- ld a,e
- adc a,b
- ld e,a ; E:H:L += 49
- ld a,(CycFrm1)
- sub l
- ld a,(CycFrm2)
- sbc a,h
- ld a,(CycFrm3)
- sbc a,e
- jp nc,_AltCheck ; The subtraction is reversed, so this check is off by one,
- ; but given the instruction set, it's faster in this direction.
- ; That's why we took the incremented value of the phase
- ; instead of the direct one.
- _BadAlt: ld a,(FirstBad1)
- sub l
- ld a,(FirstBad2)
- sbc a,h
- ld a,(FirstBad3)
- sbc a,e
- jr c,_NoRecord ; current minimum is already lower: keep it
- ; We're still one above the real value
- ld bc,-1
- add hl,bc
- ld (FirstBad1),hl ; stores both FirstBad1 and the byte after it
- ld a,e
- adc a,b
- ld (FirstBad3),a
- _NoRecord:
- ; Check other phases
- ld a,(VRAMW_Phase)
- cp 49
- jp nz,_NextPhase
- ret
- ; NOTE: _CompareError1 and _CompareError2 are not reached by any live
- ; instruction in this routine (the only jump to _CompareError1 is commented
- ; out above). They are kept unchanged.
- _CompareError1: ld l,a
- ld h,0FEh
- ld (ErrParams),hl
- ld (ErrParams+2),iy; VRAM address with error
- ld a,5 ; Error code 5: Unexpected VRAM contents during analysis
- jp Finish
- _CompareError2: ld l,a
- ld h,0FCh
- ld (ErrParams),hl
- ld (ErrParams+2),iy
- ld a,5 ; Error code 5: Unexpected VRAM contents during analysis
- jp Finish
- _VerifyError: dec b ; Calc failure address
- dec c
- ld d,c ; swap bytes
- ld e,b ; DE = bytes still to verify after the failing one
- ld hl,3FFFh ; last verified address + 1 (the area is 4095 bytes, 3000h..3FFEh)
- scf
- sbc hl,de ; HL = 3FFFh - DE - 1 = address of the byte that mismatched
- ld (ErrParams),hl
- ld a,4 ; Error code 4: VRAM verification error
- jp Finish
- endp
- ; Code adapted from multiple sources on the internet.
- ; Divide cycles per frame by C. The dividend is 24 bits: its low 16 bits are loaded from CycFrm1 and its top 8 bits from CycFrm3.
- ; Input: C = divisor (assumes C > [CycFrm3] so that the result fits in 16 bits)
- ; Output: Quotient in HL, remainder in C.
- ; Trashes: AF
- ; Uses exactly 729 T-states regardless of input (on MSX, running on Z80)
- ; Note CycFrm3 is typically < 2 so any divisor > 1 will probably do.
- ; Method: shift-and-subtract with add-back, one quotient bit per rept iteration; the bits are gathered inverted in HL and complemented at the end.
- DivCycFrmByC proc
- ld hl,(CycFrm1) ; 17T ; HL = low 16 bits of the dividend
- ld a,(CycFrm3) ; 14T ; A = top 8 bits; it then serves as the running remainder
- add hl,hl ; 12T ; First bit: top bit of HL goes to carry
- rept 16 ; 16 * (
- adc a,a ; 5T ; shift the next dividend bit into the remainder
- sub c ; 5T ; trial subtraction; carry set means the divisor did not fit
- jr nc,$+3 ; 13T ; rept-local labels are not working for us ($+3 skips the 1-byte add a,c)
- ; -5T ; for false branch
- add a,c ; 5T ; Subtracted once too much, adjust back; compensates timing; leaves carry set
- ; Jump destination
- adc hl,hl ; 17T ; Shift in the inverted next bit of the quotient; next dividend bit goes to carry
- endm ; )
- ld c,a ; 5T ; save remainder
- ld a,l ; 5T ; Complement HL
- cpl ; 5T
- ld l,a ; 5T
- ld a,h ; 5T
- cpl ; 5T
- ld h,a ; 5T ; total 7 * 5T for complement. Using ccf in the loop would be 16 * 5T.
- ret ; 11T
- ; 17+14+12+16*(5+5+13-5+5+17)+5+5+5+5+5+5+5+11 = 729
- endp
- ; Used for unit testing of the division routine.
- ; Input: byte at DAC+2 = divisor
- ; Output: 16-bit quotient stored at DAC+2 (the remainder, returned in C, is
- ; not stored)
- ; Trashes: AF, C, HL
- UnitTestDiv:
- ld a,(DAC+2)
- ld c,a ; DivCycFrmByC takes its divisor in C and overwrites A at once
- call DivCycFrmByC
- ld (DAC+2),hl
- ret
|