123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622 |
- ; ---------------------------------------------------------------------------
- ; TestVRAMTiming -- second battery of tests: timing of VRAM writes relative
- ; to the vertical interrupt.
- ;
- ; 1. Paints the VRAM test window (4095 bytes from 3000h) with an alternating
- ;    EEh/66h pattern and reads it back; any mismatch jumps to _VerifyError.
- ; 2. Presets every 24-bit FirstBadNN result to the cycles-per-frame value
- ;    (CycFrm1..CycFrm3), i.e. "no failing cycle inside the frame".
- ; 3. Calls _PerformTest once per write spacing, from the tightest (12T) to
- ;    the loosest (24T), and stores each 24-bit result in the matching
- ;    FirstBadNN. As soon as _PerformTest returns with carry clear (no failure
- ;    found inside the frame) this routine returns without running the
- ;    remaining, looser spacings; their results keep the preset value.
- ;
- ; How _PerformTest measures one spacing: it performs pairs of writes, the
- ; second one NN T-states after the first, repeated back to back for a whole
- ; frame, and afterwards reads the window back to see which pairs did not land
- ; as expected. Checking one cycle per frame would need one frame for each of
- ; the roughly 71364 cycles (about 20 minutes), so many cycles are tested per
- ; frame and the start phase is shifted between frames until every cycle of
- ; the frame has been covered.
- ;
- ; This routine executes DI on entry and never executes EI itself.
- ; ---------------------------------------------------------------------------
- TestVRAMTiming proc
-         di
-         ; Step 1a: paint the window (E = first byte, D = second byte).
-         ld de,66EEh
-         call FillVRAM
-         ; Step 1b: point the VDP at 3000h for reading and verify the pattern.
-         xor a
-         out (99h),a             ; address bits A0-A7 = 0
-         ld a,30h
-         out (99h),a             ; address bits A8-A13 = 30h (3000h), read mode
-         ld de,66EEh xor 0EE00h  ; E = expected byte (EEh), D = toggle mask (66h xor EEh)
-         ld bc,0FF10h            ; 255 + 15*256 = 4095 bytes
- _VRAMverify:
-         in a,(98h)
-         cp e
-         jp nz,_VerifyError      ; B and C still hold the loop counters there
-         xor d                   ; flip to the other pattern byte...
-         ld e,a                  ; ...which is what the next read must return
-         djnz _VRAMverify
-         dec c
-         jp nz,_VRAMverify
-         ; Step 2: preset all results to CycFrm (A = bits 16-23, HL = bits 0-15).
-         ld a,(CycFrm3)
-         ld hl,(CycFrm1)
-         ld (FirstBad12+2),a
-         ld (FirstBad12),hl
-         ld (FirstBad14+2),a
-         ld (FirstBad14),hl
-         ld (FirstBad17+2),a
-         ld (FirstBad17),hl
-         ld (FirstBad18+2),a
-         ld (FirstBad18),hl
-         ld (FirstBad19+2),a
-         ld (FirstBad19),hl
-         ld (FirstBad20+2),a
-         ld (FirstBad20),hl
-         ld (FirstBad21+2),a
-         ld (FirstBad21),hl
-         ld (FirstBad22+2),a
-         ld (FirstBad22),hl
-         ld (FirstBad23+2),a
-         ld (FirstBad23),hl
-         ld (FirstBad24+2),a
-         ld (FirstBad24),hl
-         ; Step 3: one _PerformTest run per spacing.
-         ; IX = write loop, A = length of that loop in T-states.
-         ; On return A:HL = first bad cycle, carry set if it lies inside the frame.
-         ; 12T between writes
-         ld ix,_WLoop_12_59
-         ld a,59
-         call _PerformTest
-         ld (FirstBad12+2),a
-         ld (FirstBad12),hl
-         ret nc                  ; the stores above leave the flags untouched
-         ; 14T between writes
-         ld ix,_WLoop_14_61
-         ld a,61
-         call _PerformTest
-         ld (FirstBad14+2),a
-         ld (FirstBad14),hl
-         ret nc
-         ; 17T between writes
-         ld ix,_WLoop_17_59
-         ld a,59
-         call _PerformTest
-         ld (FirstBad17+2),a
-         ld (FirstBad17),hl
-         ret nc
-         ; 18T between writes
-         ld ix,_WLoop_18_77
-         ld a,77
-         call _PerformTest
-         ld (FirstBad18+2),a
-         ld (FirstBad18),hl
-         ret nc
-         ; 19T between writes
-         ld ix,_WLoop_19_61
-         ld a,61
-         call _PerformTest
-         ld (FirstBad19+2),a
-         ld (FirstBad19),hl
-         ret nc
-         ; 20T between writes
-         ld ix,_WLoop_20_67
-         ld a,67
-         call _PerformTest
-         ld (FirstBad20+2),a
-         ld (FirstBad20),hl
-         ret nc
-         ; 21T between writes
-         ld ix,_WLoop_21_68
-         ld a,68
-         call _PerformTest
-         ld (FirstBad21+2),a
-         ld (FirstBad21),hl
-         ret nc
-         ; 22T between writes
-         ld ix,_WLoop_22_69
-         ld a,69
-         call _PerformTest
-         ld (FirstBad22+2),a
-         ld (FirstBad22),hl
-         ret nc
-         ; 23T between writes
-         ld ix,_WLoop_23_70
-         ld a,70
-         call _PerformTest
-         ld (FirstBad23+2),a
-         ld (FirstBad23),hl
-         ret nc
-         ; 24T between writes: last spacing, so return whatever the carry says.
-         ld ix,_WLoop_24_66
-         ld a,66
-         call _PerformTest
-         ld (FirstBad24+2),a
-         ld (FirstBad24),hl
-         ret
- _JpWriteLoop: jp (ix) ; 10T ; trampoline: "call _JpWriteLoop" behaves as an indirect call to the write loop in IX, whose RET returns to the caller
- ; _PerformTest: runs the write loop in IX once per phase 0..A-1 relative to the vertical interrupt, reads the written VRAM back after each run, and returns the lowest cycle number at which the FCh/FEh alternation was found broken.
- ; Input: A = cycles per write loop; IX = pointer to write loop
- ; Output (FirstBad1..3 hold the same 24-bit value in memory):
- ; L = [FirstBad1]
- ; H = [FirstBad2]
- ; A = [FirstBad3]
- ; CF: Set if FirstBad < CycFrm, Reset otherwise
- ; Trashes: F,BC,DE,HL,IY,BC',DE',HL' (plus whatever FillVRAM, SyncVInt and WaitFrmPlusHL trash; not visible here)
- _PerformTest:
- ld (CycPerLoop),a
- ld c,a
- call DivCycFrmByC
- ; We want ceil(CycFrm/A) - 1: "cp c" below sets carry iff the remainder in C is nonzero, and "adc hl,de" with DE = -1 then adds (carry - 1) to the quotient.
- ;ld a,c
- ;ex af,af' ; Save remainder in A' (not deemed necessary)
- xor a
- ld (VRAMW_Phase),a ; start with phase 0
- cp c
- ld de,-1 ; because HL reaches -1 later when counting down, not 0
- adc hl,de
- ld (CycDivByLoop),hl ; number of write-loop iterations per run, minus one
- ld hl,FirstBad1
- ld (hl),80h
- inc hl
- ld (hl),38h
- inc hl
- ld (hl),01h ; FirstBad := 013880h = 80000, the initial "nothing recorded yet" value (relies on FirstBad1..3 being consecutive bytes)
- _NextPhase: ; Fill VRAM with 01h so that unwritten bytes differ from both FCh and FEh
- ld de,0101h
- call FillVRAM
- exx
- ld hl,(CycDivByLoop) ; HL' = iteration counter for the write loop
- ld de,-1 ; Loop increment
- exx
- xor a
- out (99h),a ; A0-A7 set to 0
- ld a,70h
- out (99h),a ; A8-A13 = 30h (3000h), write mode
- ld bc,0FC98h ; C = VRAM R/W port; B = byte to write to even addresses
- ld hl,ScratchWLoop
- ld (hl),0FEh ; Byte to write to odd addresses
- push bc ; copy BC into BC': some write loops execute OUT (C),A after EXX
- exx
- pop bc
- exx
- call SyncVInt
- ; di, IntVec trashed, int not acked, 9T into the interrupt
- ; 9T ; from SyncVInt
- ; Start a fresh frame at the correct cycle
- ; We could handle wraparound instead, but this is much easier.
- ld a,(VRAMW_Phase) ; 14T ; Delay by current phase (0..CycPerLoop-1)
- sub 122 ; 8T ; 9+14+8+5+5+5+18+11+8+18+10+11 = 122 ; borrows whenever phase < 122, which holds for every loop length passed in this file
- ld l,a ; 5T
- sbc a,a ; 5T ; A = FFh after the borrow: sign-extends (phase - 122) into H
- ld h,a ; 5T
- call WaitFrmPlusHL ; 18T
- ld hl,ScratchWLoop ; 11T
- ld a,(hl) ; 8T ; Determine the value that goes to odd addresses
- call _JpWriteLoop ; 18T
- ; 10T ; JP (IX)
- ; 11T ; OUT (C),B (before the out is effective)
- ; Any violation of the alternation is a failed write.
- ; We hope (and there are reasons behind it) that we don't get
- ; exactly the same pattern from a failed write as for a successful write.
- ; Find the first position where the alternation fails and determine
- ; the corresponding cycle number. Store the minimum.
- ld hl,VRAMW_Phase
- inc (hl) ; Increment phase for next loop
- ld l,(hl) ; Fetch incremented value. We need to take the
- ; incremented value instead of the original value,
- ; because it's used for a comparison which is done
- ; in reverse order of how it should be done, causing
- ; an off-by-one.
- ; Set up address 3000h for read in VDP
- ; Let's try writing to the address register as fast as possible
- ld bc,99h ; B = 0 (value to send), C = 99h (port)
- ld a,30h
- out (c),b ; A0-A7 set to 0
- out (99h),a ; A8-A11 = 0, A12-A13 = 1 (3000h), read mode
- ld iy,3000h-1 ; IY tracks VRAM address for error reporting (only read by the _CompareError handlers)
- ; E:H:L tracks cycle number of current VRAM position, plus one (L already holds phase+1)
- ld h,b ; H = 0
- ld e,b ; E = 0
- ld a,(CycPerLoop)
- ld c,a
- ld b,0 ; BC = cycles per loop = cycle distance between consecutive byte pairs
- _AltCheck: in a,(98h) ; 9T+3T
- cp 0FCh ; 8T
- jp nz,_BadAlt ; 11T ; 3+8+11+9=31, enough
- in a,(98h) ; 9T+3T
- cp 0FEh
- ; WRONG: "If the fast write has succeeded, the slow write MUST succeed."
- ; The V9938 begs to disagree.
- ;jp nz,_CompareError1
- jp nz,_BadAlt
- inc iy
- inc iy
- add hl,bc ; E:H:L += cycles per loop (24-bit add)
- ld a,e
- adc a,b
- ld e,a
- ld a,(CycFrm1) ; 24-bit compare: CycFrm - E:H:L, only the final carry is used
- sub l
- ld a,(CycFrm2)
- sbc a,h
- ld a,(CycFrm3)
- sbc a,e
- jp nc,_AltCheck ; The subtraction is reversed, so this check is off by one,
- ; but given the instruction set, it's faster in this direction.
- ; That's why we took the incremented value of the phase
- ; instead of the direct one.
- _BadAlt: ld a,(FirstBad1) ; also reached by falling through when the whole frame checked out; 24-bit compare FirstBad - E:H:L
- sub l
- ld a,(FirstBad2)
- sbc a,h
- ld a,(FirstBad3)
- sbc a,e
- jr c,_NoRecord ; FirstBad < E:H:L: an earlier cycle is already on record
- ; We're still one above the real value
- ld bc,-1
- add hl,bc ; HL -= 1; carry is set unless HL was 0
- ld (FirstBad1),hl
- ld a,e
- adc a,b ; E + FFh + carry: propagates the borrow into the top byte
- ld (FirstBad3),a
- _NoRecord:
- ; Check other phases
- ld hl,CycPerLoop
- ld a,(VRAMW_Phase)
- cp (hl)
- jp nz,_NextPhase ; repeat until phase == cycles per loop
- ; Calculate FirstBad minus CycFrm
- ; (No Carry indicates we're done)
- ld hl,(CycFrm1) ; L = CycFrm1, H = CycFrm2
- ld a,(FirstBad1)
- sub l
- ld a,(FirstBad2)
- sbc a,h
- ld hl,(CycFrm2) ; H = CycFrm3 (the LD instructions leave the carry chain intact)
- ld a,(FirstBad3)
- sbc a,h
- ld hl,(FirstBad1) ; return value: A:HL = FirstBad, flags from the compare above
- ld a,(FirstBad3)
- ret
- ; Write loops. Each iteration writes B to the port in C, then A (or (HL) in the OUTI variant) after the stated gap, then counts HL' down using DE' = -1 and repeats while that addition carries. Entry state as set up by _PerformTest: B = 0FCh, C = C' = 98h, A = (HL) = 0FEh, HL = ScratchWLoop, HL' = iterations-1, DE' = -1. Entered through JP (IX), so the final RET returns into _PerformTest. The code is cycle-exact: do not reorder or substitute instructions.
- ; _WLoop_12_59: 12T between writes, 59T long
- _WLoop_12_59: out (c),b ; 11T ; before output
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- out (98h),a ; 9T ; before output
- ; actual write of 0FEh; distance: 12T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_12_59 ; 11T ; loop: (11+3)+(9+3)+5+12+5+11 = 59T
- ret ; We're out of the timed area now
- ; _WLoop_14_61: 14T between writes, 61T long
- _WLoop_14_61: out (c),b ; 11T ; before output
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- out (c),a ; 11T ; before output
- ; actual write of 0FEh; distance: 14T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_14_61 ; 11T ; loop: (11+3)+(11+3)+5+12+5+11 = 61T
- ret ; We're out of the timed area now
- ; _WLoop_17_59: 17T between writes, 59T long
- _WLoop_17_59: out (c),b ; 11T ; before output
- ; actual output of 0FCh; distance: 42T from previous write
- ; 3T ; after output
- exx ; 5T
- out (98h),a ; 9T ; before output
- ; actual write of 0FEh; distance: 17T from previous write!
- ; 3T ; after output
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_17_59 ; 11T ; loop: (11+3)+5+(9+3)+12+5+11 = 59T
- ret ; We're out of the timed area now
- ; _WLoop_18_77: 18T between writes, 77T long
- _WLoop_18_77: out (c),b ; 11T ; before output
- ; actual output of 0FCh; distance: 59T from previous write
- ; 3T ; after output
- outi ; 15T ; before output ; sends (HL), i.e. the 0FEh stored at ScratchWLoop
- ; actual write of 0FEh; distance: 18T from previous write!
- ; 3T ; after output
- dec hl ; 7T ; undo the HL increment made by OUTI
- inc b ; 5T ; compensate for changes made by OUTI
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_18_77 ; 11T ; loop: (11+3)+(15+3)+7+5+5+12+5+11 = 77T
- ret ; We're out of the timed area now
- _WLoop_19_61: out (c),b ; 11T ; before output ; _WLoop_19_61: 19T between writes, 61T long
- ; actual output of 0FCh; distance: 42T from previous write
- ; 3T ; after output
- exx ; 5T
- out (c),a ; 11T ; before output ; uses C' as the port, hence BC' = BC on entry
- ; actual write of 0FEh; distance: 19T from previous write!
- ; 3T ; after output
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_19_61 ; 11T ; loop: (11+3)+5+(11+3)+12+5+11 = 61T
- ret ; We're out of the timed area now
- _WLoop_20_67: out (c),b ; 11T ; before output ; _WLoop_20_67: 20T between writes, 67T long
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- ld l,0 ; 8T ; dummy, for delay (clobbers L; only the OUTI loop reads HL)
- out (98h),a ; 9T ; before output
- ; actual write of 0FEh; distance: 20T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_20_67 ; 11T ; loop: (11+3)+8+(9+3)+5+12+5+11 = 67T
- ret ; We're out of the timed area now
- _WLoop_21_68: out (c),b ; 11T ; before output ; _WLoop_21_68: 21T between writes, 68T long
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- inc hl ; 7T ; dummy, for delay (clobbers HL; only the OUTI loop reads HL)
- out (c),a ; 11T ; before output
- ; actual write of 0FEh; distance: 21T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_21_68 ; 11T ; loop: (11+3)+7+(11+3)+5+12+5+11 = 68T
- ret ; We're out of the timed area now
- _WLoop_22_69: out (c),b ; 11T ; before output ; _WLoop_22_69: 22T between writes, 69T long
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- ld l,0 ; 8T ; dummy, for delay (clobbers L; only the OUTI loop reads HL)
- out (c),a ; 11T ; before output
- ; actual write of 0FEh; distance: 22T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_22_69 ; 11T ; loop: (11+3)+8+(11+3)+5+12+5+11 = 69T
- ret ; We're out of the timed area now
- _WLoop_23_70: out (c),b ; 11T ; before output ; _WLoop_23_70: 23T between writes, 70T long
- ; actual output of 0FCh; distance: 47T from previous write
- ; 3T ; after output
- ld hl,0 ; 11T ; dummy, for delay (clobbers HL; only the OUTI loop reads HL)
- out (98h),a ; 9T ; before output
- ; actual write of 0FEh; distance: 23T from previous write!
- ; 3T ; after output
- exx ; 5T
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_23_70 ; 11T ; loop: (11+3)+11+(9+3)+5+12+5+11 = 70T
- ret ; We're out of the timed area now
- _WLoop_24_66: out (c),b ; 11T ; before output ; _WLoop_24_66: 24T between writes, 66T long
- ; actual output of 0FCh; distance: 42T from previous write
- ; 3T ; after output
- nop ; 5T ; dummy, for delay
- exx ; 5T
- out (c),a ; 11T ; before output ; uses C' as the port, hence BC' = BC on entry
- ; actual write of 0FEh; distance: 24T from previous write!
- ; 3T ; after output
- add hl,de ; 12T ; dec counter; there will be carry unless HL was 0.
- exx ; 5T
- jp c,_WLoop_24_66 ; 11T ; loop: (11+3)+5+5+(11+3)+12+5+11 = 66T
- ret ; We're out of the timed area now
- ; _CompareError1: report an unexpected byte where 0FEh was expected.
- ; In:  A  = byte actually read from VRAM
- ;      IY = VRAM address tracked by the caller
- ; Out: does not return; jumps to Finish with A = 5 and
- ;      ErrParams+0 = byte read, ErrParams+1 = expected byte (0FEh),
- ;      ErrParams+2/+3 = IY.
- ; NOTE(review): the only jump to this label in the visible source is
- ; commented out, so this handler appears to be unused at present.
- _CompareError1:
-         ld h,0FEh               ; expected value
-         ld l,a                  ; value found
-         ld (ErrParams+2),iy     ; VRAM address with error
-         ld (ErrParams),hl
-         ld a,5                  ; Error code 5: Unexpected VRAM contents during analysis
-         jp Finish
- ; _CompareError2: report an unexpected byte where 0FCh was expected.
- ; In:  A  = byte actually read from VRAM
- ;      IY = VRAM address tracked by the caller
- ; Out: does not return; jumps to Finish with A = 5 and
- ;      ErrParams+0 = byte read, ErrParams+1 = expected byte (0FCh),
- ;      ErrParams+2/+3 = IY.
- ; NOTE(review): nothing in the visible source jumps here.
- _CompareError2:
-         ld h,0FCh               ; expected value
-         ld l,a                  ; value found
-         ld (ErrParams+2),iy     ; VRAM address with error
-         ld (ErrParams),hl
-         ld a,5                  ; Error code 5: Unexpected VRAM contents during analysis
-         jp Finish
- ; _VerifyError: the read-back of the EEh/66h fill pattern failed.
- ; In:  B, C = counters of the _VRAMverify loop at the moment of the mismatch
- ;      (B = inner DJNZ counter, C = outer counter; they start at 0FFh / 10h
- ;      for the byte at 3000h and reach 01h / 01h for the last byte at 3FFEh).
- ; Out: does not return; jumps to Finish with A = 4 and
- ;      ErrParams = VRAM address of the byte that failed to verify.
- ;
- ; With R = number of bytes still to check, counting the failing one, the
- ; swapped and decremented counters give DE = R - 1 (a B of 0 wraps to 0FFh,
- ; which borrows correctly from the already decremented C). The failing byte
- ; is at 3000h + (4095 - R) = 3FFFh - (R - 1) - 1.
- ; The previous base of 4000h produced the address AFTER the failing byte
- ; (e.g. 3001h for a failure on the very first byte at 3000h).
- _VerifyError:
-         dec b                   ; Calc failure address
-         dec c
-         ld d,c                  ; swap bytes: D = outer counter - 1
-         ld e,b                  ;             E = inner counter - 1
-         ld hl,3FFFh
-         scf                     ; SBC with carry set subtracts DE + 1
-         sbc hl,de               ; HL = 3FFFh - DE - 1 = address of the bad byte
-         ld (ErrParams),hl
-         ld a,4                  ; Error code 4: VRAM verification error
-         jp Finish
- endp
- ; Fills 4095 bytes of VRAM, starting at address 3000h, with two alternating
- ; byte values.
- ; Input: E = value for the first byte of every pair (3000h, 3002h, ...)
- ;        D = value for the second byte of every pair (3001h, 3003h, ...)
- ;        e.g. if DE = 0305h, the values are 5, 3, 5, 3, 5, 3, ...
- ; Output: none
- ; Trashes: nothing (AF, BC and DE are saved and restored), but fiddles with
- ;          VDP registers: writes 0 to register 14 (and to its mirror byte),
- ;          rewrites register 6 from its mirror at RG0SAV+6, and leaves the
- ;          VDP write pointer just past the filled area.
- ; Not timing-exact; callers must not rely on its duration.
- ; NOTE(review): the two-byte sequences sent to port 99h must not be split by
- ; an interrupt handler that touches the VDP; the callers visible in this file
- ; run with interrupts disabled -- confirm for any other caller.
- FillVRAM proc
-         ; The longest supported frame is 80,000 cycles. The shortest write
-         ; loop in TestVRAMTiming takes 59 cycles and stores 2 bytes per
-         ; iteration, i.e. at most 2712 bytes per frame. Therefore 4095
-         ; bytes are enough, so we fill 4095 bytes starting at 3000h.
-         push bc
-         push de                 ; D is used as scratch below; keep the caller's DE
-         push af
-         xor a                   ; A14-A16 set to 0
-         ld (RG00SAV+14),a       ; mirror the value actually written to register 14 (0).
-                                 ; The old code stored the register-select byte 8Eh here.
-                                 ; NOTE(review): symbol spelled RG00SAV here but RG0SAV
-                                 ; elsewhere; kept as found -- confirm it is intended.
-         out (99h),a
-         ld a,80h+14             ; register 14 in V9938; 6 in earlier ones
-         out (99h),a
-         ld a,(RG0SAV+6)
-         out (99h),a
-         ld a,80h+6
-         out (99h),a             ; restore register 6 in case it was overwritten
-         xor a
-         out (99h),a             ; A0-A7 set to 0
-         ld a,70h
-         out (99h),a             ; A8-A13 = 30h (3000h), write mode
-         ld a,d
-         xor e
-         ld d,a                  ; D = toggle mask: A xor D flips between the two values
-         ld a,e                  ; first byte to write
-         ld bc,10FFh             ; total VRAM to fill: 255 + 15*256 = 4095
-                                 ; (prevents incrementing into A14,
-                                 ; allowing us to avoid setting A14-A16 later)
- _FillVRAMloop:
-         out (98h),a             ; 12T
-         xor d                   ; 5T ; switch to the other value
-         dec c                   ; 5T
-         jp nz,_FillVRAMloop     ; 11T ; inner loop: 12+5+5+11 = 33T between writes,
-                                 ;       using the per-instruction timings annotated
-                                 ;       on the write loops in this file
-         djnz _FillVRAMloop      ; 14T ; We're not under fixed-time constraints
-                                 ; -5T ; when the branch is not taken
-         pop af
-         pop de
-         pop bc
-         ret
-         endp
- ; BlankVideo: switch the display off (VDP blank mode).
- ; Sends the mirror of VDP register 1 (RG0SAV+1) with bit 6 cleared to the
- ; VDP. The mirror byte itself is not modified.
- ; Input: none
- ; Output: none
- ; Trashes: nothing (AF is saved and restored)
- BlankVideo proc
-         push af
-         ld a,(RG0SAV+1)
-         and 0BFh                ; clear /BLANK bit (bit 6)
-         out (99h),a             ; data byte first...
-         ld a,80h+1              ; ...then "write to register 1"
-         out (99h),a
-         pop af
-         ret
-         endp
- ; UnblankVideo: switch the display back on (leave VDP blank mode).
- ; Sends the mirror of VDP register 1 (RG0SAV+1) with bit 6 set to the VDP.
- ; The mirror byte itself is not modified.
- ; Input: none
- ; Output: none
- ; Trashes: nothing (AF is saved and restored)
- UnblankVideo proc
-         push af
-         ld a,(RG0SAV+1)
-         or 40h                  ; set /BLANK bit (bit 6): no blanking
-         out (99h),a             ; data byte first...
-         ld a,80h+1              ; ...then "write to register 1"
-         out (99h),a
-         pop af
-         ret
-         endp
- ; Code adapted from multiple sources on the internet.
- ; Divide cycles per frame (24 bits: the word at CycFrm1 is the low 16 bits, the byte at CycFrm3 the high 8 bits) by C, using a fixed-time restoring shift-and-subtract division.
- ; Input: C = divisor (assumes C > [CycFrm3] so that the result fits in 16 bits, and C <= 128 so that "adc a,a" on a remainder smaller than C cannot overflow 8 bits)
- ; Output: Quotient in HL, remainder in C.
- ; Trashes: AF
- ; Uses exactly 729 T-states regardless of input (on MSX, running on Z80)
- ; Note CycFrm3 is typically < 2 so any divisor > 1 will probably do.
- ; The quotient bits are shifted into HL inverted and HL is complemented once at the end; do not change any instruction here without redoing the cycle count.
- DivCycFrmByC proc
- ld hl,(CycFrm1) ; 17T ; HL = low 16 bits of the dividend
- ld a,(CycFrm3) ; 14T ; A = high 8 bits; doubles as the running remainder
- add hl,hl ; 12T ; First bit ; moves dividend bit 15 into the carry
- rept 16 ; 16 * (
- adc a,a ; 5T ; remainder = remainder*2 + next dividend bit
- sub c ; 5T ; trial subtraction; carry set means it did not fit
- jr nc,$+3 ; 13T ; rept-local labels are not working for us ; $+3 skips the one-byte "add a,c"
- ; -5T ; for false branch
- add a,c ; 5T ; Subtracted once too much, adjust back; compensates timing ; always leaves carry set
- ; Jump destination
- adc hl,hl ; 17T ; Shift in the inverted next bit of the quotient ; and shift out the next dividend bit
- endm ; )
- ld c,a ; 5T ; save remainder
- ld a,l ; 5T ; Complement HL
- cpl ; 5T
- ld l,a ; 5T
- ld a,h ; 5T
- cpl ; 5T
- ld h,a ; 5T ; total 7 * 5T for complement. Using ccf in the loop would be 16 * 5T.
- ret ; 11T
- ; 17+14+12+16*(5+5+13-5+5+17)+5+5+5+5+5+5+5+11 = 729
- endp
- ; Used for unit testing of the division routine.
- ; Reads the divisor from the byte at DAC+2, divides the cycles-per-frame
- ; value by it with DivCycFrmByC and stores the 16-bit quotient back at DAC+2.
- ; The remainder (returned in C) is discarded.
- ; Input: byte at DAC+2 = divisor (same constraints as DivCycFrmByC)
- ; Output: word at DAC+2 = quotient
- ; Trashes: AF, C, HL
- ; NOTE(review): presumably DAC+2 is the integer argument/result slot of a
- ; BASIC USR() call -- confirm against the caller.
- UnitTestDiv:
-         ld a,(DAC+2)
-         ld c,a                  ; DivCycFrmByC takes the divisor in C and overwrites A
-                                 ; at once; without this copy the divisor was never passed
-         call DivCycFrmByC
-         ld (DAC+2),hl
-         ret
|