; Assembler setup: 80286 instruction set, TINY memory model.
.286
.model TINY
.data
; Centring offsets for the 320x200 screen. The drawing loops count rows
; upward from -ADJ_Y and columns from -ADJ_X up to (but not including) ADJ_X.
ADJ_Y equ 100
ADJ_X equ 160
; Message printed by the exit code after the video mode is restored.
; 22h is the ASCII double-quote character; each line ends with LF (0Ah), CR (0Dh).
; There is no terminator byte: the exit code computes the length as
; (palette - exit_str), so nothing may be placed between this string and
; the palette label.
exit_str db "circle.com by ", 22h, "The Almighty Pegasus Epsilon", 22h
db " <pegasus@pimpninjas.org>", 0Ah, 0Dh
db "(C) 2021-2022 Distribute Unmodified - "
db "https://pegasus.pimpninjas.org/license", 0Ah, 0Dh
db "DOSBox users: Press Ctrl+F10 to release mouse to system.", 0Ah, 0Dh
; palette.inc is not visible here; it presumably defines the `palette` label
; and the colour bytes that the startup code streams to the VGA DAC - verify
; against that file.
include palette.inc
; Colour offset added to every pixel; decremented once per frame by the
; render loop.
step db 1
.code
ORG 100h
;-----------------------------------------------------------------------
; start - program entry point.
; Points ES at the VGA frame buffer, saves the current BIOS video mode on
; the stack (popped again by the exit code), switches to mode 13h and
; uploads the colour table at `palette` to DAC entries 1..255.
; Out:  ES = 0A000h, DX = 03C9h, one word (old video mode, AH = 0) pushed
; Falls through into the frame loop.
;-----------------------------------------------------------------------
start:
; Load ES without relying on the value BX happens to hold at entry.
; Same size as the MOV BH / MOV ES, BX pair it replaces (PUSH imm16 needs .286).
PUSH 0A000h
POP ES ; ES = video memory segment
CLD ; string instructions (OUTSB here, STOSB later) must count upward
;INT 33h ; reset mouse driver
MOV AH, 0Fh
INT 10h ; get current video mode in AL (AH and BH are also overwritten)
XOR AH, AH ; AX = 00:mode, i.e. a ready-made "set video mode" request
PUSH AX ; keep it for the exit code
MOV AL, 13h
INT 10h ; AH is still 0: set video mode 13h
LEA SI, palette ; DS:SI = colour data
MOV DX, 03C8h ; DAC write-index port
MOV AL, 1
OUT DX, AL ; start writing at palette entry 1, leaving entry 0 untouched
INC DX ; DX = 03C9h, DAC data port
MOV CX, 02FDh ; 765 bytes = 255 entries * 3 components
REP OUTSB ; load color data
;-----------------------------------------------------------------------
; frame - render one frame, then loop until a key is waiting.
; For each pixel of the top half of the screen the colour is
;     ((row * 307 / 256)^2 + col^2) / 32 + step       (low 8 bits)
; with colour 0 replaced by 1. Each colour is written twice: once at DI
; (top half) and once at 64000 - DI (bottom half).
; In:   ES = video memory segment, DS = segment holding `step`
; Uses: AX, BX, CX, DX, SI (row), BP (column), DI (pixel offset), flags
;-----------------------------------------------------------------------
frame:
; Select the VGA input status register explicitly. On the first pass DX
; still holds the DAC data port left behind by the palette upload, so
; without this the first retrace wait would poll the wrong port.
mov dx, 3DAh
xor di, di ; start at the first pixel of the screen
wait_for_retrace_start:
IN AL, DX
TEST AL, 8 ; bit 3 set = vertical retrace in progress
JZ wait_for_retrace_start
;MOV AX, 3
;INT 33h ; query mouse position
;SHR CX, 1
; planning notes for the mandelbrot/julia idea further down:
; CX = real coordinate
; DX = imaginary coordinate
; ES = video memory
; CS = code segment
; SP = stack pointer
; SP = register of last resort, since we know the value it needs on exit
; available: AH, AL, BH, BL, DS, FS, GS, SI, DI, BP
mov si, -ADJ_Y ; row counter, runs -ADJ_Y .. -1 (top half only)
row:
mov bp, -ADJ_X ; column counter, runs -ADJ_X .. ADJ_X-1
col:
mov ax, si ; ax = row
mov bx, 307 ; correct for non-square pixels
imul bx ; with fixed-point math: dx:ax = row * 307
; this 8-bit rotate right on a 32-bit virtual register
; is slow as hell. don't use this code.
; mov cx, 8 ; 2,2,1 cycles
;bigshift:
; shr ax, 1 ; 2,3,? cycles, 8 times = 16,24,???????? cycles
; rcr dx, 1 ; 2,9,3 cycles, 8 times = 16,72,24 cycles
; loop bigshift ; 8,11,6 cycles, 8 times = 64,88,48 cycles
; total: 98,186,73+???????? cycles - too slow!
; use this instead.
ror ax, 8 ; swap ah and al, 2,3,3 cycles
ror dx, 8 ; swap dh and dl, 2,3,3 cycles
xchg ah, dh ; swap ah and dh, 3,3,3 cycles
; total = 7,9,9 cycles - way faster!
; these three together leave ax = bits 8..23 of the original dx:ax,
; i.e. ax = row * 307 / 256 (dx now holds the leftover bytes and is ignored)
; back to the show...
imul ax
; dx:ax = row * row (dx = high word, ax = low word)
mov bx, ax
mov cx, dx
; cx:bx = row * row
mov ax, bp
imul ax
; dx:ax = col * col
add ax, bx ; add the low words first...
adc dx, cx ; ...then the high words plus the carry out of the low words
; dx:ax = row * row + col * col
; (at most 120*120 + 160*160 = 40000 here, so dx is always 0)
; stosb deals with all of this so we don't have to calculate address
; but if we did...
; mov di, si ; di = si
; add di, ADJ_Y ; di += 100 - adjust image to vertical screen center
; shl di, 2 ; di *= 4
; di = si * 4
; add di, si ; di += si
; add di, ADJ_Y ; di += 100 - adjust image to vertical screen center
; di = si * 4 + si
; di = si * 5
; shl di, 6 ; di *= 64
; di = si * 5 * 64
; di = si * 320
; add di, bp ; di += bp
; add di, ADJ_X ; di += 160 - adjust image to horizontal screen center
; di = si * 320 + bp
sar ax, 5 ; magic numbers
; scale entire image by dividing by 32
; (only al is used below, so the sign fill from sar does not matter)
add al, step ; shift the colours by the per-frame offset
; test al, al ; skip the zeroth entry in the palette - that's black
; jnz go
; inc al
; branches are slow, use this trick here instead!
cmp al, 1 ; cmp sets FLAGS as if it were doing sub <arg>, <arg>
; if al == 0, subtracting 1 results in a borrow, which sets CF
adc al, 0 ; adc adds its second argument plus the carry flag
; to its first argument, so if al = 0, add one to it
; without ever branching!
go:
; Mirror the pixel through the screen centre: target offset = 64000 - di.
; 64000 - (320*y + x) = 320*(199 - y) + (320 - x), so column x lands on
; column 320 - x, which keeps both halves centred on the same column.
; Side effects of using 64000 rather than 63999: for di = 0 the mirrored
; byte goes to offset 0FA00h, one past the last visible pixel, and offset
; 32000 (first pixel of the middle row) is never written.
neg di
add di, 0FA00h ; 320 * 200 = 64000 = 0xFA00
mov es:[di], al ; put the color on the screen (bottom half)
neg di
add di, 0FA00h ; undo the mirroring: di is the top-half offset again
stosb ; put the color on the screen (top half) and move to next pixel
inc bp
cmp bp, ADJ_X ; still bp < ADJ_X? (not yet at the end of the row)
jl col
inc si
test si, si ; is si < 0? (not yet at the middle of the screen)
jl row
dec step ; shift the colours by one palette entry for the next frame
; mandelbrot/julia ponderings
; for row 0 to 199 AH
; for col 0 to 319 BX
; z2.real = z.real * z.real
; z2.imag = z.imag * z.imag
; ; escape check
; z.imag = 2 * z.real * z.imag + DX
; z.real = z2.real - z2.imag + CX
; ; periodicity check
; ; store result at row * 320 + col - seven registers needed
; ; but only if calculating pixel address - stosb eliminates that need?
; loop col
; loop row
MOV DX, 3DAh ; dx was clobbered by the multiplies above
wait_for_retrace_end:
IN AL, DX
TEST AL, 8
JNZ wait_for_retrace_end ; spin while the retrace bit is still set
MOV AH, 1
INT 16h ; check for a keystroke without removing it; ZF = 1 if none
JZ frame ; no key waiting: draw the next frame
;-----------------------------------------------------------------------
; exit - reached by falling out of the frame loop once a key is waiting.
; Restores the video mode saved at startup, consumes the keystroke,
; prints exit_str at the top-left corner and returns to DOS.
; Expects the word pushed at startup (AH = 0, AL = old mode) on top of
; the stack.
;-----------------------------------------------------------------------
exit:
; reset video mode
POP AX ; AH = 0 (set mode), AL = mode saved at startup
INT 10h
; clear keyboard buffer
XOR AH, AH ; request "read key" explicitly instead of trusting AH to survive INT 10h
INT 16h ; removes the keystroke that ended the frame loop
; print exit message
MOV AX, CS
MOV ES, AX ; ES:BP must point at the string
MOV AX, 1301h ; AH = 13h write string, AL = 1: advance the cursor
MOV BX, 7 ; BH = page 0, BL = attribute 7
MOV CX, palette - exit_str ; length: everything between the two labels
XOR DX, DX ; DH = row 0, DL = column 0
LEA BP, exit_str
INT 10h
; exit
RET ; near return through the word left on the stack at program entry
END start