; a small S.A.T.U.R.N. demo by DamageX
; rendering in interlaced mode, with lighting, Z sort, and back face culling
; (never mind that Z sort isn't really needed for this particular model!)
; and (poorly) compressed 16-bit bitmap
; assemble with SUPERAS

        setpc $06004000

; program RAM usage
;
; 60DB000 polygon avg. Z coordinates and polygon numbers
;
; 60DF000 number of vertices
; 60DF004 number of polygons
; 60DF008 axy
; 60DF00C axz
; 60DF010 ayz
; 60DF014 camdist
; 60DF018 zoom
; 60DF01C vblank frame counter
; 60DF020-8 sin,cos,-sin axy
; 60DF02C-34 sin,cos,-sin axz
; 60DF038-40 sin,cos,-sin ayz
;
; 60E0000 untransformed vertex data
; 60F0000 transformed vertex data
; 60F8000 polygon list


        ; change system clock to 28.6MHz
        ;
        ; The BIOS clock-change service is called through the pointer stored
        ; at $6000320.  It takes the requested mode in r4 (first argument
        ; register): 0 selects the 26.8MHz / 320-pixel clock, 1 selects the
        ; 28.6MHz / 352-pixel clock.  r4 is loaded explicitly here so the
        ; result does not depend on whatever the loader left in the register.

        imov.l #$6000320,r1
        mov.l @r1,r1            ; r1 = address of the BIOS routine
        mov #1,r4               ; mode 1 = 28.6MHz (matches the 352-wide screen)
        jsr @r1
        nop

        ; setup vblank interrupt routine

; 25FE00A0 - SCU interrupt control register 
; setting a bit masks the interrupt
; bit 0 is VBlank (vector $40)

        ; install vbint in the vector slot at $6000100
        imov.l $6000100,r1
        mova (vbint,PC),r0      ; vbint is long-aligned (align.l below), as mova requires
        mov.l r0,@r1

        ; write the SCU interrupt mask; bit 0 clear leaves VBlank unmasked
        imov.l $25FE00A0,r2
        imov.l $FFFFBFFE,r4
        mov.l r4,@r2

        ; SR = $E0, i.e. interrupt mask field (bits 7-4) = $E
        imov.l #$E0,r1
        ldc r1,SR

        bra main
        nop

        align.l

vbint:
; VBlank interrupt handler (installed at $6000100 by the startup code).
; Increments the 32-bit frame counter at $60DF01C, which waitv polls, and
; rewrites the SCU interrupt mask with the same value the startup code used.
; Only r2 and r4 are used; both are saved and restored on the stack.
        mov.l r2,@-r15
        mov.l r4,@-r15

        ; frame counter += 1
        imov.l $60DF01C,r2
        mov.l @r2,r4
        add #1,r4
        mov.l r4,@r2

        ; re-write the SCU interrupt mask (same value as at startup)
        ; NOTE(review): presumably needed to re-arm the interrupt - confirm
        imov.l $25FE00A0,r2
        imov.l $FFFFBFFE,r4
        mov.l r4,@r2

        mov.l @r15+,r4
        mov.l @r15+,r2
        rte
        nop                     ; rte delay slot

        idata

main:

; init vdp2 registers
; Copies the word table regvalues2 into the VDP2 register block.  The first
; table word goes to $5F80000; the pointer then jumps to $5F8000E and every
; following table word is written to consecutive registers up to $5F8011E.
; The table word that lines up with the reserved register $5F800FE is read
; (so the table stays in step) but not written.
        imov.l $5F80000,r2    ; VDP2 registers address
        mov.l @(blah3,PC),r3    ; table with register values address
        imov.l $5F800FE,r5    ; a reserved register address to skip
        imov.l $5F80120,r6    ; last register + 2

        mov.w @r3+,r1
        mov.w r1,@r2            ; set $5F80000 and then skip to $5F8000E
        add #$0E,r2

loop1:
        mov.w @r3+,r1           ; always consume one table word
        cmp/eq r2,r5
        bt nowrite              ; reserved register: do not write
        mov.w r1,@r2
nowrite:
        add #2,r2
        cmp/eq r2,r6
        bf loop1                ; until r2 reaches last register + 2

        bra copybmp
        nop

        idata
        
; address of the VDP2 register value table, loaded PC-relative above
blah3:
        dc.l regvalues2


; register settings for a 352x448 screen with NBG0 as a 32K color bitmap
;
; Consumed by the loop at main: the first word goes to $5F80000, the second
; to $5F8000E, and the rest to consecutive registers up to $5F8011E.  The
; address on each row is the register the first word of that row lands on.

regvalues2:
        dc.w $80C1                      ; 5F80000 display on, interlaced, 352 wide
        dc.w 0                          ; 5F8000E RAM control
        dc.w $EF44,$EF44,$FFFF,$FFFF,$EF44,$EF44,$FFFF,$FFFF    ; 5F80010 vram cycle patterns
        dc.w $0101,0,0,0                ; 5F80020 NBG0 display enable + transparency disable
        dc.w $0036,0,0,0                ; 5F80028 NBG0: 32768 colors, 512x512 bitmap, bitmap enable
        dc.w 0,0,0,0,0,0,0,0            ; 5F80030
        dc.w 0,0,0,0,0,0,0,0            ; 5F80040
        dc.w 0,0,0,0,0,0,0,0            ; 5F80050
        dc.w 0,0,0,0,0,0,0,0            ; 5F80060
        dc.w 0,0,0,0,1,0,1,0            ; 5F80070 NBG0 scroll 0, h/v increment integer = 1
        dc.w 0,0,0,0,0,0,0,0            ; 5F80080
        dc.w 0,0,0,0,0,0,0,0            ; 5F80090
        dc.w 0,0,0,0,0,0,0,0            ; 5F800A0
        dc.w 0,0,0,0,0,0,0,0            ; 5F800B0
        dc.w 0,0,0,0,0,0,0,0            ; 5F800C0
        dc.w 0,0,0,0,0,0,0,0            ; 5F800D0
        dc.w $20,0,0,0,0,0,0,0          ; 5F800E0 sprite control: palettized and RGB
        dc.w $707,$707,$707,$707        ; 5F800F0 sprite priorities, all types = 7
        dc.w 1,0,0,0                    ; 5F800F8 NBG0 priority 1; last word (5F800FE) is skipped
        dc.w 0,0,0,0,0,0,0,0            ; 5F80100
        dc.w 0,0,0,0,0,0,0,0            ; 5F80110


; 5E00000-5E7FFFF - VDP2 RAM
; 5F00000-5F00FFF - color RAM
; vdp 2 registers
; 5F80000 r/w - screen mode
;       bit 15 disp - 1=enabled
;       bit 8 bdclmd - 0=black, 1=back screen
;       bit 7-6 lsmd - 0=normal, 3=interlaced
;       bits 5-4 vreso - 0=224 lines, 1=240 lines
;       bit 2 hreso2 - 0=normal, 1=31khz horizontal frequency
;       bit 1 hreso1 - 0=normal, 1=double hres
;       bit 0 hreso0 - 0=320/640 pixels, 1=352/704 pixels
; 5F80002 r/w - external signal enable
;       bit 9 exlten - 0=latch h/v counters when reading this register
;                       1=latch on external signal (eg. light gun)
;       bit 8 exsyen - 0=normal, 1=external sync
;       bit 1 dasel - (something to do with external video priority?)
;       bit 0 exbgen - 1=substitute external video for nbg1
; 5F80004 r - screen status
;       bit 9 exltfg - 1=hv counters are latched, resets on read
;       bit 8 exsyfg - set when vdp is synchronized with external signal
;       bit 3 vblank
;       bit 2 hblank
;       bit 1 odd - (always shows odd in non-interlaced mode)
;       bit 0 pal
; 5F80006 r/w - vram size
;       bit 15 vramsz - 0=4mbit 1=8mbit
;       bits 3-0 vdp version
; 5F80008 r - h-counter bits 9-0 - (counts 14mhz pixels)
; 5F8000A r - v-counter bits 9-0 - counts 0-447
;       bit 0 clear for non-interlace 15khz, set for interlaced even fields
; 5F8000C reserved
; 5F8000E r/w - RAM control
;       bit 15 crkte - 0=coefficient table stored in VRAM, 1=stored in CRAM
;       bits 13-12 crmd - 0=RGB15x1024, 1=RGB15x2048, 2=RGB24x1024
;       bits 9 vrbmd - \
;       bits 8 vramd -  \ 1=partition into two banks
;       bits 7-6 rdbsa0 - \    0=not used for RBG0
;       bits 5-4 rdbsa1 -  \   1=used for end table
;       bits 3-2 rdbsb0 -   \  2=used for name table
;       bits 1-0 rdbsb1 -    \ 3=used for character set (or bitmap)
; 5F80010 w - vram cycle pattern A0 (nibbles 0-3)
; 5F80012 w - vram cycle pattern A0 (nibbles 4-7)
; 5F80014 w - vram cycle pattern A1 values 0-3 NBGx name table read
; 5F80016 w - vram cycle pattern A1        4-7 NBGx charset/bitmap read
; 5F80018 w - vram cycle pattern B0        8-11 undefined
; 5F8001A w - vram cycle pattern B0        12-13 NBG0/1 vscroll table read
; 5F8001C w - vram cycle pattern B1        14 CPU access
; 5F8001E w - vram cycle pattern B1        15 no access
; 5F80020 w - screen display enable
;       bits 12-8 r0/n3/n2/n1/n0 transparency disable
;       bits 5-0 r1/r0/n3/n2/n1/n0 display enable
; 5F80022 w - mosaic control
;       bits 15-12 mzszv - vertical dot size (1-16 pixels)
;       bits 11-8 mzszh - horizontal dot size (1-16 pixels)
;       bits 4-0 - r0/n3/n2/n1/n0 mosaic enable
; 5F80024 w - special function code select
;       bits 4-0 - r0/n3/n2/n1/n0 - 0=code A, 1=code B
; 5F80026 w - special function code
;       bits 15-8 sfcdb - \  enables special function according
;       bits 7-0 sfcda -   \  to bits 1-3 of dot color code
; 5F80028 w - character control (n0,n1)
;       bits 13-12 - nbg1 color depth 
;       bits 11-10 - nbg1 bitmap size
;       bit 9 - nbg1 bitmap enable      bit 8 - nbg1 character size
;       bits 6-4 - nbg0 color depth 
;       bits 3-2 - nbg0 bitmap size
;       bit 1 - nbg0 bitmap enable      bit 0 - nbg0 character size
; 5F8002A w - character control (n2,n3,r0)
;       bits 14-12 - rbg0 color depth
;       bits 10 - rbg0 bitmap size
;       bit 9 - rbg0 bitmap enable      bit 8 - rbg0 character size
;       bit 5 - nbg3 color depth        bit 4 - nbg3 character size
;       bit 1 - nbg2 color depth        bit 0 - nbg2 character size
;         color depths  0=16          bitmap sizes  0=512x256
;                       1=256                       1=512x512
;                       2=2048                      2=1024x256
;                       3=32768                     3=1024x512
;                       4=16777216    char sizes 0=8x8, 1=16x16
; 5F8002C w - bitmap palette number (nbg0,nbg1)
;       bit 13 - priority               bit 12 - color calculation enable
;       bits 10-8 - nbg1 bitmap palette
;       bit 5 - priority               bit 4 - color calculation enable
;       bits 2-0 - nbg0 bitmap palette
; 5F8002E w - bitmap palette number (rbg0)
;       bit 5 - priority               bit 4 - color calculation enable
;       bits 2-0 - rbg0 bitmap palette
; 5F80030 w - pattern name control nbg0
; 5F80032 w - pattern name control nbg1
; 5F80034 w - pattern name control nbg2
; 5F80036 w - pattern name control nbg3
; 5F80038 w - pattern name control rbg0
;       bit 15 - name data size 0=1 word, 1=2 words
;       bit 14 - 0=10bit char number with h/v flip, 1=12bit char number
;       bit 9 - priority something-or-other
;       bit 8 - color calculation something-or-other
;       bits 7-5 - palette number (top 3 bits?)
;       bits 4-0 - charset number?
; 5F8003A w - plane size
;       bits 15-14 rotation parameter A screen-over process\  0=repetition
;       bits 13-12 rotation parameter A plane size          | 1=character
;       bits 11-10 rotation parameter B screen-over process/  2=transparent
;       bits 9-8 rotation parameter B plane size
;       bits 7-6 NBG3 plane size \
;       bits 5-4 NBG2 plane size  \   0=1 page
;       bits 3-2 NBG1 plane size   \  1=2 pages x 1 page
;       bits 1-0 NBG0 plane size    \ 2=2 pages x 2 pages
; 5F8003C w - map offset
;       bits 14-12 NBG3
;       bits 10-8 NBG2
;       bits 6-4 NBG1
;       bits 2-0 NBG0
; 5F8003E w - map offset
;       bits 6-4 rotation parameter B
;       bits 2-0 rotation parameter A
; 5F80040 w - map NBG0 - bits 13-8 plane B, bits 5-0 plane A
; 5F80042 w - map NBG0 - bits 13-8 plane D, bits 5-0 plane C
; 5F80044 w - map NBG1 - bits 13-8 plane B, bits 5-0 plane A
; 5F80046 w - map NBG1 - bits 13-8 plane D, bits 5-0 plane C
; 5F80048 w - map NBG2 - bits 13-8 plane B, bits 5-0 plane A
; 5F8004A w - map NBG2 - bits 13-8 plane D, bits 5-0 plane C
; 5F8004C w - map NBG3 - bits 13-8 plane B, bits 5-0 plane A
; 5F8004E w - map NBG3 - bits 13-8 plane D, bits 5-0 plane C
; 5F80050 w - map rotation A - bits 13-8 plane B, bits 5-0 plane A
; 5F80052 w - map rotation A - bits 13-8 plane D, bits 5-0 plane C
; 5F80054 w - map rotation A - bits 13-8 plane F, bits 5-0 plane E
; 5F80056 w - map rotation A - bits 13-8 plane H, bits 5-0 plane G
; 5F80058 w - map rotation A - bits 13-8 plane J, bits 5-0 plane I
; 5F8005A w - map rotation A - bits 13-8 plane L, bits 5-0 plane K
; 5F8005C w - map rotation A - bits 13-8 plane N, bits 5-0 plane M
; 5F8005E w - map rotation A - bits 13-8 plane P, bits 5-0 plane O
; 5F80060 w - map rotation B - bits 13-8 plane B, bits 5-0 plane A
; 5F80062 w - map rotation B - bits 13-8 plane D, bits 5-0 plane C
; 5F80064 w - map rotation B - bits 13-8 plane F, bits 5-0 plane E
; 5F80066 w - map rotation B - bits 13-8 plane H, bits 5-0 plane G
; 5F80068 w - map rotation B - bits 13-8 plane J, bits 5-0 plane I
; 5F8006A w - map rotation B - bits 13-8 plane L, bits 5-0 plane K
; 5F8006C w - map rotation B - bits 13-8 plane N, bits 5-0 plane M
; 5F8006E w - map rotation B - bits 13-8 plane P, bits 5-0 plane O
;
; 5F80070 w - NBG0 horizontal scroll (integer) bits 10-0
; 5F80072 w - NBG0 horizontal scroll (fraction) bits 15-8
; 5F80074 w - NBG0 vertical scroll (integer) bits 10-0
; 5F80076 w - NBG0 vertical scroll (fraction) bits 15-8
; 5F80078 w - NBG0 horizontal increment (integer) bits 2-0
; 5F8007A w - NBG0 horizontal increment (fraction) bits 15-8
; 5F8007C w - NBG0 vertical increment (integer) bits 2-0
; 5F8007E w - NBG0 vertical increment (fraction) bits 15-8
; 5F80080 w - NBG1 horizontal scroll (integer) bits 10-0
; 5F80082 w - NBG1 horizontal scroll (fraction) bits 15-8
; 5F80084 w - NBG1 vertical scroll (integer) bits 10-0
; 5F80086 w - NBG1 vertical scroll (fraction) bits 15-8
; 5F80088 w - NBG1 horizontal increment (integer) bits 2-0
; 5F8008A w - NBG1 horizontal increment (fraction) bits 15-8
; 5F8008C w - NBG1 vertical increment (integer) bits 2-0
; 5F8008E w - NBG1 vertical increment (fraction) bits 15-8
; 5F80090 w - NBG2 horizontal scroll (integer) bits 10-0
; 5F80092 w - NBG2 vertical scroll (integer) bits 10-0
; 5F80094 w - NBG3 horizontal scroll (integer) bits 10-0
; 5F80096 w - NBG3 vertical scroll (integer) bits 10-0
;
; 5F80098 w - horizontal reduction enable
;       bits 9-8 nbg1 \  0=normal       1=half size
;       bits 1-0 nbg0  \ 2 or 3=quarter size
; 5F8009A w - line and vertical cell scroll control (??)
; 5F8009C-E w - vertical cell scroll table address (bits 18-1)
; 5F800A0-2 w - NBG0 line scroll table address (bits 18-1)
; 5F800A4-6 w - NBG1 line scroll table address (bits 18-1)
; 5F800A8-A w - line color table address (bits 18-1) bit 31=lct enable
; 5F800AC-E w - back screen table address (bits 18-1) bit 31=bst enable
; 5F800B0 w - rotation parameter mode rbg0
;                0=A    2=according to coefficient data
;                1=B    3=according to rotation parameter window
; 5F800B2 w - rotation parameter read control (??)
; 5F800B4 w - coefficient table control ...
;
; ...
;
; 5F800E0 w - sprite control
;               bits 13-12 color calculation condition
;                       0=priority<=color calc number
;                       1=priority=color calc number
;                       2=priority>=color calc number
;                       3=only when color data MSB is set
;               bits 10-8 color calculation condition number
;               bit 5 sprite color mode 0=palettized only 1=palettized and RGB
;               bit 4 sprite window enable
;               bits 3-0 sprite type (?)
; ...
;
; 5F800F0 w - priority - bits 10-8 sprite type 1, bits 2-0 sprite type 0
; 5F800F2 w - priority - bits 10-8 sprite type 3, bits 2-0 sprite type 2
; 5F800F4 w - priority - bits 10-8 sprite type 5, bits 2-0 sprite type 4
; 5F800F6 w - priority - bits 10-8 sprite type 7, bits 2-0 sprite type 6
; 5F800F8 w - priority number - bits 10-8 NBG1, bits 2-0 NBG0
; 5F800FA w - priority number - bits 10-8 NBG3, bits 2-0 NBG2
; 5F800FC w - priority number - bits 2-0 RBG0
; 5F800FE reserved
;
; ...


copybmp:
; Unpack the background picture (thebmp) into VDP2 RAM at $05E00000.
;
; Source layout as read here: word 0 = pixels per row (r2), word 1 = number
; of rows (r3), then 4 bytes that are skipped, then the pixel words.
; Each destination row starts 1024 bytes after the previous one.
;
; Registers: r0 = source pointer, r1 = start of next row, r4 = write pointer,
; r6 = pixel counter in row, r7 = row counter, r8 = last pixel word fetched
; (negative means "fetch a new word next").
        mov.l @(bmpaddr,PC),r0
        mov.l @(blahd,PC),r1    ; VDP2 RAM address
        mov.w @r0+,r2           ; pixels per row
        mov.w @r0+,r3           ; number of rows

        add #4,r0               ; skip the rest of the header

        mov.l @(blahbh,PC),r9   ; screen bitmap width in bytes

        mov -1,r8               ; nothing pending: first iteration fetches

        mov #0,r7
loopy2:
        mov r1,r4               ; r4 = start of this row
        add r9,r1               ; r1 = start of next row
        mov #0,r6
loopy:
        cmp/pz r8               ; previous word had its high bit clear?
        bt a112                 ; yes: repeat it

        mov.w @r0+,r8           ; no: fetch a new (sign-extended) word
        bra a113
        mov.w r8,@r4            ; (delay slot) write it

a112:
        ; awesomest data compression scheme ever...
        ; if the high bit was clear then write the pixel twice
        mov.w r8,@r4
        mov -1,r8               ; repeat consumed; fetch next time
a113:
        add #2,r4
        add #1,r6
        cmp/eq r6,r2
        bf loopy                ; a pending repeat carries over into the next row
        add #1,r7
        cmp/eq r7,r3
        bf loopy2

        bra vdp1crap
        nop

        align.l
blahd:
        dc.l $05E00000          ; destination: start of VDP2 RAM
blahbh:
        dc.l 1024               ; destination row pitch in bytes
bmpaddr:
        dc.l thebmp             ; source: packed bitmap


vdp1crap:
; init vdp1 command tables
; Copies ptable into the start of VDP1 RAM.  ptable holds three 32-byte
; command tables ($60 bytes), so the copy stops at $5C00060; copying further
; would read past the end of ptable.  buildpolys rewrites everything from
; $5C00040 onward each frame.
        mova @(ptable,PC),r0        ; initial command tables
        imov.l $5C00000,r2    ; VDP1 RAM base address
        imov.l $5C00060,r6    ; last word write address + 2 (3 tables * $20)

@loop2:
        mov.w @r0+,r1
        mov.w r1,@r2
        add #2,r2
        cmp/eq r2,r6
        bf @loop2

; init vdp1 registers
; Writes the six words of regvalues1 to $5D00000-$5D0000A.
        mova @(regvalues1,PC),r0        ; table with register values
        imov.l $5D00000,r2    ; VDP1 registers address
        imov.l $5D0000C,r6    ; last register + 2

@loop1:
        mov.w @r0+,r1
        mov.w r1,@r2
        add #2,r2
        cmp/eq r2,r6
        bf @loop1


        bra rendercrap
        nop

        idata

; VDP1 register values, written in order to $5D00000-$5D0000A by vdp1crap
regvalues1:
        dc.w 0,8,2,0            ; tv mode 0, double interlace, plot at frame change, erase data 0
        dc.w 0,$58E0            ; erase upper left 0,0 / lower right X=$2C units, Y=$E0


; 5C00000-5C7FFFF - VDP1 work RAM
; 5C80000-5CBFFFF - off-screen frame buffer access
; vdp 1 registers
; 5D00000 w - tv mode
;               bit 3 enables v-blank erase
;               bits 0-2 display mode
;                       0=normal 512x256        1=high res 1024x256
;                       2=rotated 512x256       3=rotated 512x512
;                       4=31KHz
; 5D00002 w - frame buffer change register
;               bit 4 even/odd coordinate select bit 0=even 1=odd
;                       determines which pixels are sampled when drawing
;                       scaled/distorted sprites using high-speed shrink mode
;               bit 3 double interlace enable
;               bit 2 - set to 0 for normal mode
;                       when in double interlace mode 0=even lines 1=odd
;                       (even and odd lines are rendered to different buffers)
;               bits 0-1 change mode
;                       0=automatic - switches buffers and erases
;                       2=manual - no change
;                       3=reverts to manual after one change+erase
; 5D00004 w - plot trigger register 0=idle 1=start now 2=start at frame change
; 5D00006 w - data word to be written during frame buffer erase
; 5D00008 w - erasure upper left coordinate
;               bits 14-9 - X coordinate (units of 16 bytes)
;               bits 8-0 - Y coordinate
; 5D0000A w - erasure lower right coordinate
;               bits 15-9 - X coordinate (units of 16 bytes)
;               bits 8-0 - Y coordinate
; 5D0000C w - write zero to stop drawing
; 5D00010 r - status register
;               bit 1 - set if drawing is done
;               bit 0 - set if drawing was done at the end of previous frame
; 5D00012 r - last operation command address (at previous frame end)
; 5D00014 r - current operation command address
; 5D00016 r - mode status register (mirrors some bits from first 3 registers)


; Initial VDP1 command tables, 16 words ($20 bytes) each, copied to the start
; of VDP1 RAM by vdp1crap.  Word positions follow the command table format
; described below.
ptable:
        ; table 0: set system clipping, vertex C = (351,447)
        dc.w 9,0,0,0,0,0,0,0
        dc.w 0,0,351,447,0,0,0,0

        ; table 1: set local coordinate, vertex A = (176,224)
        dc.w 10,0,0,0,0,0,176,224
        dc.w 0,0,0,0,0,0,0,0

        ; table 2: polygon command with the end bit set, so drawing stops here
        ; until buildpolys overwrites this slot
        dc.w $8004,0,$08C0,$FFFF,0,0,40,40
        dc.w 59,40,59,59,40,59,0,0



; command table format
; 00 - control word
;       bit 15 - end drawing (stops without processing this table)
;       bits 14-12 jump mode
;               0=advances to the next table after this one
;               1=jumps to CMDLINK table after this one
;               2=CMDLINK table receives subroutine call after this table
;               3=returns from subroutine after this table
;               4=skips to the next table
;               5=jumps to CMDLINK table, skipping this one
;               6=CMDLINK table receives subroutine call, skipping this one
;               7=returns from subroutine without processing this table
;       bits 11-8 zoom (point which stays fixed when a sprite is scaled)
;               0=none (uses vertex A and vertex C as corners)
;               5=upper left    6=upper center  7=upper right
;               9=centerleft   10=centercenter 11=centerright
;              13=lower left   14=lower center 15=lower right
;                       (uses vertex A as fixed point, vertex B as size)
;       bits 5-4 character read direction
;               0=normal        1=H flip
;               2=V flip        3=H+V flip
;       bits 3-0 command select
;               0=normal sprite         1=scaled sprite
;               2=distorted sprite     
;               4=polygon               5=polyline
;               6=line
;               8=set user clipping     9=set system clipping
;              10=set local coordinate
; 02 - CMDLINK
;       bits 15-2 address of command table used for jumps/calls
; 04 - draw mode word
;       bit 15 - sets MSB for all pixels that are written
;       bit 12 - enables high-speed shrink
;       bit 11 - pre-clipping disable
;       bit 10 - user clipping enable (system clipping is always on)
;       bit 9 - specifies whether clipping is done inside or outside
;               0=drawing is done inside
;       bit 8 - mesh enable
;       bit 7 - end code disable (character pattern)
;       bit 6 - transparent pixel disable
;       bits 5-3 color mode
;               0=16 colors palettized  1=16 colors VDP1 lookup table
;               2=64 colors palettized  3=128 colors palettized
;               4=256 colors palettized 5=32K colors
;       bit 2 - gouraud shading enable
;       bit 1 - 1/2 luminance enable           \
;       bit 0 - background 1/2 luminance enable \ (combine for transparency)
; 06 - aux color
;       specifies either a color bank, lookup table address, or RGB color
;                         (top 8-12 bits)    (top 14 bits)
; 08 - character address (top 14 bits)
; 0A - character size (bits 13-8 are X/8, bits 7-0 are Y)
; 0C - vertex A - X coordinate \
; 0E - vertex A - Y coordinate  \
; 10 - vertex B - X coordinate   \
; 12 - vertex B - Y coordinate    \ value ranges from -1024 to 1023
; 14 - vertex C - X coordinate    / and is sign-extended to 16 bits
; 16 - vertex C - Y coordinate   /
; 18 - vertex D - X coordinate  /
; 1A - vertex D - Y coordinate /
; 1C - gouraud shading table address (all 16 bits)
; system clipping uses vertex C (as lower right corner)
; user clipping uses vertex A and vertex C
; local coordinates uses vertex A


; Saturn SH-2 memory map (add $20000000 for non-cacheing access)
;
; 0-1FFFF boot ROM
; 100000 SMPC (registers in this range are written by BIOS routines)
; 180000-18FFFF backup RAM?
; 200000-2FFFFF work RAM low?
; 2000000-3FFFFFF cartridge
;
; 5A00000-5A7FFFF sound RAM
; 5B00000 SCSP registers
; 5C00000-5C7FFFF VDP1 RAM
; 5C80000-5CBFFFF off-screen frame buffer
; 5D00000 VDP1 registers
; 5E00000-5E7FFFF VDP2 RAM
; 5F00000-5F00FFF color RAM
; 5F80000 VDP2 registers
; 5FC0000 SMPC?
; 5FE0000 SCU
;
; 6000000-60FFFFF SDRAM


rendercrap:
; One-time model setup.
; Reads the counts from the model header, copies the vertex and normal
; coordinates (byte-swapped longs) to $60E0000 and the polygon vertex indices
; (byte-swapped words) to $60F8000, then initialises the camera distance and
; the three rotation angles.  Leaves r14 = $60DF000, the variable block base
; used by every later routine.

        imov.l #$60DF000,r14
        mov.l @(madr,PC),r0
        add #4,r0               ; skip the first 4 bytes of the model file
        mov.w @r0+,r1           ; number of vertices
        mov.l r1,@r14
        mov.w @r0+,r2           ; number of polygons
        mov.l r2,@(4,r14)
        mov.w @r0+,r3           ; number of lines (ignored)

        add #2,r0       ; JUST SAY NO to unaligned reads

        add r2,r1               ; r1 = vertices + polygons (entries to copy)

        ; copy vertex+normal data
        ; three longs per entry: r6 = 3 * r1
        mov r1,r6
        shll r6
        add r1,r6
        imov.l #$60E0000,r5
a100:
        mov.l @r0+,r4
        swap.b r4,r4            ; swap.b / swap.w / swap.b reverses all four bytes
        swap.w r4,r4
        swap.b r4,r4
        mov.l r4,@r5
        add #4,r5
        dt r6
        bf a100

        ; copy polygon data
        ; four index words per polygon: r2 = 4 * polygon count
        imov.l #$60F8000,r3
        shll2 r2
a101:
        mov.w @r0+,r4
        swap.b r4,r4            ; swap the two bytes of the index
        mov.w r4,@r3
        add #2,r3
        dt r2
        bf a101

        imov.l #400,r4
        mov.l r4,@($14,r14)     ; camdist
;        imov.l #120,r4
 ;       mov.l r4,@($18,r14)     ; zoom

        ; The rotation angles are read by transforms on every frame and axy is
        ; never written anywhere else, so give all three a defined start value
        ; instead of relying on whatever work RAM contained at boot.
        mov #0,r4
        mov.l r4,@(8,r14)       ; axy
        mov.l r4,@($0C,r14)     ; axz
        mov.l r4,@($10,r14)     ; ayz

        bra transforms
        nop

        idata

madr:
        dc.l model


transforms:

        ; lookup sin/cos values from table
        imov.l #$FFFC,r8
        imov.l #$4000,r9
        mov.l @(stadr,PC),r6
        mov r14,r13
        add #$20,r13

        bsr cslookup
        mov.l @(8,r14),r0
        add #12,r13
        bsr cslookup
        mov.l @($0C,r14),r0
        add #12,r13
        bsr cslookup
        mov.l @($10,r14),r0


        ; process the entire list of verteces
        mov.l @r14,r1
        mov.l @(4,r14),r2
        add r2,r1
        imov.l #$60E0000,r2
        imov.l #$60F0000,r3

a102:
        ; axy rotation
        clrmac
        mov r14,r13
        add #$24,r13
        mac.l @r2+,@r13+
        mac.l @r2+,@r13+
        sts MACL,r6             ; nx
        shlr16 r6
        exts.w r6,r6

        add -12,r13
        clrmac
        add -8,r2
        mac.l @r2+,@r13+
        mac.l @r2+,@r13+
        sts MACL,r7             ; ny
        shlr16 r7
        exts.w r7,r7

        ; axz rotation
        mov.l @($30,r14),r9
        dmuls.l r6,r9
        add #12,r13
        mac.l @r2+,@r13+
        sts MACL,r5             ; nx final
        shlr16 r5
        exts.w r5,r5

        add -4,r2
        mov.l @r2+,r12
        dmuls.l r9,r12
        sts MACL,r12
        mov.l @($2C,r14),r9
        dmuls.l r6,r9
        sts MACL,r11
        add r11,r12             ; nz
        shlr16 r12
        exts.w r12,r12

        ; ayz rotation
;        mov.l @($38,r14),r8
 ;       mov.l @($3C,r14),r9
  ;      mov.l @($40,r14),r10
        mov.l @r13+,r8
        mov.l @r13+,r9
        mov.l @r13+,r10
        dmuls.l r12,r9
        sts MACL,r11
        dmuls.l r7,r10
        sts MACL,r6
        add r11,r6              ; nz final
        shlr16 r6
        exts.w r6,r6

        dmuls.l r7,r9
        sts MACL,r7
        dmuls.l r12,r8
        sts MACL,r10
        add r7,r10              ; ny final
        shlr16 r10
        exts.w r10,r10

;        shar r5
 ;       shar r10
  ;      shar r6

;        mov.l @(4,r14),r11
 ;       cmp/hs r1,r11           ; was it a vertex or a normal vector?
  ;      bt a104

        mov.l @($14,r14),r4     ; camdist
;        mov.l @($18,r14),r7     ; zoom
        add r6,r4

        imov.l #177,r7     ; zoom H
        dmuls.l r7,r5
        sts MACL,r5

        imov.l #300,r7     ; zoom V
        dmuls.l r7,r10
        sts MACL,r10

        ; divide x by distance
        mov r4,r7
        shll16 r7
        mov #0,r8
        mov r5,r9
        rotcl r9
        subc r8,r5
        div0s r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        div1 r7,r5
        exts.w r5,r5
        rotcl r5
        addc r8,r5

        ; divide y by distance
        mov r4,r7
        shll16 r7
        mov #0,r8
        mov r10,r9
        rotcl r9
        subc r8,r10
        div0s r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        div1 r7,r10
        exts.w r10,r10
        rotcl r10
        addc r8,r10

a104:
        ; store transformed x,y,z
        mov.w r5,@r3
        add #2,r3
        mov.w r10,@r3
        add #2,r3
        mov.w r6,@r3
        add #4,r3

        dt r1
        bf a102

        bra sortpolys
        nop

        idata

cslookup:
; Fetch three sine-table longs for one angle and store them as sin, cos, -sin.
;   in:  r0  = angle (used as a byte offset into the table after masking)
;        r6  = sine table base
;        r8  = offset mask
;        r9  = offset step between the three lookups
;        r13 = destination (three consecutive longs)
;   out: r0 = offset of the third lookup, r7 = third value fetched
; Each address update is issued before the store of the previous value;
; the stores only use r7 and r13, so results are unchanged.
        and r8,r0
        mov.l @(r0,r6),r7       ; table[angle]
        add r9,r0
        and r8,r0               ; offset for the second lookup
        mov.l r7,@r13           ; sin
        mov.l @(r0,r6),r7       ; table[angle + step]
        add r9,r0
        and r8,r0               ; offset for the third lookup
        mov.l r7,@(4,r13)     ; cos
        mov.l @(r0,r6),r7       ; table[angle + 2*step]
        rts
        mov.l r7,@(8,r13)     ; -sin (stored in the rts delay slot)


sortpolys:
; Build the depth-sorted polygon list at $60DB000.
; For each polygon the transformed Z words of its four vertices are summed
; (no division is done), the sum is placed in the upper 16 bits of a long and
; the polygon number in the lower 16 bits.  Each new entry is then moved
; toward the start of the list while it is greater than the entry before it
; (signed compare), so the finished list is in descending order.
;
; r1 = slot for the new entry, r11 = list start, r2 = polygons remaining,
; r3 = polygon index list, r4 = address of the Z word of transformed vertex 0,
; r8 = current polygon number.
        imov.l #$60DB000,r1
        mov r1,r11
        mov.l @(4,r14),r2
        imov.l #$60F8000,r3
        imov.l #$60F0004,r4
        mov #0,r8

a108:
        ; r6 = Z of vertex 0 (index * 8 selects the transformed record)
        mov.w @r3+,r5
        shll2 r5
        shll r5
        add r4,r5
        mov.w @r5,r6

        ; + Z of vertex 1
        mov.w @r3+,r5
        shll2 r5
        shll r5
        add r4,r5
        mov.w @r5,r7
        add r7,r6

        ; + Z of vertex 2
        mov.w @r3+,r5
        shll2 r5
        shll r5
        add r4,r5
        mov.w @r5,r7
        add r7,r6

        ; + Z of vertex 3
        mov.w @r3+,r5
        shll2 r5
        shll r5
        add r4,r5
        mov.w @r5,r7
        add r7,r6

        shll16 r6
        or r8,r6
        mov.l r6,@r1    ; combine average Z and poly # into an L-word

        ; insertion step: walk r10 backwards from the new slot
        mov r1,r10
a111:
        cmp/eq r10,r11    ; are we at the top of the list?
        bt a110

        add -4,r10
        mov.l @r10,r9           ; r9 = previous entry
        cmp/gt r9,r6            ; T = new entry > previous entry
        bf a109
        ; if the previous poly has a lesser Z then re-order them

        mov.l r6,@r10
        bra a111
        mov.l r9,@(4,r10)       ; (delay slot) previous entry moves down one slot

a109:
;        add #4,r1
a110:
        add #4,r1               ; next free slot
        add #1,r8               ; next polygon number

        dt r2
        bf a108

        bra buildpolys
        nop

        idata

buildpolys:
; Walk the sorted list at $60DB000 and emit one VDP1 polygon command table
; per visible polygon, starting at $5C00040 (right after the two setup tables
; copied by vdp1crap), then terminate the list with an end command.
; Afterwards wait for VDP1, select the field, wait for the next vblank,
; advance two of the rotation angles and branch back to transforms.
;
; r4 = transformed vertex base, r13 = address of the Z word of the first
; entry after the vertices (the per-polygon normals), r1 = polygons
; remaining, r3 = current command table, r12 = sorted list pointer.

        ; build VDP1 command tables
        imov.l #$60F0000,r4
        mov r4,r13        
        mov.l @r14,r1
        shll r1
        shll2 r1                ; vertex count * 8
        add r1,r13
        add #4,r13       ; since we only care about the Z right now

        mov.l @(4,r14),r1
        imov.l #$5C00040,r3
        imov.l #$60DB000,r12

a103:
        mov.l @r12+,r0
        extu.w r0,r0            ; low word of the sort entry = polygon number
        imov.l #$60F8000,r2
        shll2 r0
        shll r0                 ; polygon number * 8
        add r0,r2       ; polygon data address
        mov r13,r7
        add r0,r7       ; surface normal vector address

        ; back face culling
        ; skip the polygon when its transformed normal Z is below -4
        mov.w @r7,r0
        add #4,r0
        cmp/pz r0
        bt a107
        bra a106
        nop
a107:

        mov #4,r0
        mov.w r0,@r3            ; control word: polygon
        mov #0,r0
        mov.w r0,@(2,r3)        ; CMDLINK
        imov.l #$FFFF08C0,r0
        mov.w r0,@(4,r3)        ; draw mode $08C0 (low word of r0)
;        swap.w r0,r0

;        mov -1,r0
 ;       shll8 r0
  ;      add r1,r0

        ; colour from |normal Z|
        mov.w @r7,r0
        cmp/pz r0
        bt a105
        neg r0,r0
a105:
        shlr8 r0        ; light intensity
 ;       swap.b r0,r0
        
        shll r0
        shll r0

        or #$80,r0      ; set highest bit for RGB pixel mode
        swap.b r0,r0            ; move that byte to the upper byte of the colour word
        
        mov.w r0,@(6,r3)        ; aux color
        mov #0,r0
        mov.w r0,@(8,r3)        ; character address
        mov.w r0,@(10,r3)       ; character size

        ; vertex A: x,y of the first indexed transformed vertex
        mov.w @r2+,r5
        shll r5
        shll2 r5
        add r4,r5
        mov.w @r5+,r0
        mov.w r0,@(12,r3)
        mov.w @r5+,r0
        mov.w r0,@(14,r3)

        ; vertex B
        mov.w @r2+,r5
        shll r5
        shll2 r5
        add r4,r5
        mov.w @r5+,r0
        mov.w r0,@(16,r3)
        mov.w @r5+,r0
        mov.w r0,@(18,r3)

        ; vertex C
        mov.w @r2+,r5
        shll r5
        shll2 r5
        add r4,r5
        mov.w @r5+,r0
        mov.w r0,@(20,r3)
        mov.w @r5+,r0
        mov.w r0,@(22,r3)

        ; vertex D
        mov.w @r2+,r5
        shll r5
        shll2 r5
        add r4,r5
        mov.w @r5+,r0
        mov.w r0,@(24,r3)
        mov.w @r5+,r0
        mov.w r0,@(26,r3)

        mov #0,r0
        mov.w r0,@(28,r3)       ; gouraud shading table address

        add #$20,r3             ; next command table

a106:

        dt r1
        bf a103

        imov.l #$8004,r0
        mov.w r0,@r3            ; end command

        bsr waitvdp1
        nop
        bsr setvdp1field
        nop
        bsr waitv
        nop
;        bsr waitvdp1
 ;       nop
  ;      bsr setvdp1field
   ;     nop
 ;       bsr waitv
    ;    nop

        ; change some crap and loop

        ; axz += 100
        mov.l @($0C,r14),r0
        add #100,r0
        mov.l r0,@($0C,r14)

        ; ayz += 20
        mov.l @($10,r14),r0
        add #20,r0
        mov.l r0,@($10,r14)

        bra transforms
        nop

        idata

; address of the sine table, loaded PC-relative by transforms
stadr:
        dc.l sintabl

waitv:
; Wait until the frame counter at offset $1C from r14 changes (vbint
; increments it).
;   in:  r14 = variable block base
;   out: r1 = counter value on entry, r2 = first different value read
        mov.l @($1C,r14),r1
@wloop:
        mov.l @($1C,r14),r2
        cmp/eq r1,r2
        bt @wloop               ; still the same value: keep polling
        rts
        nop

waitvdp1:
; Poll the VDP1 status register at $25D00010 until bit 1 ("drawing is done")
; is set.  Uses r0 and r1.
        imov.l #$25D00010,r1
@wloop:
        mov.w @r1,r0
        tst #2,r0               ; T = 1 while bit 1 is clear
        bt @wloop
        rts
        nop

setvdp1field:
; Copy the "odd" flag (bit 1) of the VDP2 screen status register at $25F80004
; into bit 2 of the VDP1 frame buffer change register at $25D00002, with
; bit 3 (double interlace enable) always set.  Uses r0, r1 and r2.
; (An earlier variant inverted the status word first; it stays disabled.)
        imov.l #$25F80004,r1
        mov.w @r1,r0            ; r0 = screen status
        imov.l #$25D00002,r2
        and #2,r0               ; keep only the odd flag
        shll r0                 ; move it from bit 1 to bit 2
        or #8,r0                ; double interlace enable
        rts
        mov.w r0,@r2            ; (delay slot) write the register

        idata

; Binary assets included at assembly time.

; model file read by rendercrap (header counts, coordinate longs, index words)
model:
        incbin "sphere5.mb"

        align.l
; table of longs indexed by cslookup through stadr
sintabl:
        incbin "sintabl2.bin"

        align.l
; packed 16-bit picture unpacked into VDP2 RAM by copybmp
thebmp:
        incbin "frozen2c.sbm"
