''
'' VGA scanline driver 400x300 - foreground renderer
''
''   Based on "Ball" demo for Gameduino
''   Copyright (c) 2011 by James Bowman <jamesb@excamera.com>
''
''        Author: Marko Lukat
'' Last modified: 2012/12/24
''       Version: 0.11
''
OBJ
  system: "boing-bel-corecon"
  
PUB null
'' This is not a top level object.
'' The method body is intentionally empty; use init to hand the renderer
'' to the launcher.

PUB init(ID, mailbox)
'' Pass ID, the address of the PASM image (DAT label entry) and mailbox to
'' the launcher object and hand its result back to the caller unchanged.

  result := system.launch(ID, @entry, mailbox)
  
DAT             org     0                       ' foreground renderer

' Start-up: run the one-time setup code, then poll the frame indicator until
' it equals scry. cmpsub subtracts scry on that match, so indx starts at 0.

entry           jmpret  $, #setup               ' once (setup returns through this insn's src field)
                
                rdlong  indx, blnk              ' |
                cmpsub  indx, scry wz           ' |
        if_ne   jmp     #$-2                    ' waiting for last line to be fetched

' Skip 26 sync lines and advance by a further 161 hub windows. This pushes an
' assumed foreground renderer hubop beyond the first video renderer line rdlong
' if it were to render scanline -1.
' After the waitcnt we have no further instructions (18 + 14 = 32, 2 hub windows).
' The delay constant is stored in-line; its value is small enough that the
' condition field is zero, so falling through it has no effect.

                mov     cnt, cnt                ' shadow cnt := current system counter
                add     cnt, $+1                ' add the delay held in the following long
                long    13{18} + 14 + 132*16*26 + 16*(161 - 2)

' Per-line loop. Each pass is paced by two waitcnt targets (eins + zwei system
' clocks in total): fetch the mask data for the line, then draw shadow and
' ball for line indx. Falls through to the per-frame code once indx wraps.

loop            waitcnt cnt, eins               ' initial sync point
'               wrbyte  indx, base              ' assumed hubop

                call    #pixels                 ' fetch foreground data (~92 hub windows)

                waitcnt cnt, zwei               ' block until line is available

                rdlong  bflag, bfadr wz         ' flag long at par+448; zero skips both draw calls
        if_nz   call    #shadow                 ' draw shadow
                cmp     bflag, #0 wz            ' re-test, shadow modifies the flags
        if_nz   call    #solid                  ' draw ball

                rol     mask, #8                ' update shadow mask, it's unused
                                                ' during 1st frame (msky == scry)
                add     indx, #1                ' line done, advance
                cmpsub  indx, scry wz           ' optionally wrap line index
        if_nz   jmp     #loop                   ' Z is set only when indx wrapped to 0

' per frame updates (during the first frame mask/shadow are off-screen)
'
' Waits for the frame indicator to read scry, then latches the new mask
' position from the hub, derives the shadow position and both vertical
' bounding boxes, resets the shadow mask and re-arms the line timer.

                rdlong  temp, blnk              ' |
                cmp     temp, scry wz           ' |
        if_ne   jmp     #$-2                    ' wait for ?/scry transition
                
                rdword  mskx, crdx              ' |
                shl     mskx, #16               ' |
                sar     mskx, #16               ' |
                rdword  msky, crdy              ' |
                shl     msky, #16               ' |
                sar     msky, #16               ' update and sign-extend mask coordinates

                maxs    mskx, scrx              ' |
                maxs    msky, scry              ' reasonable limit(s) (off-screen)

                mov     shdx, mskx              ' |
                add     shdx, #SOFFX            ' |
                mov     shdy, msky              ' |
                add     shdy, #SOFFY            ' apply shadow offset(s)

                mov     mskc, msky              ' |
                add     mskc, #BSIZE -1         ' |
                mov     shdc, shdy              ' |
                add     shdc, #BSIZE -1         ' bounding box setup (last row, inclusive)
                
' NOTE(review): unlike the timing constants below, the pattern $00FF00FF has
' all condition bits set; when reached by fall-through it decodes as a hub
' byte read (rdbyte $180, #$FF). Confirm that this is intended/harmless.

                long    $00FF00FF               ' |
                mov     mask, $-1               ' reset shadow mask (scry == 2n+?)

' The three timing constants are stored in-line. Their values are small
' enough that the condition field is zero, so falling through them has no
' effect. eins + zwei = 264*16 clocks, i.e. two scan lines of 132*16 each.

eins            long    100*16                  ' initial line fetch
zwei            long    164*16                  ' remainder of two scan lines
drei            long    132*16*28               ' skip all sync lines

                add     cnt, drei               ' push the next waitcnt target past the sync lines
                jmp     #loop

' support code

' pixels: copy one mask line from the hub buffer whose address is stored at
' feed into the cog array data[]. BSIZE/4 longs are copied when mskx is a
' multiple of 4. Otherwise every long is assembled from two neighbouring hub
' longs (starting at addr[-1]), shifted by (mskx & 3) bytes, and one extra
' long is produced. A null buffer address disables the mask for this line
' (msky := scry) and returns immediately.
' Uses: addr, ecnt, temp, arg0, arg1; patches :one, :two and :set.

pixels          rdlong  addr, feed wz           ' get current buffer address

        if_z    mov     msky, scry              ' move off-screen (disabled)
        if_z    jmp     pixels_ret              ' early exit (indirect, via the return address in ret)

                mov     ecnt, #BSIZE/4
                movd    :set, #data             ' reset destination of the store below

                mov     temp, mskx              ' |
                and     temp, #%11 wz           ' observe alignment (Z = aligned, kept for the loop)
                
                shl     temp, #3                ' [4..0]: 24/16/8/0
                movs    :two, temp              ' left shift for the following long
                neg     temp, temp              ' [4..0]: 8/16/24/0
                movs    :one, temp              ' right shift for the preceding long

        if_nz   add     ecnt, #1                ' need to process one more long
        if_nz   sub     addr, #4                ' starting with addr[-1]

:loop           rdlong  arg0, addr              '  +0 = 00: DDCCBBAA, 01: ------DD
                add     addr, #4                '  +8   10: ----DDCC, 11: --DDCCBB
        if_z    jmp     #:set                   '  -4   faster if aligned
        
                rdlong  arg1, addr              '  +0 = 00: n/a,      01: CCBBAA--
:two            shl     arg1, #0-0              '  +8   10: BBAA----, 11: AA------
:one            shr     arg0, #0-0              '  -4
                or      arg0, arg1              '  +0 =

:set            mov     0-0, arg0               '  +4   store into data[n]
                add     $-1, dst1               '  +8   bump the store's destination field
                
                djnz    ecnt, #:loop            '  -4

pixels_ret      ret

' solid: draw the mask pixels held in data[] for line indx into the scanline
' buffer at base.
'   - lines outside msky..mskc are skipped
'   - table supplies the row's relative offset and length; rows at or beyond
'     BSIZE/2 re-use the entries in reverse order
'   - the span is clipped to 0..scrc
'   - spans shorter than 4 pixels are written byte by byte, longer spans are
'     split into prefix (to reach a long boundary), one optional long,
'     pairs of longs (8n) and a suffix
' Uses: temp, arg0..arg3, addr, ecnt, quad, frqb, phsb; patches several insns.

solid           cmps    indx, msky wc           ' |
        if_nc   cmps    mskc, indx wc           ' |
        if_c    jmp     solid_ret               ' vertical bounds check

                mov     temp, indx
                sub     temp, msky              ' row inside the mask, 0..BSIZE-1
                cmp     temp, #BSIZE/2 wc
        if_nc   neg     temp, temp              ' |
        if_nc   add     temp, #BSIZE-1          ' lower half: BSIZE-1 - row
                add     temp, #table            ' offset into table

                movs    $+2, temp
                neg     arg2, #1                ' pipeline
                mov     arg0, 0-0               ' fetch offset/length pair

                mov     arg1, arg0
                shr     arg1, #16 wz            ' length (Z = empty row, tested below)
                
                and     arg0, #511              ' relative offset
                add     arg0, mskx              ' absolute offset

                add     arg2, arg0              ' |
                add     arg2, arg1              ' right side inclusive

                cmps    arg2, #0 wc             ' C = span ends left of the screen
        if_nc   cmps    scrc, arg0 wc           ' |
    if_z_or_c   jmp     solid_ret               ' horizontal bounds check

                cmps    arg0, #0 wc             ' |
        if_c    subabs  arg1, arg0              ' |
        if_c    mov     arg0, #0                ' clipped left

                cmps    scrc, arg2 wc           ' |
        if_c    mov     arg1, scrx              ' |
        if_c    sub     arg1, arg0              ' clipped right side

' arg0..(arg0+arg1-1) fits into the scanline buffer, draw the line

                mov     addr, base
                add     addr, arg0              ' @byte[base][arg0]

                mov     arg2, mskx              ' |
                and     arg2, #%11              ' |
                add     arg2, arg0              ' |
                sub     arg2, mskx              ' byte offset into data array

                shr     arg1, #2 wz,nr          ' special code for 1..3
        if_nz   jmpret  zero, #:full wc,nr      ' carry set if taken    (##)

                ' do an unaligned load

                mov     arg0, arg2              ' remember for byte alignment

                shr     arg2, #2                ' long offset
                add     arg2, #data             ' long address
                              
                movs    $+2, arg2
                add     arg2, #1                ' pipeline
                mov     arg3, 0-0               ' load 1st long
                movs    $+2, arg2
                test    arg0, #%11 wz           ' pipeline
                mov     arg2, 0-0               ' load 2nd long

        if_nz   shl     arg0, #3                ' 00: DDCCBBAA, 01: --DDCCBB
        if_nz   shr     arg3, arg0              ' 10: ----DDCC, 11: ------DD
        if_nz   neg     arg0, arg0              ' 00: n/a,      01: AA------
        if_nz   shl     arg2, arg0              ' 10: BBAA----, 11: CCBBAA--
        if_nz   or      arg3, arg2              ' combine both longs
                                                  
                wrbyte  arg3, addr              ' emit 1..3 pixels, lowest byte first
                add     addr, #1
                shr     arg3, #8
                djnz    arg1, #$-3              ' relaxed @ 2 hub windows/byte
                
                jmp     solid_ret

' length is prefix + 4n + suffix, split up and deal with it

:full           shr     arg2, #2                ' long offset
                add     arg2, #data             ' long address

                neg     ecnt, arg0              ' 0123 >> 0321
                and     ecnt, #%11 wz           ' pixels in prefix
                sub     arg1, ecnt              ' update length
        if_z    jmp     #:core

                ' handle prefix

                shl     arg0, #3                ' 1..3 >> 8..24
                rcr     ecnt, arg0              ' create reverse mask   (##) shifts in C = 1

                movs    $+2, arg2               ' prefix data
                rev     ecnt, #0                ' adjust (bit-reverse: ones now cover the low bytes to keep)

                mov     arg3, 0-0               ' |
                andn    arg3, ecnt              ' only keep masked data

                rdlong  quad, addr
                and     quad, ecnt
                or      quad, arg3              ' combine with background
                wrlong  quad, addr

                add     arg2, #1                ' advance src
                add     addr, #4                ' advance dst
                andn    addr, #%11              ' |

                ' handle 4
                
:core           test    arg1, #%100 wz          ' odd number of whole longs left?
        if_nz   movd    $+2, arg2
        if_nz   sub     arg1, #4                ' update length
        if_nz   wrlong  0-0, addr
        if_nz   add     addr, #4                ' advance dst
        if_nz   add     arg2, #1                ' advance src

                ' handle 8n
                ' NOTE(review): the body below uses phsb as the hub address of
                ' wrlong while frqb holds addr/2. This presumably relies on
                ' counter B (LOGIC always, enabled in setup) adding frqb to the
                ' value read back from phsb, so that the byte index and the
                ' buffer address are combined for free - confirm against the
                ' counter documentation before changing any instruction here.

                mov     ecnt, arg1              ' remember for tail (%-??)
                shr     arg1, #3 wz             ' check 8n count
        if_z    jmp     #:suffix                ' skip body
        
                mov     frqb, addr
                shr     frqb, #1{/2}

                add     arg2, arg1              ' |
                add     arg2, arg1              ' advance src
                
                movd    :one, arg2
                sub     :one, dst1              ' data[n][-1]
                movd    :two, arg2
                sub     :two, dst2              ' data[n][-2]

                mov     phsb, arg1
                shl     phsb, #3
                mov     addr, phsb              ' advance dst
                sub     phsb, #1                ' 8n - 1
                
:one            wrlong  0-0, phsb               ' pairs are written from the end towards the start
                sub     $-1, dst2
                sub     phsb, #7 wz             ' Z = first long of the body reached
:two            wrlong  0-0, phsb
                sub     $-1, dst2
        if_nz   djnz    phsb, #:one
        
                ' handle suffix

:suffix         and     ecnt, #%11 wz           ' suffix (unaligned)
        if_z    jmp     solid_ret               ' early exit

                shl     ecnt, #3                ' 1..3 >> 8..24
                neg     arg1, #1                ' create mask
                
                movs    $+2, arg2               ' suffix data
                shl     arg1, ecnt              ' adjust (ones cover the high bytes to keep)

                mov     arg3, 0-0               ' |
                andn    arg3, arg1              ' only keep masked data

                rdlong  quad, addr
                and     quad, arg1                
                or      quad, arg3              ' combine with background
                wrlong  quad, addr

solid_ret       ret

' shadow: darken the scanline buffer at base for line indx where the shadow
' falls. Geometry and clipping are identical to solid but use shdx/shdy/shdc.
' No pixel data is copied: existing pixels are ANDed with mask, which blanks
' every other byte; the main loop rotates mask by one byte per line.
'   - spans shorter than 4 pixels are handled byte by byte
'   - longer spans are split into prefix, whole longs and tail
' Uses: temp, arg0..arg2, addr, ecnt, quad; mask is left as found.

shadow          cmps    indx, shdy wc           ' |
        if_nc   cmps    shdc, indx wc           ' |
        if_c    jmp     shadow_ret              ' vertical bounds check

                mov     temp, indx
                sub     temp, shdy              ' row inside the shadow, 0..BSIZE-1
                cmp     temp, #BSIZE/2 wc
        if_nc   neg     temp, temp              ' |
        if_nc   add     temp, #BSIZE-1          ' lower half: BSIZE-1 - row
                add     temp, #table            ' offset into table

                movs    $+2, temp
                neg     arg2, #1                ' pipeline
                mov     arg0, 0-0               ' fetch offset/length pair

                mov     arg1, arg0
                shr     arg1, #16 wz            ' length (Z = empty row, tested below)
                
                and     arg0, #511              ' relative offset
                add     arg0, shdx              ' absolute offset

                add     arg2, arg0              ' |
                add     arg2, arg1              ' right side inclusive

                cmps    arg2, #0 wc             ' C = span ends left of the screen
        if_nc   cmps    scrc, arg0 wc           ' |
    if_z_or_c   jmp     shadow_ret              ' horizontal bounds check

                cmps    arg0, #0 wc             ' |
        if_c    subabs  arg1, arg0              ' |
        if_c    mov     arg0, #0                ' clipped left

                cmps    scrc, arg2 wc           ' |
        if_c    mov     arg1, scrx              ' |
        if_c    sub     arg1, arg0              ' clipped right side

' arg0..(arg0+arg1-1) fits into the scanline buffer, draw the line

                mov     addr, base
                add     addr, arg0              ' @byte[base][arg0]

                shr     arg1, #2 wz,wc,nr       ' special code for 1..3 (C = length is odd)
        if_nz   jmpret  zero, #:full wc,nr      ' carry set if taken    (##)

                test    arg0, #1 wz             ' Z = even start column
        if_nz   rol     mask, #8                ' odd bytes only
        
                rdbyte  quad, addr
                and     quad, mask
                rol     mask, #8                ' next byte uses the neighbouring mask byte
                wrbyte  quad, addr
                add     addr, #1
                djnz    arg1, #$-5

    if_c_eq_z   rol     mask, #8                ' restore mask (makes the rotation count even)
        
                jmp     shadow_ret
                
:full           neg     ecnt, arg0              ' 0123 >> 0321
                and     ecnt, #%11 wz           ' pixels in prefix
                sub     arg1, ecnt              ' update length
        if_z    jmp     #:core

                shl     arg0, #3                ' 1..3 >> 8..24
                rcr     ecnt, arg0              ' create reverse mask   (##) shifts in C = 1
                rev     ecnt, #0                ' adjust (ones now cover the low bytes to protect)
                
                rdlong  quad, addr
                or      ecnt, mask              ' add shadow mask
                and     quad, ecnt
                wrlong  quad, addr

                add     addr, #4
'{rd/wrlong}    andn    addr, #%11

:core           mov     ecnt, arg1              ' remember for tail (%-??)
                shr     arg1, #2 wz             ' check long count
        if_z    jmp     #:tail                  ' skip body
        
                rdlong  quad, addr
                and     quad, mask
                cmp     arg1, #1 wz             ' Z = this is the last whole long
                wrlong  quad, addr
                add     addr, #4
        if_nz   djnz    arg1, #$-5

:tail           and     ecnt, #%11 wz           ' suffix (unaligned)
        if_z    jmp     shadow_ret              ' early exit

                shl     ecnt, #3                ' 1..3 >> 8..24
                neg     arg1, #1                ' |
                shl     arg1, ecnt              ' protect unused pixels
                rdlong  quad, addr
                or      arg1, mask              ' add shadow mask
                and     quad, arg1                
                wrlong  quad, addr

shadow_ret      ret

' initialised data and/or presets

' table: one long (two words) per row for the first BSIZE/2 rows of the mask.
' Low word = x offset of the first pixel relative to the mask's left edge,
' high word = number of pixels in the row. solid and shadow index it with the
' row number and read rows >= BSIZE/2 in reverse order.

table           word    48,  16, 43,  26, 39,  34, 36,  40, 34,  44, 32,  48, 30,  52, 28,  56
                word    26,  60, 25,  62, 23,  66, 22,  68, 21,  70, 19,  74, 18,  76, 17,  78 
                word    16,  80, 15,  82, 14,  84, 13,  86, 13,  86, 12,  88, 11,  90, 10,  92 
                word    10,  92,  9,  94,  8,  96,  8,  96,  7,  98,  7,  98,  6, 100,  6, 100
                word     5, 102,  5, 102,  4, 104,  4, 104,  3, 106,  3, 106,  3, 106,  2, 108
                word     2, 108,  2, 108,  2, 108,  1, 110,  1, 110,  1, 110,  1, 110,  1, 110
                word     0, 112,  0, 112,  0, 112,  0, 112,  0, 112,  0, 112,  0, 112,  0, 112

' The next five longs start out as offsets and are turned into hub addresses
' by setup (par is added; blnk is derived from the adjusted base).

feed            long    -12                     ' | long holding the mask buffer address
crdx            long    -8                      ' | word, mask x coordinate
crdy            long    -6                      ' | word, mask y coordinate
                                                ' quick access relative to par
blnk            long    -4                      ' | frame indicator, 4 bytes below the scanline buffer
base            long    NEGX                    ' | scanline buffer (par with bit 31 set)

dst1            long    1 << 9                  ' dst +/-= 1
dst2            long    2 << 9                  ' dst +/-= 2

scrc            long    399                     ' upper limit (inclusive)
scrx            long    400                     ' horizontal resolution; also the off-screen limit for mskx

bfadr           long    448                     ' byte offset from par of the draw-enable long (par added in setup)
bflag           long    0                       ' last value read from bfadr

' Stuff below is re-purposed for temporary storage.

' setup: executed exactly once from entry. Converts the par-relative presets
' into hub addresses, captures the vertical resolution into scry, parks mask
' and shadow off-screen and enables counter B. The registers holding this
' code are re-declared as temporaries (see org setup), so the instruction
' order must respect which temporary each instruction writes          (%%).

setup           add     crdx, par               ' mask coordinates      (%%)
                add     crdy, par               ' |
                add     feed, par               ' mask buffer location
                
                add     base, par               ' scanline buffer 
                add     blnk, base wc           ' frame indicator (the add wraps, leaving C set)

                rdword  indx, blnk wz           '                       (%%)
        if_nz   mov     scry, indx wc           '                       (%%) also clears C (bit 31 of a word is 0)
   if_c_or_nz   jmp     #$-2                    ' auto-detect res_y

' The loop is only left once a non-zero value has been written to scry
' and indx transitions to zero afterwards.

                mov     msky, scry              ' move off-screen
                mov     shdy, scry              ' |

                movi    ctrb, #%0_11111_000     ' LOGIC always (loader support)

                add     bfadr, par              ' draw-enable long, par-relative like the others
                jmp     %%0                     ' return (indirect through register 0, patched by entry's jmpret)

                fit
                
' uninitialised data and/or temporaries
'
' These registers overlay the setup code (org setup): once setup has run, its
' instructions are no longer needed and the space is reused.

                org     setup
                
scry            res     1                       ' must be 1st..5th      (%%) vertical resolution, captured in setup
indx            res     1                       ' | current line index, 0..scry-1

addr            res     1                       ' scanline reference
ecnt            res     1                       ' element count

mask            res     1                       ' shadow mask

mskc            res     1                       ' upper limit (inclusive)
mskx            res     1                       ' mask coordinates
msky            res     1                       ' signed 16bit

shdc            res     1                       ' upper limit (inclusive)
shdx            res     1                       ' shadow coordinates
shdy            res     1                       ' signed 16bit

arg0            res     1                       ' | scratch: span start / shift amounts
arg1            res     1                       ' | scratch: span length / masks
arg2            res     1                       ' | scratch: span end / data offset
arg3            res     1                       ' | scratch: pixel data

temp            res     1                       ' scratch for the main loop and table lookup
quad            res     1                       ' hub long/byte being modified

data            res     BSIZE/4 +1{unaligned}   ' one mask line as filled by pixels

tail            fit

CON
  zero  = $1F0                                  ' par (dst only), used as dummy destination for jmpret ... nr

  BSIZE = 112                                   ' mask width/height (4n)
  SOFFX = 15                                    ' |
  SOFFY = 15                                    ' shadow offset
  
DAT
{{

 TERMS OF USE: MIT License

 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 associated documentation files (the "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the
 following conditions:

 The above copyright notice and this permission notice shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

}}
DAT
