// MicroWay NumberSmasher 860 Boot ROM Disassembly
// Commented & Explained by Axel Muhr
// For more info visit:
// http://www.geekdot.com/index.php?page=number-smasher-860
//
// After reset the i860 starts executing at 0xFFFFFF00.
// When the image is loaded at 0x0000, these lines sit 256 bytes before the
// end of the ROM:
//   1F00  br   loc_0
//   1F04  shl  r0, r0, r0          // NOP (delay slot)
//   1F08  xorh 0xFFFF, r31, r31    // r31 = 0xFFFF0000
// Listing offset 0x1F00 therefore corresponds to 0xFFFFFF00, which places the
// 0x2000-byte ROM at 0xFFFFE000 - 0xFFFFFFFF.
//
// Base Address: 0000h  Range: 0000h - 2000h  Loaded length: 2000h
// Processor: 860xr
// Segment type: Pure code
//
// Reading notes
// -------------
// * Syntax is the disassembler's: "shl r0, rA, rB" is used as "mov rA, rB".
// * call / br / bri execute the following instruction (delay slot) before the
//   transfer takes place; several delay slots below do real work.
// * The disassembler prints 32-bit stores as "st.l 1(rX)" / "st.l 5(rX)".
//   Going by the byte addresses given for the two stores in poke_start
//   (8 and 12 with rX = 8), the printed low bit is not part of the effective
//   offset, i.e. 1(rX) addresses rX+0 and 5(rX) addresses rX+4.
//
// Host protocol as implemented by main_loop / poke_start (all longs are sent
// least-significant byte first):
//   command byte == 0 : POKE  - host sends address (4 bytes), data (4 bytes).
//                       If address == 0 the data word is taken as the start
//                       address of downloaded code and the ROM jumps to it.
//   command byte != 0 : PEEK  - host sends address (4 bytes), ROM answers
//                       with the long stored there (4 bytes).
//
// Register usage (as visible in this listing):
//   r4        base address used for all link-adaptor accesses (0xF8000000);
//             overwritten with the start address just before launching
//   r5        POKE address
//   r16       argument/result: link base on entry to getlink/putlink/
//             Read_Data/Write_Data, received byte/long on return
//   r17       byte/long to transmit; scratch in getlink
//   r18       saved return address in Read_Data/Write_Data; scratch in D_FLUSH
//   r19       scratch (data/base backup); bla increment (-1) in flush code
//   r20       scratch (assembled long, status byte, bla counter)
//   r21, r22  dirbase copy and saved return address in sub_flush_cache

        .text                           // ROM
        .align .byte

loc_0:                                  // CODE XREF: ROM:1F00
        orh     0xF800, r0, r4          // r4 = 0xF8000000, base for the link accesses below
        orh     0x80, r0, r21           // r21 = 0x00800000
                                        // NOTE(review): r21 is reloaded from dirbase in
                                        // sub_flush_cache before any visible use, so the
                                        // purpose of this initial value is unclear.

main_loop:
        call    getlink                 // fetch the command byte
        shl     r0, r4, r16             // (delay slot) r16 = r4: link base for getlink
        or      r16, r16, r0            // test command byte for zero
        bc      poke_start              // command == 0 -> POKE
                                        // otherwise it is a PEEK
        call    Read_Data               // read the PEEK address
        shl     r0, r4, r16             // (delay slot) r16 = r4: link base
        ld.l    r0(r16), r17            // r17 = long at the requested address
        call    Write_Data              // send r17 back to the host
        shl     r0, r4, r16             // (delay slot) r16 = r4: link base
        br      main_loop               // wait for the next command
        shl     r0, r0, r0              // nop (delay slot)

// -------------------
poke_start:
        call    Read_Data               // read the POKE address
        shl     r0, r4, r16             // (delay slot) r16 = r4: link base
        shl     r0, r16, r5             // r5 = POKE address
        call    Read_Data               // read the POKE data
        shl     r0, r4, r16             // (delay slot) r16 = r4: link base
                                        // now: r5 = POKE address, r16 = POKE data
        or      r5, r5, r0              // test address for zero
        bnc     just_poke               // address != 0 -> ordinary POKE
                                        // address == 0 is the 'start command':
                                        // r16 holds the new start address
        shl     r0, r16, r4             // r4 = code start address
        or      8, r0, r16              // r16 = 8
        orh     0x4000, r0, r1
        or      0x2000, r1, r1          // r1 = 0x40002000, the encoding of 'bri r4'
        st.l    1(r16), r1              // write 'bri r4' to address 8
        orh     0xA000, r0, r1          // r1 = 0xA0000000 = 'shl r0, r0, r0' (nop)
        st.l    5(r16), r1              // write the nop to address 12 (delay slot of the bri)
        call    sub_flush_cache         // push the two words just written out of the cache
        shl     r0, r0, r16             // (delay slot) r16 = 0
        shl     r0, r0, r0              // nop
        ld.c    dirbase, r1             // r1 = dirbase
        andnot  0x80, r1, r1            // clear bit 7 (mask 0x80): per the original annotation
                                        // this switches code fetch to 64-bit mode
        or      r0, r0, r0              // result is zero, which satisfies the bc below
                                        // (same test idiom as at main_loop)
        st.c    r1, dirbase             // write dirbase back
        bc      0x2008                  // With the ROM mapped at 0xFFFFE000, listing address
                                        // 0x2008 is 0xFFFFE000 + 0x2008, which wraps around to
                                        // 0x00000008 - where 'bri r4' was just stored, and r4
                                        // contains the code start address.
        shl     r0, r0, r0              // nop

just_poke:
        br      main_loop               // back to the command loop
        st.l    1(r5), r16              // (delay slot) the actual POKE:
                                        // store the 32-bit data in r16 at the address in r5

// ------------------- S U B R O U T I N E -------------------
// Write a long (32 bits = 4 bytes) to the C012, least-significant byte first.
//   In:       r16 = link base, r17 = long to send, r1 = return address
//   Clobbers: r17, r18, r19, r20 (r20 via putlink)
Write_Data:
        shl     r0, r1, r18             // r18 = return address (the calls below overwrite r1)
        call    putlink                 // send bits 7..0
        shl     r0, r17, r19            // (delay slot) r19 = copy of the data
        call    putlink                 // send bits 15..8
        shr     8, r19, r17             // (delay slot) r17 = data >> 8
        call    putlink                 // send bits 23..16
        shr     0x10, r19, r17          // (delay slot) r17 = data >> 16
        call    putlink                 // send bits 31..24
        shr     0x18, r19, r17          // (delay slot) r17 = data >> 24
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r18                     // return to the caller via the saved address
        shl     r0, r0, r0              // nop (delay slot)
// End of function Write_Data

// ------------------- S U B R O U T I N E -------------------
// Read a long (32 bits = 4 bytes) from the C012, least-significant byte first.
//   In:       r16 = link base, r1 = return address
//   Out:      r16 = assembled long
//   Clobbers: r17 (via getlink), r18, r19, r20
Read_Data:
        shl     r0, r1, r18             // r18 = return address (the calls below overwrite r1)
        call    getlink                 // 1st byte
        shl     r0, r16, r19            // (delay slot) r19 = link base; getlink overwrites r16
        and     0xFF, r16, r20          // r20 = 1st byte (bits 7..0)
        call    getlink                 // 2nd byte
        shl     r0, r19, r16            // (delay slot) r16 = link base again
        shl     8, r16, r16             // byte << 8
        or      r16, r20, r20           // merge 2nd byte into r20
        call    getlink                 // 3rd byte
        shl     r0, r19, r16            // (delay slot) r16 = link base again
        shl     0x10, r16, r16          // byte << 16
        or      r16, r20, r20           // merge 3rd byte into r20
        call    getlink                 // 4th byte
        shl     r0, r19, r16            // (delay slot) r16 = link base again
        shl     0x18, r16, r16          // byte << 24
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r18                     // return to the caller via the saved address
        or      r20, r16, r16           // (delay slot) merge 4th byte; result in r16
// End of function Read_Data

// ------------------- S U B R O U T I N E -------------------
// Read a byte from the C012. Spins until status bit 0 is set.
//   In:       r16 = link base, r1 = return address
//   Out:      r16 = received byte (0..255)
//   Clobbers: r17
// NOTE(review): the original annotation calls offset 0x17 the "Output Status";
// if the registers are spaced 8 bytes apart this would be the C012's *input*
// status (data present) register - confirm against the C012 datasheet and the
// board's address decoding.
getlink:
        ld.b    0x17(r16), r17          // read the status register at base+0x17
        and     1, r17, r0              // bit 0 clear (no data yet) -> condition for bc
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bc      getlink                 // keep polling while no data is available
        ld.b    7(r16), r16             // read the data byte at base+7
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r1                      // return
        and     0xFF, r16, r16          // (delay slot) strip the sign extension of ld.b
// End of function getlink

// ------------------- S U B R O U T I N E -------------------
// Write a byte to the C012. Spins until status bit 0 is set.
//   In:       r16 = link base, r17 = byte to send (low 8 bits), r1 = return address
//   Clobbers: r20
// NOTE(review): the original annotation calls offset 0x1F the "Input Status";
// if the registers are spaced 8 bytes apart this would be the C012's *output*
// status (output ready) register - confirm as for getlink.
putlink:
        ld.b    0x1F(r16), r20          // read the status register at base+0x1F
        and     1, r20, r0              // bit 0 clear (not ready) -> condition for bc
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bc      putlink                 // keep polling while the link is not ready
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r1                      // return
        st.b    0xF(r16), r17           // (delay slot) write the byte in r17 to base+0xF
// End of function putlink

// -- Probably unused subroutines? --
// Neither stub has a label or a visible caller in this listing.

        // Returns bit 0 of the status register at base+0x17 in r16
        // (the register getlink polls).
        ld.b    0x17(r16), r16
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r1                      // return
        and     1, r16, r16             // (delay slot) keep only bit 0

// -------------------
        // Returns bit 0 of the status register at base+0x1F in r16
        // (the register putlink polls).
        ld.b    0x1F(r16), r16
        st.b    0, r0                   // dummy store
        shl     r0, r0, r0              // nop
        bri     r1                      // return
        and     1, r16, r16             // (delay slot) keep only bit 0

// ------------------- S U B R O U T I N E -------------------
// Nearly a 1:1 copy of Intel's 'official' flush-cache routine.
// Runs D_FLUSH twice with different dirbase settings, then restores the
// two modified bits.
//   In:       r1 = return address
//   Clobbers: r18, r19, r20, r21, r22, r31 (r18/r20/r31 via D_FLUSH)
sub_flush_cache:
        shl     r0, r1, r22             // r22 = return address (the calls below overwrite r1)
        adds    -1, r0, r19             // r19 = -1, the bla loop increment used by D_FLUSH
        ld.c    dirbase, r21            // r21 = dirbase
        or      0x800, r21, r21         // set bit 11 (mask 0x800); original annotation: "upper RC"
        call    D_FLUSH                 // first flush pass
        st.c    r21, dirbase            // (delay slot) write dirbase before D_FLUSH runs
        or      0x100, r21, r21         // set bit 8 (mask 0x100); original annotation: "upper RB"
        call    D_FLUSH                 // second flush pass
        st.c    r21, dirbase            // (delay slot) write dirbase before D_FLUSH runs
        xor     0x900, r21, r21         // clear the two bits set above (0x800 | 0x100)
        st.c    r21, dirbase            // write dirbase back
        bri     r22                     // return to the caller via the saved address
        shl     r0, r0, r0              // nop (delay slot)
// End of function sub_flush_cache

// ------------------- S U B R O U T I N E -------------------
// One flush pass, called from sub_flush_cache.
//   In:       r19 = -1 (bla increment), r1 = return address
//   Clobbers: r18, r20, r31
D_FLUSH:
        or      0xFE0, r0, r18          // r18 = 0x0FE0; per the original annotation this is the
                                        // address where the cache content goes
        or      0x7F, r0, r20           // r20 = 0x7F (127), loop counter for bla
        ld.l    0x20(r18), r31          // load from r18+0x20 into r31
        shl     0, r31, r31             // uses r31, the result of the load above
        bla     r19, r20, D_FLUSH_LOOP  // first bla, falls into the loop
        shl     r0, r0, r0              // nop (delay slot)

D_FLUSH_LOOP:
        bla     r19, r20, 0x401D8       // loop branch.
                                        // NOTE(review): the target is printed as 0x401D8; in
                                        // Intel's reference routine this bla branches back to
                                        // itself (D_FLUSH_LOOP). Presumably a mis-resolved
                                        // target in the disassembly - confirm against the raw
                                        // encoding.
        flush   0x20(r18)++             // (delay slot) flush, auto-incrementing r18 by 0x20
        bri     r1                      // return
        ld.l    0xFFFFFE00(r18), r0     // (delay slot) load from r18-512, result discarded (r0)
// End of function D_FLUSH

// end of file