
/***************************************************************************
 *     Copyright Motorola, Inc. 1989-2003 ALL RIGHTS RESERVED
 *
 * You are hereby granted a copyright license to use, modify, and
 * distribute the SOFTWARE, also know as DINK32 (Dynamic Interactive Nano 
 * Kernel for 32-bit processors) solely in conjunction with the development 
 * and marketing of your products which use and incorporate microprocessors 
 * which implement the PowerPC(TM) architecture manufactured by 
 * Motorola and provided you comply with all of the following restrictions 
 * i) this entire notice is retained without alteration in any
 * modified and/or redistributed versions, and 
 * ii) that such modified versions are clearly identified as such. 
 * No licenses are granted by implication, estoppel or
 * otherwise under any patents or trademarks of Motorola, Inc.
 * 
 * The SOFTWARE is provided on an "AS IS" basis and without warranty. To
 * the maximum extent permitted by applicable law, MOTOROLA DISCLAIMS ALL
 * WARRANTIES WHETHER EXPRESS OR IMPLIED, INCLUDING IMPLIED WARRANTIES OF
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE AND ANY WARRANTY 
 * AGAINST INFRINGEMENT WITH REGARD TO THE SOFTWARE 
 * (INCLUDING ANY MODIFIED VERSIONS THEREOF) AND ANY ACCOMPANYING 
 * WRITTEN MATERIALS.
 * 
 * To the maximum extent permitted by applicable law, IN NO EVENT SHALL
 * MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING WITHOUT 
 * LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS 
 * INTERRUPTION, LOSS OF BUSINESS INFORMATION,
 * OR OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
 * SOFTWARE.
 * Motorola assumes no responsibility for the maintenance and support of
 * the SOFTWARE.
 ************************************************************************/

// file:	startup.S
// 	
// purpose:  boot code for e500 core
// 	
// history:
// 	??/??/02 -- AAB
//		    created
//	03APR11  -- GM
//		    rearchitected and configuration added.
//
	

#include "dink_asm.h"
#include "e500.h"



//===========================================================================
// Configuration


//---------------------------------------------------------------------------
// DDR OPTIONS

//#define DDR_REGISTERED
//#define DDR_CL_2
#define DDR_CL_2_5
//#define DDR_CL_3
#define DDR_32BIT_BUS
#define DDR_MEMORY_TEST

//---------------------------------------------------------------------------
// Copy options

//#define SCRUB
#define COPY_VERIFY
//#define POST_COPY_VERIFY

// Number of bytes to copy
#define COPY_SIZE  0x000C0000

#define FILL_SIZE  0x00100000
#define FILL_PATT  0x00000000


//---------------------------------------------------------------------------
// Flash options (access time in ns, used to derive the local-bus SCY wait states)

#define FLASH_SPEED	120


//===========================================================================
// At POR one TLB entry is allocated for 4k space (0xfffff000-0xffffffff)
// with 1-to-1 translation.  A branch located at 0xfffffffc jumps to this
// location.  "begin_init" must be positioned at 0xFFFF_F000, this is
// handled via the linker script.
//
// R27: ResetBase	(0xFFC0_0000)

	.global begin_init
// Reset entry point.  Clears r0, records the current execution address
// in R27, points IVPR and every IVOR at the 4K page holding this code,
// and loads PID0-PID2 with 0, 1 and 2.
begin_init:		
	xor	r0,r0,r0		// r0 = 0

	bl	get_ip			// Get current address in R27.
get_ip:	isync
	mflr	r27			// LR = address of 'get_ip'


// Set up IVORs and IVPR
// Since nothing is available yet except the upper 4K, set the IVORs
// to point to 'begin_init'; at least on errors it will be visible.

	li	r3,0x0FFF
	andc	r3,r27,r3		// Strip off lower 12 bits
	mtspr	ivpr,r3

	// Every vector gets the same value: the base of the current 4K page.
	mtspr	ivor0,r3 		// critical input
	mtspr	ivor1,r3		// machine check
	mtspr	ivor2,r3		// dsi
	mtspr	ivor3,r3		// isi
	mtspr	ivor4,r3		// external input
	mtspr	ivor5,r3		// alignment
	mtspr	ivor6,r3		// program
	mtspr	ivor7,r3		// floating point unavailable
	mtspr	ivor8,r3		// system call
	mtspr	ivor9,r3		// auxiliary processor unavail.
	mtspr	ivor10,r3		// decrementer
	mtspr	ivor11,r3		// fixed interval timer
	mtspr	ivor12,r3		// watchdog timer
	mtspr	ivor13,r3		// data tlb error
	mtspr	ivor14,r3		// instruction TLB error
	mtspr	ivor15,r3		// debug
	mtspr	ivor32,r3		// SPU
	mtspr	ivor33,r3		// Vector float data error
	mtspr	ivor34,r3		// Vector round error
	mtspr	ivor35,r3		// performance monitor


// Set up PID registers

	li	r3,0
	mtspr	pid0,r3			// PID0 = 0
	isync
	li	r3,1
	mtspr	pid1,r3			// PID1 = 1
	isync
	li	r3,2
	mtspr	pid2,r3			// PID2 = 2
	isync


// Now use R27 to point to the 'true' base address of the flash.
// For Elysium the base of flash where the image is burnt is
// located at 0xFFC0_0000 (last 4MB) and for the PQ3-ADS board
// this address is 0xFFF0_0000 (last 1MB).  An ADS pilot board
// (ADS_PILOT_BOARD) uses 0xFFC0_0000 as well.
#ifdef MPC8540ADS
#ifdef ADS_PILOT_BOARD
	lis	r3,0xFFC0		// Find start of flash.
#else 
	lis	r3,0xFFF0		// Find start of flash.
#endif
	
#else   //Elysium
	lis	r3,0xFFC0		// Find start of flash.
#endif

	and	r27,r27,r3		// R27 = current address masked down to the flash base


//---------------------------------------------------------------------------
// Processor setup.

	li	r3,0x4000		// Enable timebase
	mtspr	hid0,r3
	sync

	lis	r3,0x7fff			// Set DECAR with max
	ori	r3,r3,0xffff			// count.
	mtspr	decar,r3	
	sync

	lis	r3,0x0040			// Set auto-reload DEC
	mtspr	tcr,r3
	sync


//---------------------------------------------------------------------------
// Flash invalidate the L1 I+D MMU and TLB0 and TLB1.
// The request bits are written to MMUCSR0 and then polled until they
// all read back as zero.

	li	r3,0x001c		// Inval L1 I+D and TLB0
	mtspr	mmucsr0,r3
	isync

pollmmucsr0:
	mfspr	r4,mmucsr0
	and.	r4,r4,r3		// All clear?
	bne	pollmmucsr0		// No -- keep polling


// Note: currently, this never exits.  It didn't work when merged with
// the above, either.  It is therefore only assembled when
// TLB1_FLASH_INVAL_WORKS is defined.

#ifdef TLB1_FLASH_INVAL_WORKS
	li	r3,0x0002		// Inval TLB1
	mtspr	mmucsr0,r3
	isync
pollmmucsr1:
	mfspr	r4,mmucsr0
	and.	r4,r4,r3		// All clear?
	bne	pollmmucsr1
#endif


//===========================================================================
// The memory map needs to look like this for (E)DINK and the MPC85x0 to be
// minimally usable:
//
// =========================   ====  === === ===  ===========================
// RANGE                       SIZE  TLB LAW BRx  DESCRIPTION
// =========================   ====  === === ===  ===========================
// 0x0000_0000 - 0x7FFF_FFFF	2G   1&2  1   -	  DDR SDRAM
// 0x8000_0000 - 0x8FFF_FFFF   256M   4   2   -	  PCI MEM
// 0x9000_0000 - 0x9FFF_FFFF   256M   5   2   -	  PCI IO
// 0xC000_0000 - 0xDFFF_FFFF   512M  6&7  3   -	  RapidIO
// 0xFC00_0000 - 0xFC0F_FFFF	1M    3	  -   -   CCSRBAR
// 0xFC10_0000 - 0xFCFF_FFFF   15M    3	  -   -   unused
// 0xFD00_0000 - 0xFD0F_FFFF	1M    3	  0   2   NVRAM
// 0xFD10_0000 - 0xFDFF_FFFF   15M    3	  0       unused
// 0xFE00_0000 - 0xFEFF_FFFF   16M    3	  0   1   ROM 1
// 0xFF00_0000 - 0xFFFF_FFFF   16M    0   0   0   ROM 0
//
// To get this the TLB, LAW, and BRx registers need to be initialized.
//===========================================================================



//---------------------------------------------------------------------------
// TLB SETUP
//
// Walks 'mas_tlb_table' and writes one TLB entry per table record.
// Table layout as consumed here: one word holding the entry count,
// followed by four words (MAS0, MAS1, MAS2, MAS3) per entry.
// Uses r3-r7 and CTR.
//
	.global	tlb_setup
tlb_setup:

	lis	r7,mas_tlb_table@ha		// Point to table
	addi	r7,r7,mas_tlb_table@l

	oris	r7,r7,0xFFFFF000@h		// Force into upper 4K
	ori	r7,r7,0xFFFFF000@l		// (table BETTER be there!)

	lwzu	r3,0(r7)			// Get table size
	mtctr	r3				// CTR = number of entries to write
masloop:
	lwzu	r3,4(r7)			// Load MAS0-3 values
	lwzu	r4,4(r7)			// (each lwzu pre-increments r7 by 4)
	lwzu	r5,4(r7)
	lwzu	r6,4(r7)

	mtspr	mas0,r3				// Transfer to SPRs
	mtspr	mas1,r4
	mtspr	mas2,r5
	mtspr	mas3,r6

	isync					// Probably overly-guarded,
	msync					// but what the heck...not
	tlbwe	
	isync					// like this is a critical
	msync					// inner loop.

	bdnz	masloop


//===========================================================================
// L1 I/D cache setup
// Each cache is enabled only when its build option is defined.


#ifdef ICACHEON
	bl      startup_cache_inval_enable_L1I
#endif

#ifdef DCACHEON
	bl      startup_cache_inval_enable_L1D
#endif


//===========================================================================
// Set up CCSRBAR.
//
// R20: CCSRBAR
//
// R20 starts at 0xFF70_0000.  When RELOCATE_CCSRBAR is defined (it is,
// just below) the value 0x000F_C000 is written to the register at offset
// 0 and R20 is switched to 0xFC00_0000.
// NOTE(review): 0x000F_C000 looks like the new base shifted right by 12
// bits -- confirm against the CCSRBAR register description.

	lis	r20,0xFF70			// CCSRBAR

#define RELOCATE_CCSRBAR
#ifdef RELOCATE_CCSRBAR

	lis	r4,0x000F
	ori	r4,r4,0xC000			// New CCSRBAR

	lwz	r3,0(r20)			// Get current CCSRBAR
	isync
	stw	r4,0(r20)			// Write new CCSRBAR
	isync

	lis	r20,0xFC00 			// Load new CCSRBAR value
	lwz	r3,0(r20)			// Get new CCSRBAR
	isync

#endif /* RELOCATE_CCSRBAR */


//===========================================================================
// Setup local access window registers.
//
// For each window the base register (LAWBARn) is written first and then
// the attribute register (LAWARn) with enable, target and size.
// R20 = CCSRBAR; uses r3 (register address) and r4 (value).

// Set LAW #0 to LBIU: Flash #0, Flash #1 and NVRAM.
// NOTE: CCSR is in this space, but is not visible to the LAW decode
// process (AFAIK).

	addi	r3,r20,LAW_BAR0			// LAWBAR0
	lis	r4,0x000F			//   BASE = 0xF000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR0			// LAWAR0
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_LBUS		//   Flash on local bus
	ori	r4,r4,LAW_SIZE_256M		//   F000_0000-FFFF_FFFF
	stw	r4,0(r3)

// Set LAW #1 to DDR.

#ifdef	DDR_AS_SYS_MEM
	addi	r3,r20,LAW_BAR1			// LAWBAR1
	lis	r4,0x0000			//   BASE = 0x0000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR1			// LAWAR1
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_DDR		//   DDR SDRAM
	ori	r4,r4,LAW_SIZE_2G		//   0000_0000-7FFF_FFFF
	stw	r4,0(r3)
#endif

// Alternative: LAW #1 targets local-bus SDRAM instead of DDR.
#ifdef	LOCAL_SDRAM_AS_SYS_MEM
	addi	r3,r20,LAW_BAR1			// LAWBAR1
	lis	r4,0x0000			//   BASE = 0x0000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR1			// LAWAR1
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_LBUS		//   LOCAL BUS SDRAM
	ori	r4,r4,LAW_SIZE_2G		//   0000_0000-7FFF_FFFF
	stw	r4,0(r3)
#endif

// Set LAW #2 to PCI.

	addi	r3,r20,LAW_BAR2			// LAWBAR2
	lis	r4,0x0008			//   BASE = 0x8000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR2			// LAWAR2
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_PCI		//   PCI
	ori	r4,r4,LAW_SIZE_512M		//   8000_0000-9FFF_FFFF
	stw	r4,0(r3)

// Set LAW #3 to RIO.

	addi	r3,r20,LAW_BAR3			// LAWBAR3
	lis	r4,0x000C			//   BASE = 0xC000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR3			// LAWAR3
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_RAPIDIO		//   RapidIO
	ori	r4,r4,LAW_SIZE_512M		//   C000_0000-DFFF_FFFF
	stw	r4,0(r3)

// Temporary Test LAW -- used for whatever.

#define TEMP_LAW
#ifdef TEMP_LAW
	addi	r3,r20,LAW_BAR4			// LAWBAR4
	lis	r4,0x000E			//   BASE = 0xE000_0000
	stw	r4,0(r3)

	addi	r3,r20,LAW_AR4			// LAWAR4
	lis	r4,LAW_EN			//   Enable
	oris	r4,r4,LAW_TGT_LBUS		//   Flash on local bus
	ori	r4,r4,LAW_SIZE_256M		//   E000_0000-EFFF_FFFF
	stw	r4,0(r3)
#endif /* TEMP_LAW */

	msync

	// r3 still addresses the last LAWAR written above; read it back.
	lwz	r4,0(r3)	// Synchronizing load -- see pp2.2.3.5
	
	isync


//===========================================================================
// Preliminary speed measurement.  Needed prior to console and memory
// initialization.
//
// R21: PCI speed
// R22: CCB speed
// R23: MEM speed
// R24: CPU speed
// R25: DEC ticks (ns)
//
// Note: all speeds in kHz for better resolution.
// Uses r3-r5 as scratch; R20 = CCSRBAR on entry.


#ifdef MPC8540ADS
// ADS board doesn't get the clock from PCI. The sysclock
// is 66MHz

	li	r21,6666  		// 66MHz sysclock
	mulli	r21,r21,10
	addi	r21,r21,5		// Round a little (R21 = 66665)

#else
// First, get the PCI speed.  Elysium X2 can read this directly, X1
// cannot.

	li	r21,3333  		// Assume 33.333 MHz for now.
	mulli	r21,r21,10
	addi	r21,r21,5		// Round a little (R21 = 33335)
#endif


// Next, the PCI->CCB multiplier.

	oris	r4,r20,0x000E		// Read PORPLLSR
	lwz	r3,0(r4)
	srawi	r3,r3,1
	andi.	r3,r3,0x1F		// R3 = PCI->CCB multiplier
	mullw	r22,r3,r21		// R22 = CCB speed


// MEM is half the CCB (but effective data rate is 2X for DDR)

	li	r5,2
	divwu	r23,r22,r5
	

// CPU is scaled from the CCB.

	oris	r4,r20,0x000E		// Read PORPLLSR
	lwz	r3,0(r4)
	srawi	r3,r3,16
	andi.	r3,r3,0x1F		// R3 = CCB->CPU multiplier * 2
	mullw	r24,r22,r3
	li	r5,2
	divwu	r24,r24,r5		// R24 = CCB * (R3 / 2)


// Get bus/decrementer clock in 100ps multiples.  Thus, 133 MHz CCB clock
// is a decrementer period of 7.5ns, which is represented as '75'.

	lis	r4,0x98
	ori	r4,r4,0x9680		// R4 = 10,000,000
	divwu	r25,r4,r22		// R25 = ticks


//===========================================================================
// Console IO setup.
// Initializes the console, then emits 'G' (0x47), the init message and
// a CR/LF through the external k* console routines.

	.global	kio
kio:
	bl	kinit			// Initialize console
	li	r3,0x47			// 'G'
	bl	kputc
	bl	kput_initmsg
	bl	kputcrlf


//===========================================================================
// LOCAL BUS setup.
//
// BR0 -- already points to the boot flash.
// BR1 -- alternate flash.
// BR2 -- NVRAM.
//
// Flash banks #0/#1 are 2 Am29LV641s -120 (generally)
//   4Mb x 32 bits or 16MB total each.
//
// NVRAM is DS1553WP-120

//Elysium board would need the Power-on reset impedance 
//control. It also needs to configure the LCRR register,
//CS1 and CS2. PQ3-ADS board don't need these

#ifndef MPC8540ADS //applicable only for ELYSIUM board

// Change local bus output impedance to 45 ohms (Elysium does not have
// series termination on that bus).

	oris	r4,r20,POWIMPSCR@h	// Power-On Reset Impedance control
	ori	r4,r4,POWIMPSCR@l
	lwz	r3,0(r4)
	ori	r3,r3,0x003F		// LocalBus = 45 ohms
	stw	r3,0(r4)
	msync


// Calculate the SCY parameter for the board using the CCB clock, the
// LCRR[CLKDIV] and the desired speed.  120ns for all, as above.
//
// In:  R20 = CCSRBAR, R22 = CCB speed (kHz)
// Out: R7  = wait-state count shifted into the SCY position of ORx
//
// The LCRR address is formed as CCSRBAR + offset with an @h/@l pair (the
// same way POWIMPSCR is addressed above).  A lone 'oris' would place a
// 16-bit register offset in the upper halfword and address the wrong
// location.
// NOTE(review): a CLKDIV read of 0 would make the divwu below undefined.

	oris	r4,r20,LCRR@h		// LocalBus Control
	ori	r4,r4,LCRR@l
	lwz	r3,0(r4)
	andi.	r3,r3,0x000F		// R3 = CLKDIV 
					// CCB->LocalBus clock divider
	divwu	r5,r22,r3		// R5 = LB CLK (in kHz)
	lis	r3,(1000000)@h
	ori	r3,r3,(1000000)@l
	divw	r3,r3,r5		// R3 = LB CLK period in ns
	li	r5,FLASH_SPEED
	divw	r3,r5,r3		// R3 = # clks to wait

	addi	r3,r3,2			// Bump up some for safety (optional)

	slwi	r7,r3,4			// R7 = SCY value


// First, BR1.  (OR1 is written before BR1 so the mask and timing are in
// place when the bank becomes valid.)

	ori	r4,r20,OR1		// OR1
	lis	r3,0xFF00		// AM= 16MB mask
	or	r3,r3,r7		// Merge SCY

	stw	r3,0(r4)
	msync

	ori	r4,r20,BR1		// BR1
	lis	r3,0xFE00		//   Base =0xFE00_xxxx
	ori	r3,r3,BR_PS_32BIT	//   32 bits only
	ori	r3,r3,BR_V		//   VALID

	stw	r3,0(r4)
	msync


// Next, BR2.

	ori	r4,r20,OR2		// OR2
	lis	r3,0xFF00		// AM= 16MB mask
	or	r3,r3,r7		// Merge SCY

	stw	r3,0(r4)
	msync

	ori	r4,r20,BR2		// BR2
	lis	r3,0xFD00		// BA=0xFD00_xxxx
	ori	r3,r3,0x0800		// PS  = 01 = 8 bits
	ori	r3,r3,BR_V		// VALID

	stw	r3,0(r4)
	msync


#endif

// Reduce BR0 to only the boot flash now.
//
// In: R20 = CCSRBAR, R22 = CCB speed (kHz); on Elysium R7 = SCY value
// computed by the local-bus setup above.
#ifdef MPC8540ADS

	//adjusting parameters for LCRR
	//----------------------------
	// LCRR is addressed as CCSRBAR + offset using an @h/@l pair; a lone
	// 'oris' would put a 16-bit register offset in the upper halfword.
	oris	r4,r20,LCRR@h		// LocalBus Control
	ori	r4,r4,LCRR@l
	lwz	r3,0(r4)
	andi.	r3,r3,0x000F		// R3 = CLKDIV 
					// CCB->LocalBus clock divider
	divwu	r5,r22,r3		// R5 = LB CLK (in kHz)
	lis	r3,(1000000)@h
	ori	r3,r3,(1000000)@l
	divw	r3,r3,r5		// R3 = LB CLK period in ns
	li	r5,FLASH_SPEED
	divw	r3,r5,r3		// R3 = # clks to wait

	addi	r3,r3,2			// Bump up some for safety (optional)

	slwi	r7,r3,4			// R7 = SCY value (only used by the
					// disabled '#if 0' OR0 variant below)


	//program BR0
	//------------
	ori	r4,r20,BR0		// BR0 -- do BR0 first!
	lwz	r3,0(r4)

#ifdef ADS_ENG_BOARD	
	oris	r3,r3,0xFF80		// BA=0xFF80_xxxx
#endif
#ifdef ADS_PILOT_BOARD
	oris	r3,r3,0xFF00		// BA=0xFF00_xxxx
	ori	r3,r3,0x1800            //changing port size to 32-bit(for pilot board)
#endif
	stw	r3,0(r4)
	msync	


	//program OR0
	//------------	
	ori	r4,r20,OR0		// OR0
#ifdef ADS_ENG_BOARD		
	lis	r3,0xFF80		// AM= 8MB mask
	ori	r3,r3,0x6e61		// 0x6e61 value is taken from u-boot
	stw	r3,0(r4)		// Previously built but never written
	msync
#endif
#ifdef ADS_PILOT_BOARD

#if 1
	lis	r3,0xFF00		// AM= 16MB mask
	ori	r3,r3,0x6ff7		// 0x6ff7 value is taken from u-boot	
	stw	r3,0(r4)
	msync
#endif
#if 0
	ori	r4,r20,OR0		// OR0
	lis	r3,0xFF00		// AM= 16MB mask
	or	r3,r3,r7		// Merge SCY
	stw	r3,0(r4)
	msync
#endif

#endif


#else //ELYSIUM flash

	ori	r4,r20,BR0		// BR0 -- do BR0 first!
	lwz	r3,0(r4)
	oris	r3,r3,0xFF00		// BA=0xFF00_xxxx
	stw	r3,0(r4)
	msync
	
	ori	r4,r20,OR0		// OR0
	lis	r3,0xFF00		// AM= 16MB mask
	or	r3,r3,r7		// Merge SCY
	stw	r3,0(r4)
	msync

#endif

// On the instruction-set simulator skip DDR and PCI bring-up entirely.
#ifdef ISS
	b	ddr_pci_setup_end
#endif 



//===========================================================================
// I2C Setup.


#ifdef DDR_AS_SYS_MEM

//===========================================================================
// DDR SDRAM Setup.
//
// Live DDR controller initialization; only the MPC8540ADS variant is
// assembled.  Register values are hard-coded.
//
// In:  R20 = CCSRBAR, R25 = decrementer period in 100ps units
// Out: R26 = CAS latency x10 (stored into 'memCL' by init_globals)
// Uses r3, r4, CTR.

	.global	ddr_setup
ddr_setup:
#if 1
#ifdef MPC8540ADS
//testing ddr init code
	//apply ddr dll patch, affects some early pilot boards
	//will not be a problem when the rev2 chip comes up
	oris	r4,r20,0xe0e10@h
	ori	r4,r4,0xe0e10@l
	lwz	r3,0(r4)
	rlwinm	r3,r3,16,8,15
	oris	r3,r3,0x8000
	stw	r3,0(r4)
	msync
	isync
	msync

	//CS0_BNDS
	ori	r3,r20,0x2000
	lis	r4,0x0000
	ori	r4,r4,0x0007
	stw	r4,0(r3)

	//CS0_CONFIG
	ori	r3,r20,0x2080
	lis	r4,0x8000
	ori	r4,r4,0x0002
	stw	r4,0(r3)

	//TIMING_CFG_1
	ori	r3,r20,0x2108
	lis	r4,0x3734
	//org setting
	//ori	r4,r4,0xC321
	//york's setting
	ori	r4,r4,0x4321
	stw	r4,0(r3)

	//TIMING_CFG_2
	ori	r3,r20,0x210C
	lis	r4,0x0000
	//org setting
	//ori	r4,r4,0x0c00
	//york's setting
	ori	r4,r4,0x0800
	stw	r4,0(r3)

	//DDR_SDRAM_MODE
	ori	r3,r20,0x2118
	lis	r4,0x0000
	//org setting
	//ori	r4,r4,0x0162
	//york's setting
	ori	r4,r4,0x0062
	stw	r4,0(r3)

	// Record the CAS latency for init_globals.  The mode value above
	// carries 0x0060, which the disabled Elysium init in this file
	// labels CL=2.5 and pairs with R26 = 25.  Previously R26 was left
	// unset on this path.
	li	r26,25			// memCL = 2.5

	//DDR_SDRAM_INTERVAL
	ori	r3,r20,0x2124
	//org setting
	//lis	r4,0x2d92
	//york's setting
	//lis	r4,0x03a3
	lis	r4,0x0520
	ori	r4,r4,0x0100
	stw	r4,0(r3)

	//DDR_DEBUG_4
	ori	r3,r20,0x2F0C
	lis	r4,0x0000
	stw	r4,0(r3)

	//DDR_SDRAM_CFG
	addi	r3,r20,0x2110
	lis	r4,0xC200
	stw	r4,0(r3)
	msync
	isync

	//200usec delay: 200us = 2,000,000 x 100ps, divided by the
	//decrementer period (R25, 100ps units) gives the loop count.
	lis	r3,(200000*10)@h
	ori	r3,r3,(200000*10)@l
	divwu	r4,r3,r25
	mtctr	r4
ddr_wait_1:
	bdnz	ddr_wait_1
	nop

#endif
#endif
//==================================================================
//==================================================================
//==================================================================
#if 0
#ifdef MPC8540ADS //ddr initialization for PQ3 ADS board
	// Disabled (enclosing '#if 0') DDR initialization for the PQ3 ADS
	// board.  Each step is a read-modify-write on a register at
	// R28 = CCSRBAR | 0x2000, followed by sync/isync/sync.
	// NOTE(review): the label 'ddr_wait_1' below is also defined by the
	// live ADS ddr_setup code, so enabling this region as-is would
	// produce a duplicate symbol.
	mr	r28,r20
      
        //-------------------
	//values of r21,r22,r23,r24,r25 are 
	//taken from above
	

	//-------------------
	ori	r28,r28,8192
	lwz	r9,0(r28)
	lis	r0,-256
	ori	r0,r0,65280
	and	r9,r9,r0
	stw	r9,0(r28)
	lwz	r0,0(r28)
	ori	r0,r0,7
	stw	0,0(r28)
	sync	
	isync
	sync	

	lwz	r9,128(r28)
	lis	r0,16191
	ori	r0,r0,63736
	and	r9,r9,r0
	stw	r9,128(r28)
	lwz	r0,128(r28)
	oris	r0,0,32896
	ori	r0,r0,1
	stw	r0,128(r28)
	sync	
	isync
	sync	

	lwz	r9,264(r28)
	lis	r0,-30584
	ori	r0,r0,3212
	and	r9,r9,0
	stw	r9,264(r28)
	lwz	r0,264(r28)
	oris	r0,r0,22371
	ori	r0,r0,49970
	stw	r0,264(r28)
	sync	
	isync
	sync	
	lwz 	r0,268(r28)
	rlwinm 	r0,r0,0,22,18
	stw	r0,268(r28)
	lwz	r0,268(r28)
	ori	r0,r0,2048
	stw	r0,268(r28)
	sync	
	isync
	sync	
	lwz	r0,280(r28)
	li	r0,0
	stw	r0,280(r28)
	lwz	r0,280(r28)
	ori	r0,r0,34
	stw	r0,280(r28)
	sync	
	isync
	sync	
	lwz	r9,292(r28)
	lis	r0,-16384
	ori	r0,r0,49152
	and	r9,r9,r0
	stw	r9,292(r28)
	lwz	r0,292(r28)
	oris	r0,r0,1040
	stw	r0,292(r28)
	sync	
	isync
	sync	
	li	r0,13
	stw	r0,3852(r28)
	sync
	isync
	sync	

	// Busy-wait; loop count = 5,000,000 / R25.
	lis	r3,(500000*10)@h	// R3 = 500us = 5M* 100ps ns
	ori	r3,r3,(500000*10)@l
	divwu	r4,r3,r25		// Scale by decrementer rate
	mtctr	r4
	
        ddr_wait_1:
                bdnz    ddr_wait_1
                nop

	lwz	r9,272(r28)
	lis	r0,4063
	ori	r0,r0,65535
	and	r9,r9,0
	stw	r9,272(r28)
	lwz	r0,272(r28)
	oris	r0,r0,49664
	stw	r0,272(r28)
	sync	
	isync
	sync	

	// Second busy-wait of the same length.
	lis	r3,(500000*10)@h	// R3 = 500us = 5M* 100ps ns
	ori	r3,r3,(500000*10)@l
	divwu	r4,r3,r25		// Scale by decrementer rate
	mtctr	r4
	
ddr_wait_2:
	bdnz    ddr_wait_2
	nop

#else  //ddr initialization for ELYSIUM board - start;added ak

//-------------------------------
// Disabled (enclosing '#if 0') DDR initialization for the Elysium board.
// In:  R20 = CCSRBAR, R25 = decrementer period in 100ps units
// Out: R26 = CAS latency x10 (memCL)
// Uses r3, r4, CTR.
//
// Set CS0_BNDS

	ori	r3,r20,DDR_CS0_BNDS	// Set boundary for bank #1
	lis	r4,0x0000		// Start: MSB 12 bits (was mis-cased 'Lis')
	ori	r4,r4,0x0FFF		// End:   MSB 12 bits
	stw	r4,0(r3)

//-------------------------------
// Set CS0_CONFIG

	ori	r3,r20,DDR_CS0_CONFIG			// CS0_CONFIG
	lis	r4,DDR_CSx_CONFIG_CS_x_EN
//	oris	r4,r4,DDR_CSx_CONFIG_AP_x_EN		//
//	ori	r4,r4,DDR_CSx_CONFIG_ROW_BITS_CS_12
	ori	r4,r4,DDR_CSx_CONFIG_ROW_BITS_CS_13
//	ori	r4,r4,DDR_CSx_CONFIG_ROW_BITS_CS_14

//	ori	r4,r4,DDR_CSx_CONFIG_COL_BITS_CS_8
//	ori	r4,r4,DDR_CSx_CONFIG_COL_BITS_CS_9
  	ori	r4,r4,DDR_CSx_CONFIG_COL_BITS_CS_10
//	ori	r4,r4,DDR_CSx_CONFIG_COL_BITS_CS_11
	stw	r4,0(r3)

//-------------------------------
// TIMING_CFG_1
//
// Assumes 133 MHz clock (266 DDR) or 7.5ns.

	ori	r3,r20,DDR_TIMING_CFG_1

	lis	r4,DDR_TIMING_CFG_1_PRETOACT(7)
	oris	r4,r4,DDR_TIMING_CFG_1_ACTTOPRE(7)
	oris	r4,r4,DDR_TIMING_CFG_1_ACTTORW(7)
#ifdef DDR_CL_2
	oris	r4,r4,DDR_TIMING_CFG_1_CL_2
#endif
#ifdef DDR_CL_2_5
	oris	r4,r4,DDR_TIMING_CFG_1_CL_2_5
#endif
#ifdef DDR_CL_3
	oris	r4,r4,DDR_TIMING_CFG_1_CL_3
#endif

	ori	r4,r4,DDR_TIMING_CFG_1_REFREC(15)
	ori	r4,r4,DDR_TIMING_CFG_1_WRREC(3)
	ori	r4,r4,DDR_TIMING_CFG_1_ACTTOACT(4)
	ori	r4,r4,DDR_TIMING_CFG_1_WRTORD(2)
	
//	lis	r4,0x7774
//	ori	r4,r4,0xf342
	stw	r4,0(r3)

//-------------------------------
// TIMING_CFG_2 -- only WR_DATA_DELAY.  2 (= 2/8) is recommended.
//
// NB: was 4 initially...
// NOTE(review): the macro-built value is immediately overwritten by the
// literal 0x800 on the next line; only 0x800 is stored.

	ori	r3,r20,DDR_TIMING_CFG_2
 	li	r4,DDR_TIMING_CFG_2_WR_DATA_DELAY(2)
	li	r4,0x800
	stw	r4,0(r3)

//-------------------------------
// DDR_SDRAM_MODE

	addi	r3,r20,0x2118		// DDR_SDRAM_MODE
#ifdef DDR_CL_2
	li	r4,0x0020		//   CL=2
	li	r26,2			// memCL = 2
#endif
#ifdef DDR_CL_2_5
	li	r4,0x0060		//   CL=2.5
	li	r26,25			// memCL = 2.5
#endif
#ifdef DDR_CL_3
	li	r4,0x0030		//   CL=3
	li	r26,30			// memCL = 3
#endif
#ifdef DDR_32BIT_BUS
	ori	r4,r4,0x0003		//   8-BEAT BURST (32-bit)
#else
	ori	r4,r4,0x0002		//   4-BEAT BURST (64-bit)
#endif
	stw	r4,0(r3)

//-------------------------------
// DDR_SDRAM_INTERVAL

	addi	r3,r20,0x2124
	lis	r4,0x04D0		// Refresh interval
//	ori	r4,r4,0x04D0		// Open page interval
	stw	r4,0(r3)


//-------------------------------
// DEBUG
//
	addi	r3,r20,0x2F04		// DEBUG_2
	lis	r4,0x2000		// 8060
	ori	r4,r4,0x0000
	stw	r4,0(r3)

	addi	r3,r20,0x2F00		// DEBUG_1
	lis	r4,0xFF01
	ori	r4,r4,0x0000
	stw	r4,0(r3)

	addi	r3,r20,0x2F0C		// DEBUG_4
	lis	r4,0x0000
	ori	r4,r4,0x0000
	stw	r4,0(r3)


// Prior to setting MEM_EN, wait 200uS.  I don't think this is necessary,
// though.

#define PRE_DDR_WAIT
#ifdef PRE_DDR_WAIT
	lis	r3,(200000*10)@h	// R3 = 200us = 2M* 100ps ns
	ori	r3,r3,(200000*10)@l
	divwu	r4,r3,r25		// Scale by decrementer rate
	mtctr	r4
ddr_wait_1:
	bdnz	ddr_wait_1
	nop
#endif 

//-------------------------------
// After DDR register setup, enable the DDR controller.

	addi	r3,r20,DDR_SDRAM_CFG
	lis	r4,DDR_SDRAM_CFG_MEM_EN
	oris	r4,r4,DDR_SDRAM_CFG_DDR_SDRAM

#ifdef DDR_REGISTERED
	oris	r4,r4,DDR_SDRAM_CFG_RD_EN
#endif

#ifdef DDR_32BIT_BUS
	oris	r4,r4,DDR_SDRAM_CFG_MEM_32BIT
#endif
	stw	r4,0(r3)

	msync
	isync

//--------------------
// Wait 200 uS.

	lis	r3,(200000*10)@h	// R3 = 200us = 2M* 100ps ns
	ori	r3,r3,(200000*10)@l
	divwu	r4,r3,r25		// Scale by decrementer rate
	mtctr	r4
ddr_wait_2:
	bdnz	ddr_wait_2
	nop

#endif //ddr initialization for ELYSIUM board - end;added ak
#endif //end of #if 0
#endif //end of DDR_AS_SYS_MEM


#ifdef LOCAL_SDRAM_AS_SYS_MEM
	.global	local_sdram_setup
// local_sdram_setup: program the local-bus controller for SDRAM on
// chip-select 2 and run the SDRAM start-up command sequence.
//
// In:  R20 = CCSRBAR
// Uses r4 (register / memory address) and r5 (value).
//
// Every register address is CCSRBAR + a 16-bit offset, so it is formed
// with 'ori'.  The previous 'oris' placed the offset in the upper
// halfword (e.g. CCSRBAR | 0x5014_0000) and addressed the wrong location.
// LCRR comes from a header, so it is built with an @h/@l pair.
//
// Each LSDMR write is followed by a dummy word store to address 0 (the
// SDRAM base).
// NOTE(review): the individual LSDMR command encodings (presumably
// precharge / refresh / mode-register-write / normal operation) should
// be confirmed against the local-bus controller documentation.
local_sdram_setup:
//local bus sdram initialization

//lcrr
	oris	r4,r20,LCRR@h
	ori	r4,r4,LCRR@l
	lis	r5,0x8003
	ori	r5,r5,0x0004
	stw	r5,0x0(r4)

//or2
	ori	r4,r20,0x5014
	lis	r5,0xfc00
	ori	r5,r5,0x6901
	stw	r5,0x0(r4)

//br2
	ori	r4,r20,0x5010
	li	r5,0x1861
	stw	r5,0x0(r4)


//lbcr
	ori	r4,r20,0x50d0
	li	r5,0
	stw	r5,0x0(r4)


//lsdmr-1
	ori	r4,r20,0x5094
	lis	r5,0x2861
	ori	r5,r5,0xb723
	stw	r5,0x0(r4)
	sync

//write long
	li	r4,0x0
	li	r5,0x00ff
	stw	r5,0x0(r4)


//lsdmr-2
	ori	r4,r20,0x5094
	lis	r5,0x0861
	ori	r5,r5,0xb723
	stw	r5,0x0(r4)
	sync


//write long
	li	r4,0x0
	li	r5,0x00ff
	stw	r5,0x0(r4)

//lsdmr-3
	ori	r4,r20,0x5094
	lis	r5,0x0861
	ori	r5,r5,0xb723
	stw	r5,0x0(r4)
	sync


//write long
	li	r4,0x0
	li	r5,0x00ff
	stw	r5,0x0(r4)

//lsdmr-4
	ori	r4,r20,0x5094
	lis	r5,0x1861
	ori	r5,r5,0xb723
	stw	r5,0x0(r4)
	sync

//write long
	li	r4,0x0
	li	r5,0x00ff
	stw	r5,0x0(r4)

//lsdmr-5
	ori	r4,r20,0x5094
	lis	r5,0x4061
	ori	r5,r5,0xb723
	stw	r5,0x0(r4)
	sync


//lsrt
	ori	r4,r20,0x50a4
	lis	r5,0x2000
	stw	r5,0x0(r4)

//mrtpr
	ori	r4,r20,0x5084
	lis	r5,0x2000
	stw	r5,0x0(r4)
	sync

#endif //end of local bus sdram initialization


//===========================================================================
//===========================================================================


		
//===========================================================================
// PCI Setup.
//
// Programs two PCI outbound windows: one for memory space based at
// 0x8000_0000 and one for I/O space based at 0x9000_0000.
// In: R20 = CCSRBAR.  Uses r3 (value) and r4 (register address).

	.global	pci_setup
pci_setup:
	ori	r4,r20,PCI_POWBAR	// Set Mem Base
	lis	r3,0x0008		// = 0x8000_0000
	stw	r3,0(r4)
	msync

	ori	r4,r20,PCI_POWAR	// Enable memory R/W
	lis	r3,PCI_POWAR_EN
	oris	r3,r3,PCI_POWAR_RTT_MEMR	// read type: memory read
	ori	r3,r3,PCI_POWAR_WTT_MEMW	// write type: memory write
	ori	r3,r3,PCI_POWAR_OWS_128M	// window size 128M
	stw	r3,0(r4)
	msync

	ori	r4,r20,PCI_POWBAR1	// Set IO Base
	lis	r3,0x0009		// = 0x9000_0000
	stw	r3,0(r4)
	msync
	
	ori	r4,r20,PCI_POWAR1	// Enable IO R/W
	lis	r3,PCI_POWAR_EN
	oris	r3,r3,PCI_POWAR_RTT_IOR		// read type: I/O read
	ori	r3,r3,PCI_POWAR_WTT_IOW		// write type: I/O write
	ori	r3,r3,PCI_POWAR_OWS_64K		// window size 64K
	stw	r3,0(r4)
	msync




//===========================================================================
// Debug messages.
// When DEBUG_MSGS is defined (it is, just below) the external debug_*
// routines are called in turn.

#define DEBUG_MSGS

#ifdef DEBUG_MSGS
	bl	debug_cfg
	bl	debug_mem
	bl	debug_mem_dumpregs
	bl	kputcrlf

	bl	debug_rio

	bl	debug_pci
#endif


// Target of the ISS-only branch that skips DDR and PCI setup.
ddr_pci_setup_end:	
		
//===========================================================================
// Memory initialization.  DINK lives at 0x0...0x0100000-1.

// First, optionally, scrub DINK's memory
//
// When SCRUB is defined: prints "SCRUB" and zero-fills FILL_SIZE bytes
// starting at address 0.  Uses r0, r3, r13, r16.

	.global scrub_memory
scrub_memory:

#ifdef SCRUB
	li	r3,0x53			// SCRUB
	bl	kputc
	li	r3,0x43
	bl	kputc
	li	r3,0x52
	bl	kputc
	li	r3,0x55
	bl	kputc
	li	r3,0x42
	bl	kputc
	bl	kputcrlf


	lis	r13,0			// R13: destination (sdram)
	lis	r16,FILL_SIZE@ha	// R16: fill size
	addi	r16,r16,FILL_SIZE@l
	addi	r13,r13,-4		// Pre-bias for the stwu below
	li	r0,0			// Fill value.  r0 was zeroed at reset
					// entry, but external routines called
					// since then may have changed it.
slp:	
	stwu	r0,4(r13)		// Write RAM
	addic.	r16,r16,-4		// decrement size, set cr0
	bgt	slp
#endif

#ifdef DDR_MEMORY_TEST
// Simple DDR pattern test: writes 0x5555_5555 to 32M consecutive words
// starting at address 0, then reads every word back and compares.
// Uses r4 (word count), r5 (pattern), r6 (read-back), r7 (address), CTR.
//
// NOTE(review): both outcomes end in an endless loop (ddr_success_loop_1
// or ddr_error_loop_1), so while DDR_MEMORY_TEST is defined execution
// never reaches copy_flash_to_ram below.  Confirm this halt is intended.
	li 	r7,0x0
	lis	r4,0x0200
	ori	r4,r4,0x0000 //r4=32M-word=128MB
	//lis	r4,0x0040
	//ori	r4,r4,0x0000  //r4=4M-word=16MB=1/8 of the actual size
	mtspr 	ctr,r4
	lis	r5,0x5555
	ori	r5,r5,0x5555

//writing to ddr mem
ddr_test_loop_wr:
	stw	r5,0(r7)
	addi	r7,r7,4
	//li	r3,0x41			// A
	//bl	kputc		
	bdnz	ddr_test_loop_wr
	nop
	sync
	isync

	//li	r3,0x41   //A
	//bl	kputc
	
//reading and comparing
	li 	r7,0x0
	mtspr 	ctr,r4

ddr_test_loop_rd:
	lwz	r6,0(r7)
	cmplw	r5,r6
	//li	r3,0x42			// B
	//bl	kputc			
	bne	ddr_error_loop		// Mismatch: report failure and halt
	nop
	addi	r7,r7,4
	bdnz	ddr_test_loop_rd
	nop
	//print sdram test passed
	bl	ddr_success_loop	

// Not reached: ddr_success_loop never returns.
spin_passed:
	nop
	nop
	b	spin_passed
	
ddr_error_loop:
	//print sdram test failed
	bl	debug_ddr_test_fail
ddr_error_loop_1:	
	nop
	nop
	nop
	nop
	b 	ddr_error_loop_1	

ddr_success_loop:
	//print sdram test passed
	bl	debug_ddr_test_pass
ddr_success_loop_1:	
	nop
	nop
	nop
	nop
	b 	ddr_success_loop_1	

#endif



//----------------------------------------
// Now copy EDINK to SDRAM.
//
// Prints "COPY", then copies COPY_SIZE bytes word-by-word from the flash
// base (R27) to address 0.  Whenever the low 14 bits of the destination
// are zero the destination address is printed followed by a CR.
// With COPY_VERIFY each word is read back right after the store; on a
// mismatch "E=<addr>=<expected>=<actual>" is printed and the copy goes on.
//
// In:  R27 = flash base
// Out: R13 = first address past the copied image
// Uses r3, r13-r17.
// NOTE(review): relies on the external kput* routines preserving r13-r17.

	.global copy_flash_to_ram
copy_flash_to_ram: 

	li	r3,0x43			// COPY
	bl	kputc
	li	r3,0x4F
	bl	kputc
	li	r3,0x50
	bl	kputc
	li	r3,0x59
	bl	kputc
	bl	kputcrlf

	lis	r13,0			// R13: destination (sdram)
	mr	r14,r27			// R14: source (base eprom address)
	lis	r16,COPY_SIZE@h		// R16: image size
	ori	r16,r16,COPY_SIZE@l

lp1:	
	lwzx	r15,0,r14		// Read flash
	msync
	stwx	r15,0,r13		// Write RAM

	mr	r3,r13
	andi.	r3,r3,0x3FFF		// Progress message every 0x4000 bytes
	bne	copy_no_msg
	mr	r3,r13
	bl	kputhex
	li	r3,0x0d			// CR only, so the address overwrites itself
	bl	kputc
copy_no_msg:


#ifdef COPY_VERIFY
	lwzx	r17,0,r13		// load word from dram. 
	msync
	cmp	0,0,r17,r15		// check to see if dram got written 
	beq	no_cp_err

	bl	kputcrlf
	li	r3,0x45			// 'E'
	bl	kputc

no_e:
	li	r3,0x3d			// '='
	bl	kputc
	mr	r3,r13			// Error address
	bl	kputhex
	li	r3,0x3d
	bl	kputc
	mr	r3,r15			// Expected data
	bl	kputhex
	li	r3,0x3d
	bl	kputc
	mr	r3,r17			// Actual data
	bl	kputhex
	bl	kputcrlf

//	b	error_dram_init

no_cp_err:
#endif

	addi	r14,r14,4		// go to next word of eprom and dram. 
	addi	r13,r13,4
	addic.	r16,r16,-4		// decrement size, set cr0
	bgt	lp1
	
	msync


//------------------------------------
// Now verify the copy
//
// Optional second pass (POST_COPY_VERIFY): prints "VERIFY", then compares
// COPY_SIZE bytes of flash (from R27) against RAM (from 0) word by word.
// A mismatch prints "E=<addr>=<expected>=<actual>" and the scan goes on.
// Uses r3, r8, r13-r17.

#ifdef POST_COPY_VERIFY

	bl	kputcrlf
	li	r3,0x56			// VERIFY
	bl	kputc
	li	r3,0x45
	bl	kputc
	li	r3,0x52
	bl	kputc
	li	r3,0x49
	bl	kputc
	li	r3,0x46
	bl	kputc
	li	r3,0x59
	bl	kputc
	bl	kputcrlf



					
	lis	r13,0			// R13: destination (sdram)
	mr	r14,r27			// R14: source (base eprom address)
	lis	r16,COPY_SIZE@h		// R16: image size
	ori	r16,r16,COPY_SIZE@l
	li	r8,0			// R8: address of the junk read (0)

verlp:
	lwzx	r15,0,r14		// Read flash
	lwzx	r3,0,r8			// Read junk word
	lwzx	r17,0,r13		// Read word from dram. 

	cmp	0,0,r17,r15		// check to see if dram got written 
	beq	nover_err

	bl	kputcrlf
	li	r3,0x45			// 'E'
	bl	kputc
	li	r3,0x3d			// '='
	bl	kputc
	mr	r3,r13			// Error address
	bl	kputhex
	li	r3,0x3d
	bl	kputc
	mr	r3,r15			// Expected data
	bl	kputhex
	li	r3,0x3d
	bl	kputc
	mr	r3,r17			// Actual data
	bl	kputhex
	bl	kputcrlf

nover_err:
	addi	r14,r14,4		// go to next word of eprom and dram. 
	addi	r13,r13,4
	addic.	r16,r16,-4		// decrement size, set cr0
	bgt	verlp
#endif /* POST_COPY_VERIFY */


//----------------------------------------
// On exit from the copy, R13 points just past the last word copied.
// While R13 is below FILL_SIZE, fill memory with FILL_PATT.
//
// The bound is checked before the first store, so nothing is written
// when the copied image already reaches or exceeds FILL_SIZE.  Addresses
// are compared unsigned.
// Uses r3 (pattern, then print argument), r13, r16.

	lis	r16,FILL_SIZE@h		// R16: fill limit (end address)
	ori	r16,r16,FILL_SIZE@l
	lis	r3,FILL_PATT@h		// R3: fill pattern
	ori	r3,r3,FILL_PATT@l
	cmplw	r13,r16			// Already at or past the limit?
	bge	fill_lp_done
fill_lp:
	stw	r3,0(r13)
	addi	r13,r13,4
	cmplw	r13,r16
	blt	fill_lp
fill_lp_done:

	bl	kputcrlf
	mr	r3,r13			// Print the final fill address
	bl	kputhex
	bl	kputcrlf
	bl	kputcrlf


copy_flash_end:
	bl	kputcrlf
	li	r3,0x4F			// "OK"
	bl	kputc
	li	r3,0x4B
	bl	kputc
	bl	kputcrlf


//===========================================================================
// Common register setup prior to store_globals.
//
// Stages values in SPRG0-3 and R29-R31 for init_globals:
//   SPRG0 = board id, SPRG1 = PCI I/O base (0x9000_0000),
//   SPRG2 = CCSRBAR | 0x8000, SPRG3 = CCSRBAR | 0x8004,
//   R29 = CCSRBAR, R30 = 0, R31 = memory size in bytes.

#ifdef MPC8540ADS
	li	r3,ADS_PQ3
	mtsprg0	r3
#else
	li	r3,MARS_ELY
	mtsprg0	r3
#endif

	lis	r3,0x9000		// PCI base.
	mtsprg1	r3

	ori	r3,r20,0x8000
	mtsprg2	r3			// PCI CFG_ADDR
	ori	r3,r20,0x8004
	mtsprg3	r3			// PCI CFG_DATA
	
	mr	r29,r20
	li	r30,0


//r31 stores the size of the ddr memory. In case of
//Elysium the DDR memory size is 512MB and in case 
//of PQ3 ADS board it is 128MB
// NOTE(review): on ADS builds with neither DDR_AS_SYS_MEM nor
// LOCAL_SDRAM_AS_SYS_MEM defined, r31 is not written here.
#ifdef MPC8540ADS
#ifdef DDR_AS_SYS_MEM
	lis	r31,0x0800  //128MB DDR memory on ADS
#endif
#ifdef LOCAL_SDRAM_AS_SYS_MEM
	lis	r31,0x0400  //64MB local bus SDRAM on ADS
#endif

#else
	lis     r31,0x2000  //512MB DDR memory on ELYSIUM      
#endif

// Optional boot-progress markers: two lines each holding 'R'.
#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x52  //R
	bl	kputc

	bl	kputcrlf
	li	r3,0x52  //R
	bl	kputc
#endif


// --------------------------------------------------------------
//  It is only here that we can store into global variables. If done before
//  the copy routine then the copy routine will overwrite them.
//
//  SPRG0 = board_type
//  SPRG1 = IO_Base
//  SPRG2 = CfgAddr
//  SPRG3 = CfgData
//  R20   = CCSRBAR/PCSRBAR
//  R21   = PCI speed
//  R22   = CCB speed
//  R23   = MEM speed
//  R24   = CPU speed
//  R25   = jiffies
//  R26   = CL
//  R27   = ResetBase
//  R29   = GT64260 Base (if MVP)
//  R30   = BusType
//  R31   = Total memory size

	.global init_globals
// init_globals: copy the values staged in SPRG0-3 and R21-R31 into the
// corresponding global variables (now resident in RAM).
// Each store loads the variable's address into r3 and writes one word.
// Uses r3, r4, r5, r8.
// NOTE(review): R28 (board_version) is not written by any live code
// visible in this file before it is stored here -- confirm its source.
init_globals: 

	// board_type <- SPRG0
	lis	r3,board_type@ha
	addi	r3,r3,board_type@l
	mfspr	r5,sprg0
	stw	r5,0(r3)

#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x53  //S
	bl	kputc
#endif

	// io_base_addr <- SPRG1
	lis	r3,io_base_addr@ha
	addi	r3,r3,io_base_addr@l
	mfspr	r8,sprg1
	stw	r8,0(r3)

	// in_which_code <- 0 (the dink value)
	lis	r3,in_which_code@ha
	addi	r3,r3,in_which_code@l
	li	r4,0
	stw	r4,0(r3)

	// config_addr <- SPRG2
	lis	r3,config_addr@ha
	addi	r3,r3,config_addr@l
	mfspr	r5,sprg2
	stw	r5,0(r3)

	// config_data <- SPRG3
	lis	r3,config_data@ha
	addi	r3,r3,config_data@l
	mfspr	r5,sprg3
	stw	r5,0(r3)

	// memCL <- R26
	lis	r3,memCL@ha
	addi	r3,r3,memCL@l
	stw	r26,0(r3)

	// memSize <- R31
	lis	r3,memSize@ha
	addi	r3,r3,memSize@l
	stw	r31,0(r3)

	// BusProtocol <- R30
	lis	r3,BusProtocol@ha
	addi	r3,r3,BusProtocol@l
	stw	r30,0(r3)

	// CCSRBAR_Base <- R29
	lis	r3,CCSRBAR_Base@ha
	addi	r3,r3,CCSRBAR_Base@l
	stw	r29,0(r3)

	// ResetBase <- R27
	lis	r3,ResetBase@ha
	addi	r3,r3,ResetBase@l
	stw	r27,0(r3)

	// board_version <- R28
	lis	r3,board_version@ha
	addi	r3,r3,board_version@l
	stw	r28,0(r3)

	// speed_pci <- R21
	lis	r3,speed_pci@ha
	addi	r3,r3,speed_pci@l
	stw	r21,0(r3)

	// speed_ccb <- R22
	lis	r3,speed_ccb@ha
	addi	r3,r3,speed_ccb@l
	stw	r22,0(r3)

	// speed_mem <- R23
	lis	r3,speed_mem@ha
	addi	r3,r3,speed_mem@l
	stw	r23,0(r3)

	// speed_cpu <- R24
	lis	r3,speed_cpu@ha
	addi	r3,r3,speed_cpu@l
	stw	r24,0(r3)

	// jiffies <- R25
	lis	r3,jiffies@ha
	addi	r3,r3,jiffies@l
	stw	r25,0(r3)

#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x54  //T
	bl	kputc
#endif


//===========================================================================
// Transfer to 'C'-code main().

	.global	go_main
go_main:

// Final hand-off from the boot code to the 'C' entry point main():
//   1. point IVPR/IVORs at the exception table based at address 0,
//   2. park the decrementer at 0x7FFF_FFFF,
//   3. load SRR0 with the address of main and SRR1 with the new MSR,
//   4. load r13 with _SDA_BASE_ and r1 with CPU0_STACK,
//   5. execute rfi -- control does not come back here.
// r3 and r4 are used as scratch; every 'bl' below overwrites LR.

// Now that EDINK is in low memory, and we are about to jump there,
// move the IVOR registers to point to the SDRAM version.

	lis	r3,0x0000		// IVPR value: exception table based at 0
	bl	setup_exception_table_addresses


// Set the decrementer to the highest value that won't cause an interrupt
// DEC interrupts stay pending until the MSR[EE] bit is set, this gives
// the user more time to set their Dec and enable EE before dink's Dec
// rolls over and causes a pending int.

	lis	r3,0x7fff
	ori	r3,r3,0xffff		// r3 = 0x7FFF_FFFF
	mtspr	dec_r,r3
	isync

#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x55  //U
	bl	kputc
#endif


// Set up SRR0 (jump address), SRR1 (MSR)
//
// NOTE: the high half must be taken with @h, not @ha, because the low
// half is merged with 'ori'.  @ha is pre-adjusted for the sign extension
// done by 'addi'; combined with 'ori' it yields an address 0x10000 too
// high whenever bit 15 of main@l is set.

	lis	r3,main@h		// get start address of dink
	ori	r3,r3,main@l


// Clear the upper 12 address bits (mask 0xFFF0_0000) to fake a jump to a
// low address for the simulator (remove the next 2 instructions for
// hardware).

	lis	r4,0xFFF0		// r4 = 0xFFF0_0000
	andc	r3,r3,r4		// r3 &= ~0xFFF0_0000

	mtspr	srr0,r3			// load address into srr0.
	sync
	msync

#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x56  //V
	bl	kputc
#endif


// Message: "GO=" followed by the address of main in hex and CR/LF.
// The value printed is the unmasked address of main, i.e. before the
// simulator mask that was applied to the copy placed in SRR0 above.

	li	r3,0x47			// 'G'
	bl	kputc
	li	r3,0x4F			// 'O'
	bl	kputc
	li	r3,0x3d			// '='
	bl	kputc

	lis	r3,main@h		// get start address of dink (@h: see note above)
	ori	r3,r3,main@l
	bl	kputhex
	bl	kputcrlf


// SRR1/MSR = 0x0200_0200
// UCLE = 0 user not allowed to cache lock (takes DSI)
// SPE = 1 SPU instructions enabled
// WE = 0 wait state disabled
// CE = 0 critical input/watchdog timer interrupts disabled
// EE = 0 External interrupts disabled
// PR = 0 Processor in supervisor mode
// FP = 0 Floating point not available
// ME = 0 machine check interrupts disabled
// FE0 = 0 this is reserved and permanently cleared for e500
// UBLE = 0 user execution of BTB lock instructions disallowed
// DE = 1 debug interrupts enabled
// FE1 = 0 same as FE0
// IS = 0 Dink instruction space (see also the TLB entry, MAS1)
// DS = 0 Dink data space (see also the TLB entry, MAS1)
// PMM = 0 For now we leave this alone

	lis	r4,0x0200		// MSR[SPE] = 1 (0x0200_0000)
	ori	r4,r4,0x0200		// MSR[DE]  = 1 (0x0000_0200)
					// => MSR = 0x0200_0200: supervisor mode,
					// EE/CE/ME clear, IS = DS = space 0
	mtspr	srr1,r4
	isync

	lis     r13,_SDA_BASE_@ha     // set up small data area
	addi    r13,r13,_SDA_BASE_@l

	lis     r1,CPU0_STACK@ha    //  set up stack space
	addi    r1,r1,CPU0_STACK@l

//jump to main


#ifdef ADS_DEBUG_BOOT_ROM
	bl	kputcrlf
	li	r3,0x57  //W
	bl	kputc
#endif

	.global  rfiDink
rfiDink:				// so we can find this address
					// and break on it before setting
					// breakpoints in RAM space.

	isync
	msync

	rfi				//  branch to (e)dink main: PC <- SRR0, MSR <- SRR1


//---------------------------------------
// error traps:  loop to self forever with diagnostic bus code.

	.global error_dram_init
error_dram_init:
// Fatal error report.  Emits CR/LF, then "E=" and three hex words
// separated by '=':  r13, r14 (expected data, copied from r5) and
// r15 (copied from r7).  It then hangs forever in error_lp.
// NOTE(review): what r13 and r7 hold is decided by the code that branches
// here, which is not visible in this section -- confirm against the caller.
	mr	r15,r7			// presumably saved so the value survives
					// the kput* calls -- confirm
	mr	r14,r5			// R14=R5 is expected data
	bl	kputcrlf
	li	r3,0x45			// 'E'
	bl	kputc
	li	r3,0x3d			// '='
	bl	kputc
	mr	r3,r13			// field 1: r13
	bl	kputhex
	li	r3,0x3d			// '='
	bl	kputc
	mr	r3,r14			// field 2: expected data.  This used to
					// print r15, so r14 was saved but never
					// shown and r15 appeared twice.
	bl	kputhex
	li	r3,0x3d			// '='
	bl	kputc
	mr	r3,r15			// field 3: r15 (copy of r7)
	bl	kputhex

error_lp:
	lis	r3,0xDEAD		// Load error flag
	ori	r3,r3,0xD00D		// and code
	b	error_lp		// spin forever with r3 = 0xDEADD00D


//===========================================================================
// setup_exception_table_addresses -- accepts the ivpr value 
// (the upper 16 bits of the exception table address) as a 
// parameter in the upper 16 bits
// of r3

/*****************************************************
 *   Set up the exception vectors to look like classic PowerPC.
 *   Where possible, use the same exception table offsets as classic,
 *   e.g. DSI at 0x300, program at 0x700, system call at 0xc00.
 *   New exceptions are placed arbitrarily at unused offsets.
*****************************************************/

	.global setup_exception_table_addresses
setup_exception_table_addresses:

// In:       r3 = value written to IVPR
// Clobbers: r4
// Each IVORn is loaded with (r3 | offset); returns through LR.

	mtspr	ivpr,r3

	ori	r4,r3,0x0100		// 0x0100: critical input
	mtspr	ivor0,r4

	ori	r4,r3,0x0200		// 0x0200: machine check
	mtspr	ivor1,r4

	ori	r4,r3,0x0300		// 0x0300: dsi
	mtspr	ivor2,r4

	ori	r4,r3,0x0400		// 0x0400: isi
	mtspr	ivor3,r4

	ori	r4,r3,0x0500		// 0x0500: external input
	mtspr	ivor4,r4

	ori	r4,r3,0x0600		// 0x0600: alignment
	mtspr	ivor5,r4

	ori	r4,r3,0x0700		// 0x0700: program
	mtspr	ivor6,r4

	ori	r4,r3,0x0800		// 0x0800: floating point unavailable
	mtspr	ivor7,r4

	ori	r4,r3,0x0900		// 0x0900: decrementer
	mtspr	ivor10,r4

	ori	r4,r3,0x0A00		// 0x0A00: auxiliary processor unavailable
	mtspr	ivor9,r4

	ori	r4,r3,0x0B00		// 0x0B00: fixed interval timer
	mtspr	ivor11,r4

	ori	r4,r3,0x0C00		// 0x0C00: system call
	mtspr	ivor8,r4

	ori	r4,r3,0x0D00		// 0x0D00: watchdog timer
	mtspr	ivor12,r4

					// 0x0E00: not used

	ori	r4,r3,0x0F00		// 0x0F00: performance monitor
	mtspr	ivor35,r4

	ori	r4,r3,0x1000		// 0x1000: instruction TLB error
	mtspr	ivor14,r4

	ori	r4,r3,0x1100		// 0x1100: data TLB error
	mtspr	ivor13,r4

	ori	r4,r3,0x1200		// 0x1200: vector float data error
	mtspr	ivor33,r4

	ori	r4,r3,0x1300		// 0x1300: vector round error
	mtspr	ivor34,r4

					// 0x1400: not used

	ori	r4,r3,0x1500		// 0x1500: debug
	mtspr	ivor15,r4

	ori	r4,r3,0x1600		// 0x1600: SPU
	mtspr	ivor32,r4

	isync
	msync

	blr


//===========================================================================
//

	.text
	.align	2
	.global startup_cache_inval_enable_L1I

startup_cache_inval_enable_L1I:

// Flash-invalidates the L1 instruction cache and then enables it, using
// two successive writes to L1CSR1.
// Clobbers: r3.  Returns through LR.

// Step 1: L1I flash invalidate

	msync
	isync
	mfspr	r3,l1csr1		// start from the current L1CSR1 value
	ori	r3,r3,0x0002		// set ICFI bit
	mtspr	l1csr1,r3

// Step 2: L1I enable
// r3 still has the ICFI bit set from step 1, so this write carries both
// ICFI and ICE.

	msync
	isync
	ori	r3,r3,0x0001		// set ICE bit
	mtspr	l1csr1,r3
	isync
	blr


//===========================================================================
//

	// The label defined below is startup_cache_inval_enable_L1D, so that is
	// the name that has to be exported (same scheme as the L1I routine).
	// The original directive named cache_inval_enable_L1D, which is not
	// defined in this section; it is kept so that any existing use of that
	// name elsewhere is unaffected.
	.global startup_cache_inval_enable_L1D
	.global cache_inval_enable_L1D

startup_cache_inval_enable_L1D:

// Flash-invalidates the L1 data cache and then enables it, using two
// successive writes to L1CSR0.
// Clobbers: r3.  Returns through LR.

//L1D Flash invalidate

	msync
	isync
	mfspr    r3,l1csr0		// start from the current L1CSR0 value
        ori      r3,r3,0x0002		//set CFI bit
        mtspr    l1csr0,r3
   
//L1D enable
// r3 still has the CFI bit set from the step above, so this write carries
// both CFI and CE.

        msync 
        isync
        ori      r3,r3,0x0001		//set CE bit
        mtspr    l1csr0,r3
        isync

        blr


//===========================================================================

	.align	4
	.text			// MUST be in upper 4K boot range!

// mas_tlb_table -- TLB1 entries to be loaded during boot.
// Layout: one word holding the number of entries, followed by that many
// groups of four words (MAS0, MAS1, MAS2, MAS3), built with the bem_MAS*
// macros.  Every entry below is 256MB and maps its effective address to
// the identical real address (same base given to bem_MAS2 and bem_MAS3).
// NOTE(review): the meaning of the middle bem_MAS2 argument and the
// argument order of bem2_WIMGE come from the macro definitions, which are
// not visible in this section.

	.global	mas_tlb_table
mas_tlb_table:

// Number of TLBs to load.

	.long	8

// TLB1, #0 -- FLASH #1, FLASH #2, NVRAM, CCSRBAR

	.long	bem_MAS0( 1, 0 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0xF0000000, 1, bem2_WIMGE(1,1,0,1,0) )
	.long	bem_MAS3( 0xF0000000, PERM_ALL )

// TLB1, #1 -- SDRAM part 1  (0x00000000 - 0x0FFFFFFF)

	.long	bem_MAS0( 1, 1 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0x00000000, 1, bem2_WIMGE(0,1,0,0,0) )
	.long	bem_MAS3( 0x00000000, PERM_ALL )

// TLB1, #2 -- SDRAM part 2  (0x10000000 - 0x1FFFFFFF)
// Must follow part 1 directly.  The previous base of 0x20000000 left
// 0x10000000 - 0x1FFFFFFF without any mapping.

	.long	bem_MAS0( 1, 2 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0x10000000, 1, bem2_WIMGE(0,1,0,0,0) )
	.long	bem_MAS3( 0x10000000, PERM_ALL )

// TLB1, #3 -- Test Space.

	.long	bem_MAS0( 1, 3 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0xE0000000, 1, bem2_WIMGE(1,1,0,1,0) )
	.long	bem_MAS3( 0xE0000000, PERM_ALL )

// TLB1, #4 -- PCI part 1

	.long	bem_MAS0( 1, 4 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0x80000000, 1, bem2_WIMGE(1,1,0,1,0) )
	.long	bem_MAS3( 0x80000000, PERM_ALL )

// TLB1, #5 -- PCI part 2

	.long	bem_MAS0( 1, 5 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0x90000000, 1, bem2_WIMGE(1,1,0,1,0) )
	.long	bem_MAS3( 0x90000000, PERM_ALL )

// TLB1, #6 -- RIO part 1

	.long	bem_MAS0( 1, 6 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0xA0000000, 1, bem2_WIMGE(0,0,1,0,0) )
	.long	bem_MAS3( 0xA0000000, PERM_ALL )

// TLB1, #7 -- RIO part 2

	.long	bem_MAS0( 1, 7 )
	.long	bem_MAS1( TLB_SIZE_256M )
	.long	bem_MAS2( 0xB0000000, 1, bem2_WIMGE(0,0,1,0,0) )
	.long	bem_MAS3( 0xB0000000, PERM_ALL )
