_abort_stack_end

Sorry - mixing up support threads. On the A9, if you stop on entry to main(), you should be able to inspect the ulCriticalNesting variable - if its not 9999 then the C startup code is not doing what it is supposed to to get the system ready to execute your program.

@RAc

boot.S

/*****************************************************************************/
/**
* @file boot.S
*
* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
* @{
* <h2> boot.S </h2>
* The boot code performs minimum configuration which is required for an
* application to run starting from processor's reset state. Below is a
* sequence illustrating what all configuration is performed before control
* reaches to main function.
*
* 1. Program vector table base for exception handling
* 2. Invalidate instruction cache, data cache and TLBs
* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
*    abort, system)
* 4. Configure MMU with short descriptor translation table format and program
*    base address of translation table
* 5. Enable data cache, instruction cache and MMU
* 6. Enable Floating point unit
* 7. Transfer control to _start which clears BSS sections, initializes
*    global timer and runs global constructor before jumping to main
*    application
*
*   ...
*
******************************************************************************/

#include "xparameters.h"
#include "xil_errata.h"

.globl MMUTable
.global _prestart
.global _boot
.global __stack
.global __irq_stack
.global __supervisor_stack
.global __abort_stack
.global __fiq_stack
.global __undef_stack
.global _vector_table

.globl _cpu0_catch
.globl _cpu1_catch
.globl OKToRun
.globl EndlessLoop0

.set PSS_L2CC_BASE_ADDR, 0xF8F02000
.set PSS_SLCR_BASE_ADDR, 0xF8000000

.set RESERVED,		0x0fffff00
.set TblBase ,		MMUTable
.set LRemap,		0xFE00000F		/* set the base address of the peripheral block as not shared */
.set L2CCWay,		(PSS_L2CC_BASE_ADDR + 0x077C)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
.set L2CCSync,		(PSS_L2CC_BASE_ADDR + 0x0730)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
.set L2CCCrtl,		(PSS_L2CC_BASE_ADDR + 0x0100)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
.set L2CCAuxCrtl,	(PSS_L2CC_BASE_ADDR + 0x0104)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
.set L2CCTAGLatReg,	(PSS_L2CC_BASE_ADDR + 0x0108)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
.set L2CCDataLatReg,	(PSS_L2CC_BASE_ADDR + 0x010C)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
.set L2CCIntClear,	(PSS_L2CC_BASE_ADDR + 0x0220)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
.set L2CCIntRaw,	(PSS_L2CC_BASE_ADDR + 0x021C)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/

.set SLCRlockReg,	    (PSS_SLCR_BASE_ADDR + 0x04)	/*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
.set SLCRUnlockReg,     (PSS_SLCR_BASE_ADDR + 0x08)	/*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
.set SLCRL2cRamReg,     (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
.set SLCRCPURSTReg,     (0xF8000000 + 0x244)           /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
.set EFUSEStaus,        (0xF800D000 + 0x10)            /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/

/* workaround for simulation not working when L1 D and I caches,MMU and  L2 cache enabled - DT568997 */
.if SIM_MODE == 1
.set CRValMmuCac,	0b00000000000000	/* Disable IDC, and MMU */
.else
.set CRValMmuCac,	0b01000000000101	/* Enable IDC, and MMU */
.endif

.set CRValHiVectorAddr,	0b10000000000000	/* Set the Vector address to high, 0xFFFF0000 */

.set L2CCAuxControl,	0x72360000		/* Enable all prefetching, Cache replacement policy, Parity enable,
                                        Event monitor bus enable and Way Size (64 KB) */
.set L2CCControl,	0x01			/* Enable L2CC */
.set L2CCTAGLatency,	0x0111			/* latency for TAG RAM */
.set L2CCDataLatency,	0x0121			/* latency for DATA RAM */

.set SLCRlockKey,	        0x767B			/* SLCR lock key */
.set SLCRUnlockKey,	        0xDF0D			/* SLCR unlock key */
.set SLCRL2cRamConfig,      0x00020202      /* SLCR L2C ram configuration */

/* Stack Pointer locations for boot code */
.set Undef_stack,	__undef_stack
.set FIQ_stack,		__fiq_stack
.set Abort_stack,	__abort_stack
.set SPV_stack,		__supervisor_stack
.set IRQ_stack,		__irq_stack
.set SYS_stack,		__stack

.set vector_base,	_vector_table

.set FPEXC_EN,		0x40000000		/* FPU enable bit, (1 << 30) */

.section .boot,"ax"


/* this initializes the various processor modes */

_prestart:
_boot:

/* Test which processor is running and jump to the catch address */
	mrc	p15,0,r1,c0,c0,5
	and	r1, r1, #0xf
	cmp	r1, #0
	bne	NotCpu0
	ldr	r0, =_cpu0_catch
	b cpuxCont
NotCpu0:
	cmp	r1, #1
	bne	EndlessLoop0
	ldr	r0, =_cpu1_catch
	b cpuxCont
EndlessLoop0:
	wfe
	b	EndlessLoop0

/* Jump to address pointed to by cpux_catch */
cpuxCont:
	ldr lr, [r0]
	bx	lr


OKToRun:
	mrc     p15, 0, r0, c0, c0, 0		/* Get the revision */
	and     r5, r0, #0x00f00000
	and     r6, r0, #0x0000000f
	orr     r6, r6, r5, lsr #20-4

#ifdef CONFIG_ARM_ERRATA_742230
        cmp     r6, #0x22                       /* only present up to r2p2 */
        mrcle   p15, 0, r10, c15, c0, 1         /* read diagnostic register */
        orrle   r10, r10, #1 << 4               /* set bit #4 */
        mcrle   p15, 0, r10, c15, c0, 1         /* write diagnostic register */
#endif

#ifdef CONFIG_ARM_ERRATA_743622
	teq     r5, #0x00200000                 /* only present in r2p* */
	mrceq   p15, 0, r10, c15, c0, 1         /* read diagnostic register */
	orreq   r10, r10, #1 << 6               /* set bit #6 */
	mcreq   p15, 0, r10, c15, c0, 1         /* write diagnostic register */
#endif

	/* set VBAR to the _vector_table address in linker script */
	ldr	r0, =vector_base
	mcr	p15, 0, r0, c12, c0, 0

	/*invalidate scu*/
	ldr	r7, =0xf8f0000c
	ldr	r6, =0xffff
	str	r6, [r7]

	/* Invalidate caches and TLBs */
	mov	r0,#0				/* r0 = 0  */
	mcr	p15, 0, r0, c8, c7, 0		/* invalidate TLBs */
	mcr	p15, 0, r0, c7, c5, 0		/* invalidate icache */
	mcr	p15, 0, r0, c7, c5, 6		/* Invalidate branch predictor array */
	bl	invalidate_dcache		/* invalidate dcache */

	/* Disable MMU, if enabled */
	mrc	p15, 0, r0, c1, c0, 0		/* read CP15 register 1 */
	bic	r0, r0, #0x1			/* clear bit 0 */
	mcr	p15, 0, r0, c1, c0, 0		/* write value back */

#ifdef SHAREABLE_DDR
	/* Mark the entire DDR memory as shareable */
	ldr	r3, =0x3ff			/* 1024 entries to cover 1G DDR */
	ldr	r0, =TblBase			/* MMU Table address in memory */
	ldr	r2, =0x15de6			/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
shareable_loop:
	str	r2, [r0]			/* write the entry to MMU table */
	add	r0, r0, #0x4			/* next entry in the table */
	add	r2, r2, #0x100000		/* next section */
	subs	r3, r3, #1
	bge	shareable_loop			/* loop till 1G is covered */
#endif

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the irq stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x12			/* IRQ mode */
	msr	cpsr, r2
	ldr	r13,=IRQ_stack			/* IRQ stack pointer */
	bic r2, r2, #(0x1 << 9)    		 /* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the supervisor stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x13			/* supervisor mode */
	msr	cpsr, r2
	ldr	r13,=SPV_stack			/* Supervisor stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the Abort  stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x17			/* Abort mode */
	msr	cpsr, r2
	ldr	r13,=Abort_stack		/* Abort stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the FIQ stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x11			/* FIQ mode */
	msr	cpsr, r2
	ldr	r13,=FIQ_stack			/* FIQ stack pointer */
	bic r2, r2, #(0x1 << 9)    		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the Undefine stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x1b			/* Undefine mode */
	msr	cpsr, r2
	ldr	r13,=Undef_stack		/* Undefine stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the system stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x1F			/* SYS mode */
	msr	cpsr, r2
	ldr	r13,=SYS_stack			/* SYS stack pointer */

	/*set scu enable bit in scu*/
	ldr	r7, =0xf8f00000
	ldr	r0, [r7]
	orr	r0, r0, #0x1
	str	r0, [r7]

	/* enable MMU and cache */

	ldr	r0,=TblBase			/* Load MMU translation table base */
	orr	r0, r0, #0x5B			/* Outer-cacheable, WB */
	mcr	15, 0, r0, c2, c0, 0		/* TTB0 */

	mvn	r0,#0				/* Load MMU domains -- all ones=manager */
	mcr	p15,0,r0,c3,c0,0

	/* Enable mmu, icahce and dcache */
	ldr	r0,=CRValMmuCac
	mcr	p15,0,r0,c1,c0,0		/* Enable cache and MMU */
	dsb					/* dsb	allow the MMU to start up */
	isb					/* isb	flush prefetch buffer */

	/* Write to ACTLR */
	mrc	p15, 0, r0, c1, c0, 1		/* Read ACTLR*/
	orr	r0, r0, #(0x01 << 6)		/* set SMP bit */
	orr	r0, r0, #(0x01 )		/* Cache/TLB maintenance broadcast */
	mcr	p15, 0, r0, c1, c0, 1		/* Write ACTLR*/

/* Invalidate L2 Cache and enable L2 Cache*/
/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
#if USE_AMP!=1
	ldr	r0,=L2CCCrtl			/* Load L2CC base address base + control register */
	mov	r1, #0				/* force the disable bit */
	str	r1, [r0]			/* disable the L2 Caches */

	ldr	r0,=L2CCAuxCrtl			/* Load L2CC base address base + Aux control register */
	ldr	r1,[r0]				/* read the register */
	ldr	r2,=L2CCAuxControl		/* set the default bits */
	orr	r1,r1,r2
	str	r1, [r0]			/* store the Aux Control Register */

	ldr	r0,=L2CCTAGLatReg		/* Load L2CC base address base + TAG Latency address */
	ldr	r1,=L2CCTAGLatency		/* set the latencies for the TAG*/
	str	r1, [r0]			/* store the TAG Latency register Register */

	ldr	r0,=L2CCDataLatReg		/* Load L2CC base address base + Data Latency address */
	ldr	r1,=L2CCDataLatency		/* set the latencies for the Data*/
	str	r1, [r0]			/* store the Data Latency register Register */

	ldr	r0,=L2CCWay			/* Load L2CC base address base + way register*/
	ldr	r2, =0xFFFF
	str	r2, [r0]			/* force invalidate */

	ldr	r0,=L2CCSync			/* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
						/* Load L2CC base address base + sync register*/
	/* poll for completion */
Sync:	ldr	r1, [r0]
	cmp	r1, #0
	bne	Sync

	ldr	r0,=L2CCIntRaw			/* clear pending interrupts */
	ldr	r1,[r0]
	ldr	r0,=L2CCIntClear
	str	r1,[r0]

	ldr	r0,=SLCRUnlockReg		/* Load SLCR base address base + unlock register */
	ldr	r1,=SLCRUnlockKey	    	/* set unlock key */
	str	r1, [r0]		    	/* Unlock SLCR */

	ldr	r0,=SLCRL2cRamReg		/* Load SLCR base address base + l2c Ram Control register */
	ldr	r1,=SLCRL2cRamConfig        	/* set the configuration value */
	str	r1, [r0]	        	/* store the L2c Ram Control Register */

	ldr	r0,=SLCRlockReg         	/* Load SLCR base address base + lock register */
	ldr	r1,=SLCRlockKey	        	/* set lock key */
	str	r1, [r0]	        	/* lock SLCR */

	ldr	r0,=L2CCCrtl			/* Load L2CC base address base + control register */
	ldr	r1,[r0]				/* read the register */
	mov	r2, #L2CCControl		/* set the enable bit */
	orr	r1,r1,r2
	str	r1, [r0]			/* enable the L2 Caches */
#endif

	mov	r0, r0
	mrc	p15, 0, r1, c1, c0, 2		/* read cp access control register (CACR) into r1 */
	orr	r1, r1, #(0xf << 20)		/* enable full access for p10 & p11 */
	mcr	p15, 0, r1, c1, c0, 2		/* write back into CACR */

	/* enable vfp */
	fmrx	r1, FPEXC			/* read the exception register */
	orr	r1,r1, #FPEXC_EN		/* set VFP enable bit, leave the others in orig state */
	fmxr	FPEXC, r1			/* write back the exception register */

	mrc	p15,0,r0,c1,c0,0		/* flow prediction enable */
	orr	r0, r0, #(0x01 << 11)		/* #0x8000 */
	mcr	p15,0,r0,c1,c0,0

	mrc	p15,0,r0,c1,c0,1		/* read Auxiliary Control Register */
	orr	r0, r0, #(0x1 << 2)		/* enable Dside prefetch */
	orr	r0, r0, #(0x1 << 1)		/* enable L2 Prefetch hint */
	mcr	p15,0,r0,c1,c0,1		/* write Auxiliary Control Register */

	mrs	r0, cpsr			/* get the current PSR */
	bic	r0, r0, #0x100			/* enable asynchronous abort exception */
	msr	cpsr_xsf, r0


	b	_start				/* jump to C startup code */
	and	r0, r0, r0			/* no op */

.Ldone:	b	.Ldone				/* Paranoia: we should never get here */


/*
 *************************************************************************
 *
 * invalidate_dcache - invalidate the entire d-cache by set/way
 *
 * Note: for Cortex-A9, there is no cp instruction for invalidating
 * the whole D-cache. Need to invalidate each line.
 *
 *************************************************************************
 */
invalidate_dcache:
	mrc	p15, 1, r0, c0, c0, 1		/* read CLIDR */
	ands	r3, r0, #0x7000000
	mov	r3, r3, lsr #23			/* cache level value (naturally aligned) */
	beq	finished
	mov	r10, #0				/* start with level 0 */
loop1:
	add	r2, r10, r10, lsr #1		/* work out 3xcachelevel */
	mov	r1, r0, lsr r2			/* bottom 3 bits are the Cache type for this level */
	and	r1, r1, #7			/* get those 3 bits alone */
	cmp	r1, #2
	blt	skip				/* no cache or only instruction cache at this level */
	mcr	p15, 2, r10, c0, c0, 0		/* write the Cache Size selection register */
	isb					/* isb to sync the change to the CacheSizeID reg */
	mrc	p15, 1, r1, c0, c0, 0		/* reads current Cache Size ID register */
	and	r2, r1, #7			/* extract the line length field */
	add	r2, r2, #4			/* add 4 for the line length offset (log2 16 bytes) */
	ldr	r4, =0x3ff
	ands	r4, r4, r1, lsr #3		/* r4 is the max number on the way size (right aligned) */
	clz	r5, r4				/* r5 is the bit position of the way size increment */
	ldr	r7, =0x7fff
	ands	r7, r7, r1, lsr #13		/* r7 is the max number of the index size (right aligned) */
loop2:
	mov	r9, r4				/* r9 working copy of the max way size (right aligned) */
loop3:
	orr	r11, r10, r9, lsl r5		/* factor in the way number and cache number into r11 */
	orr	r11, r11, r7, lsl r2		/* factor in the index number */
	mcr	p15, 0, r11, c7, c6, 2		/* invalidate by set/way */
	subs	r9, r9, #1			/* decrement the way number */
	bge	loop3
	subs	r7, r7, #1			/* decrement the index */
	bge	loop2
skip:
	add	r10, r10, #2			/* increment the cache number */
	cmp	r3, r10
	bgt	loop1

finished:
	mov	r10, #0				/* swith back to cache level 0 */
	mcr	p15, 2, r10, c0, c0, 0		/* select current cache level in cssr */
	dsb
	isb

	bx	lr

.end
/**
* @} End of "addtogroup a9_boot_code".
*/

translation_table.s

/*****************************************************************************/
/**
* @file translation_table.s
*
* @addtogroup a9_boot_code
* @{
* <h2> translation_table.S </h2>
* translation_table.S contains a static page table required by MMU for
* cortex-A9. This translation table is flat mapped (input address = output
* address) with default memory attributes defined for zynq architecture. It
* utilizes short descriptor translation table format with each section defining
* 1MB of memory.
*
* The overview of translation table memory attributes is described below.
*
*|                       | Memory Range            | Definition in Translation Table   |
*|-----------------------|-------------------------|-----------------------------------|
*| DDR                   | 0x00000000 - 0x3FFFFFFF | Normal write-back Cacheable       |
*| PL                    | 0x40000000 - 0xBFFFFFFF | Strongly Ordered                  |
*| Reserved              | 0xC0000000 - 0xDFFFFFFF | Unassigned                        |
*| Memory mapped devices | 0xE0000000 - 0xE02FFFFF | Device Memory                     |
*| Reserved              | 0xE0300000 - 0xE0FFFFFF | Unassigned                        |
*| NAND, NOR             | 0xE1000000 - 0xE3FFFFFF | Device memory                     |
*| SRAM                  | 0xE4000000 - 0xE5FFFFFF | Normal write-back Cacheable       |
*| Reserved              | 0xE6000000 - 0xF7FFFFFF | Unassigned                        |
*| AMBA APB Peripherals  | 0xF8000000 - 0xF8FFFFFF | Device Memory                     |
*| Reserved              | 0xF9000000 - 0xFBFFFFFF | Unassigned                        |
*| Linear QSPI - XIP     | 0xFC000000 - 0xFDFFFFFF | Normal write-through cacheable    |
*| Reserved              | 0xFE000000 - 0xFFEFFFFF | Unassigned                        |
*| OCM                   | 0xFFF00000 - 0xFFFFFFFF | Normal inner write-back cacheable |
*
* @note
*
* For region 0x00000000 - 0x3FFFFFFF, a system where DDR is less than 1GB,
* region after DDR and before PL is marked as undefined/reserved in translation
* table. In 0xF8000000 - 0xF8FFFFFF, 0xF8000C00 - 0xF8000FFF, 0xF8010000 -
* 0xF88FFFFF and 0xF8F03000 to 0xF8FFFFFF are reserved  but due to granual size
* of 1MB, it is not possible to define separate regions for them. For region
* 0xFFF00000 - 0xFFFFFFFF, 0xFFF00000 to 0xFFFB0000 is reserved but due to 1MB
* granual size, it is not possible to define separate region for it
*
*   ...
******************************************************************************/
#include "xparameters.h"
	.globl  MMUTable

	.section .mmu_tbl,"a"

MMUTable:
	/* Each table entry occupies one 32-bit word and there are
	 * 4096 entries, so the entire table takes up 16KB.
	 * Each entry covers a 1MB section.
	 */
.set SECT, 0

#ifdef XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_START, XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_END, XPAR_PS7_DDR_0_S_AXI_HIGHADDR
.set DDR_SIZE, (DDR_END - DDR_START)+1
.set DDR_REG, DDR_SIZE/0x100000
#else
.set DDR_REG, 0
#endif

.set UNDEF_REG, 0x3FF - DDR_REG

				/*0x00000000 - 0x00100000 (cacheable )*/
.word	SECT + 0x15de6		/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set	SECT, SECT+0x100000

.rept	DDR_REG			/*  (DDR Cacheable) */
.word	SECT + 0x15de6		/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	UNDEF_REG			/*  (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr


.rept	0x0400			/* 0x40000000 - 0x7fffffff (FPGA slave0) */
.word	SECT + 0xc02		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0400			/* 0x80000000 - 0xbfffffff (FPGA slave1) */
.word	SECT + 0xc02		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0200			/* 0xc0000000 - 0xdfffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x003			/* 0xe0000000 - 0xe02fffff (Memory mapped devices)
				 * UART/USB/IIC/SPI/CAN/GEM/GPIO/QSPI/SD/NAND */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0D			/* 0xe0300000 - 0xe0ffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0010			/* 0xe1000000 - 0xe1ffffff (NAND) */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xe2000000 - 0xe3ffffff (NOR) */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xe4000000 - 0xe5ffffff (SRAM) */
.word	SECT + 0xc0e		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0120			/* 0xe6000000 - 0xf7ffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

/* 0xf8000c00 to 0xf8000fff, 0xf8010000 to 0xf88fffff and
   0xf8f03000 to 0xf8ffffff are reserved  but due to granual size of
   1MB, it is not possible to define separate regions for them */

.rept	0x0010			/* 0xf8000000 - 0xf8ffffff (AMBA APB Peripherals) */

.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0030			/* 0xf9000000 - 0xfbffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xfc000000 - 0xfdffffff (Linear QSPI - XIP) */
.word	SECT + 0xc0a		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x001F			/* 0xfe000000 - 0xffefffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

/* 0xfff00000 to 0xfffb0000 is reserved but due to granual size of
   1MB, it is not possible to define separate region for  it

 0xfff00000 - 0xffffffff
   256K OCM when mapped to high address space
   inner-cacheable */
.word	SECT + 0x4c0e		/* S=b0 TEX=b100 AP=b11, Domain=b0, C=b1, B=b1 */
.set	SECT, SECT+0x100000

.end
/**
* @} End of "addtogroup a9_boot_code".

@rtel : Do you know what causes the problems of memory?

I’m sorry, XuanTran, but I have to leave the discussion here. The recurring pattern among my customers is that FreeRTOS is only used with the ARM Cortex M series whereas prefabbed larger OSs requiring such as Embedded Linux are employed for the A family. Thus, I’m fairly familiar with M but would need (currently unavailable) resources to think myself into the A architecture & idiosyncracies.

Thanks for sharing your code, I hope you’ll find the answer soon - if not here, please share the resolution with us! Best of luck!

@RAc : Thanks for your time and support. You helped me a lot to understand the problem.

Have you tried my suggestion of checking the startup code is working correctly before main() is called?

@rtel :

Yes, I did.
uxCriticalNesting = N/A
uxCurrentNumberOfTasks = 0xa5a5a5a5

Did I do it correctly?

I am using freertos901_xilinx.

uxCurrentNumberOfTasks is initialized to zero here: FreeRTOS-Kernel/tasks.c at main · FreeRTOS/FreeRTOS-Kernel · GitHub

Your observation indicates that your c startup code is not correctly initializing the data and/or bss section (as suspected by Richard). I’d suggest to get a bare metal project working first and ensure that global variables get initialized correctly.

Thanks.

@aggarg :Thanks for your suggestion. I am curious about why c startup code doesn’t work correctly.

It is hard to say without looking at the startup code - incorrect values of some variables which are exported from the linker scripts can be one reason. Try single stepping your startup code. If you share your startup code, we can try to help.

Thanks.

@aggarg: Can you clarify what I should provide? Are they boot files? Sorry for stupid questions.

It depends on your platform - usually there are some assembly files in your project.

I going to again point that a much easier route will be to start with a working sample project and then add FreeRTOS.

Thanks.

@aggarg : Thanks for your tips!

Sorry, I mixed up the variable uxCriticalNesting and ulCriticalNesting in the previous posts.

Putting a break point at the beginning of main(), values of ulCriticalNesting and uxCurrentNumberOfTasks are as follows:

ulCriticalNesting       0x0000270f
uxCurrentNumberOfTasks 	0xa5a5a5a5

I don’t have much knowledge of the startup code of a program. I am not sure, if these following files are correct.

xil-crt0.S

	.file	"xil-crt0.S"
	.section ".got2","aw"
	.align	2

	.text
.Lsbss_start:
	.long	__sbss_start

.Lsbss_end:
	.long	__sbss_end

.Lbss_start:
	.long	__bss_start

.Lbss_end:
	.long	__bss_end

.Lstack:
	.long	__stack


	.globl	_start
_start:
	bl      __cpu_init		/* Initialize the CPU first (BSP provides this) */

	mov	r0, #0

	/* clear sbss */
	ldr 	r1,.Lsbss_start		/* calculate beginning of the SBSS */
	ldr	r2,.Lsbss_end		/* calculate end of the SBSS */

.Lloop_sbss:
	cmp	r1,r2
	bge	.Lenclsbss		/* If no SBSS, no clearing required */
	str	r0, [r1], #4
	b	.Lloop_sbss

.Lenclsbss:
	/* clear bss */
	ldr	r1,.Lbss_start		/* calculate beginning of the BSS */
	ldr	r2,.Lbss_end		/* calculate end of the BSS */

.Lloop_bss:
	cmp	r1,r2
	bge	.Lenclbss		/* If no BSS, no clearing required */
	str	r0, [r1], #4
	b	.Lloop_bss

.Lenclbss:

	/* set stack pointer */
	ldr	r13,.Lstack		/* stack address */

    /* Reset and start Global Timer */
	mov	r0, #0x0
	mov	r1, #0x0

#if USE_AMP != 1
	bl XTime_SetTime
#endif

#ifdef PROFILING			/* defined in Makefile */
	/* Setup profiling stuff */
	bl	_profile_init
#endif /* PROFILING */

   /* run global constructors */
   bl __libc_init_array

	/* make sure argc and argv are valid */
	mov	r0, #0
	mov	r1, #0

	/* Let her rip */
	bl	main

   /* Cleanup global constructors */
   bl __libc_fini_array

#ifdef PROFILING
	/* Cleanup profiling stuff */
	bl	_profile_clean
#endif /* PROFILING */

        /* All done */
	bl	exit

.Lexit:	/* should never get here */
	b .Lexit

.Lstart:
	.size	_start,.Lstart-_start

boot.S

/*****************************************************************************/
/**
* @file boot.S
*
* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
* @{
* <h2> boot.S </h2>
* The boot code performs minimum configuration which is required for an
* application to run starting from processor's reset state. Below is a
* sequence illustrating what all configuration is performed before control
* reaches to main function.
*
* 1. Program vector table base for exception handling
* 2. Invalidate instruction cache, data cache and TLBs
* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
*    abort, system)
* 4. Configure MMU with short descriptor translation table format and program
*    base address of translation table
* 5. Enable data cache, instruction cache and MMU
* 6. Enable Floating point unit
* 7. Transfer control to _start which clears BSS sections, initializes
*    global timer and runs global constructor before jumping to main
*    application
******************************************************************************/

#include "xparameters.h"
#include "xil_errata.h"

.globl MMUTable
.global _prestart
.global _boot
.global __stack
.global __irq_stack
.global __supervisor_stack
.global __abort_stack
.global __fiq_stack
.global __undef_stack
.global _vector_table

.globl _cpu0_catch
.globl _cpu1_catch
.globl OKToRun
.globl EndlessLoop0

.set PSS_L2CC_BASE_ADDR, 0xF8F02000
.set PSS_SLCR_BASE_ADDR, 0xF8000000

.set RESERVED,		0x0fffff00
.set TblBase ,		MMUTable
.set LRemap,		0xFE00000F		/* set the base address of the peripheral block as not shared */
.set L2CCWay,		(PSS_L2CC_BASE_ADDR + 0x077C)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
.set L2CCSync,		(PSS_L2CC_BASE_ADDR + 0x0730)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
.set L2CCCrtl,		(PSS_L2CC_BASE_ADDR + 0x0100)	/*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
.set L2CCAuxCrtl,	(PSS_L2CC_BASE_ADDR + 0x0104)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
.set L2CCTAGLatReg,	(PSS_L2CC_BASE_ADDR + 0x0108)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
.set L2CCDataLatReg,	(PSS_L2CC_BASE_ADDR + 0x010C)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
.set L2CCIntClear,	(PSS_L2CC_BASE_ADDR + 0x0220)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
.set L2CCIntRaw,	(PSS_L2CC_BASE_ADDR + 0x021C)	/*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/

.set SLCRlockReg,	    (PSS_SLCR_BASE_ADDR + 0x04)	/*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
.set SLCRUnlockReg,     (PSS_SLCR_BASE_ADDR + 0x08)	/*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
.set SLCRL2cRamReg,     (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
.set SLCRCPURSTReg,     (0xF8000000 + 0x244)           /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
.set EFUSEStaus,        (0xF800D000 + 0x10)            /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/

/* workaround for simulation not working when L1 D and I caches,MMU and  L2 cache enabled - DT568997 */
.if SIM_MODE == 1
.set CRValMmuCac,	0b00000000000000	/* Disable IDC, and MMU */
.else
.set CRValMmuCac,	0b01000000000101	/* Enable IDC, and MMU */
.endif

.set CRValHiVectorAddr,	0b10000000000000	/* Set the Vector address to high, 0xFFFF0000 */

.set L2CCAuxControl,	0x72360000		/* Enable all prefetching, Cache replacement policy, Parity enable,
                                        Event monitor bus enable and Way Size (64 KB) */
.set L2CCControl,	0x01			/* Enable L2CC */
.set L2CCTAGLatency,	0x0111			/* latency for TAG RAM */
.set L2CCDataLatency,	0x0121			/* latency for DATA RAM */

.set SLCRlockKey,	        0x767B			/* SLCR lock key */
.set SLCRUnlockKey,	        0xDF0D			/* SLCR unlock key */
.set SLCRL2cRamConfig,      0x00020202      /* SLCR L2C ram configuration */

/* Stack Pointer locations for boot code */
.set Undef_stack,	__undef_stack
.set FIQ_stack,		__fiq_stack
.set Abort_stack,	__abort_stack
.set SPV_stack,		__supervisor_stack
.set IRQ_stack,		__irq_stack
.set SYS_stack,		__stack

.set vector_base,	_vector_table

.set FPEXC_EN,		0x40000000		/* FPU enable bit, (1 << 30) */

.section .boot,"ax"


/* this initializes the various processor modes */

_prestart:
_boot:

/* Test which processor is running and jump to the catch address */
	mrc	p15,0,r1,c0,c0,5
	and	r1, r1, #0xf
	cmp	r1, #0
	bne	NotCpu0
	ldr	r0, =_cpu0_catch
	b cpuxCont
NotCpu0:
	cmp	r1, #1
	bne	EndlessLoop0
	ldr	r0, =_cpu1_catch
	b cpuxCont
EndlessLoop0:
	wfe
	b	EndlessLoop0

/* Jump to address pointed to by cpux_catch */
cpuxCont:
	ldr lr, [r0]
	bx	lr


OKToRun:
	mrc     p15, 0, r0, c0, c0, 0		/* Get the revision */
	and     r5, r0, #0x00f00000
	and     r6, r0, #0x0000000f
	orr     r6, r6, r5, lsr #20-4

#ifdef CONFIG_ARM_ERRATA_742230
        cmp     r6, #0x22                       /* only present up to r2p2 */
        mrcle   p15, 0, r10, c15, c0, 1         /* read diagnostic register */
        orrle   r10, r10, #1 << 4               /* set bit #4 */
        mcrle   p15, 0, r10, c15, c0, 1         /* write diagnostic register */
#endif

#ifdef CONFIG_ARM_ERRATA_743622
	teq     r5, #0x00200000                 /* only present in r2p* */
	mrceq   p15, 0, r10, c15, c0, 1         /* read diagnostic register */
	orreq   r10, r10, #1 << 6               /* set bit #6 */
	mcreq   p15, 0, r10, c15, c0, 1         /* write diagnostic register */
#endif

	/* set VBAR to the _vector_table address in linker script */
	ldr	r0, =vector_base
	mcr	p15, 0, r0, c12, c0, 0

	/*invalidate scu*/
	ldr	r7, =0xf8f0000c
	ldr	r6, =0xffff
	str	r6, [r7]

	/* Invalidate caches and TLBs */
	mov	r0,#0				/* r0 = 0  */
	mcr	p15, 0, r0, c8, c7, 0		/* invalidate TLBs */
	mcr	p15, 0, r0, c7, c5, 0		/* invalidate icache */
	mcr	p15, 0, r0, c7, c5, 6		/* Invalidate branch predictor array */
	bl	invalidate_dcache		/* invalidate dcache */

	/* Disable MMU, if enabled */
	mrc	p15, 0, r0, c1, c0, 0		/* read CP15 register 1 */
	bic	r0, r0, #0x1			/* clear bit 0 */
	mcr	p15, 0, r0, c1, c0, 0		/* write value back */

#ifdef SHAREABLE_DDR
	/* Mark the entire DDR memory as shareable */
	ldr	r3, =0x3ff			/* 1024 entries to cover 1G DDR */
	ldr	r0, =TblBase			/* MMU Table address in memory */
	ldr	r2, =0x15de6			/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
shareable_loop:
	str	r2, [r0]			/* write the entry to MMU table */
	add	r0, r0, #0x4			/* next entry in the table */
	add	r2, r2, #0x100000		/* next section */
	subs	r3, r3, #1
	bge	shareable_loop			/* loop till 1G is covered */
#endif

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the irq stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x12			/* IRQ mode */
	msr	cpsr, r2
	ldr	r13,=IRQ_stack			/* IRQ stack pointer */
	bic r2, r2, #(0x1 << 9)    		 /* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the supervisor stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x13			/* supervisor mode */
	msr	cpsr, r2
	ldr	r13,=SPV_stack			/* Supervisor stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the Abort  stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x17			/* Abort mode */
	msr	cpsr, r2
	ldr	r13,=Abort_stack		/* Abort stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the FIQ stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x11			/* FIQ mode */
	msr	cpsr, r2
	ldr	r13,=FIQ_stack			/* FIQ stack pointer */
	bic r2, r2, #(0x1 << 9)    		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the Undefine stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x1b			/* Undefine mode */
	msr	cpsr, r2
	ldr	r13,=Undef_stack		/* Undefine stack pointer */
	bic r2, r2, #(0x1 << 9)     		/* Set EE bit to little-endian */
	msr spsr_fsxc,r2

	mrs	r0, cpsr			/* get the current PSR */
	mvn	r1, #0x1f			/* set up the system stack pointer */
	and	r2, r1, r0
	orr	r2, r2, #0x1F			/* SYS mode */
	msr	cpsr, r2
	ldr	r13,=SYS_stack			/* SYS stack pointer */

	/*set scu enable bit in scu*/
	ldr	r7, =0xf8f00000
	ldr	r0, [r7]
	orr	r0, r0, #0x1
	str	r0, [r7]

	/* enable MMU and cache */

	ldr	r0,=TblBase			/* Load MMU translation table base */
	orr	r0, r0, #0x5B			/* Outer-cacheable, WB */
	mcr	15, 0, r0, c2, c0, 0		/* TTB0 */

	mvn	r0,#0				/* Load MMU domains -- all ones=manager */
	mcr	p15,0,r0,c3,c0,0

	/* Enable mmu, icahce and dcache */
	ldr	r0,=CRValMmuCac
	mcr	p15,0,r0,c1,c0,0		/* Enable cache and MMU */
	dsb					/* dsb	allow the MMU to start up */
	isb					/* isb	flush prefetch buffer */

	/* Write to ACTLR */
	mrc	p15, 0, r0, c1, c0, 1		/* Read ACTLR*/
	orr	r0, r0, #(0x01 << 6)		/* set SMP bit */
	orr	r0, r0, #(0x01 )		/* Cache/TLB maintenance broadcast */
	mcr	p15, 0, r0, c1, c0, 1		/* Write ACTLR*/

/* Invalidate L2 Cache and enable L2 Cache*/
/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
#if USE_AMP!=1
	ldr	r0,=L2CCCrtl			/* Load L2CC base address base + control register */
	mov	r1, #0				/* force the disable bit */
	str	r1, [r0]			/* disable the L2 Caches */

	ldr	r0,=L2CCAuxCrtl			/* Load L2CC base address base + Aux control register */
	ldr	r1,[r0]				/* read the register */
	ldr	r2,=L2CCAuxControl		/* set the default bits */
	orr	r1,r1,r2
	str	r1, [r0]			/* store the Aux Control Register */

	ldr	r0,=L2CCTAGLatReg		/* Load L2CC base address base + TAG Latency address */
	ldr	r1,=L2CCTAGLatency		/* set the latencies for the TAG*/
	str	r1, [r0]			/* store the TAG Latency register Register */

	ldr	r0,=L2CCDataLatReg		/* Load L2CC base address base + Data Latency address */
	ldr	r1,=L2CCDataLatency		/* set the latencies for the Data*/
	str	r1, [r0]			/* store the Data Latency register Register */

	ldr	r0,=L2CCWay			/* Load L2CC base address base + way register*/
	ldr	r2, =0xFFFF
	str	r2, [r0]			/* force invalidate */

	ldr	r0,=L2CCSync			/* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
						/* Load L2CC base address base + sync register*/
	/* poll for completion */
Sync:	ldr	r1, [r0]
	cmp	r1, #0
	bne	Sync

	ldr	r0,=L2CCIntRaw			/* clear pending interrupts */
	ldr	r1,[r0]
	ldr	r0,=L2CCIntClear
	str	r1,[r0]

	ldr	r0,=SLCRUnlockReg		/* Load SLCR base address base + unlock register */
	ldr	r1,=SLCRUnlockKey	    	/* set unlock key */
	str	r1, [r0]		    	/* Unlock SLCR */

	ldr	r0,=SLCRL2cRamReg		/* Load SLCR base address base + l2c Ram Control register */
	ldr	r1,=SLCRL2cRamConfig        	/* set the configuration value */
	str	r1, [r0]	        	/* store the L2c Ram Control Register */

	ldr	r0,=SLCRlockReg         	/* Load SLCR base address base + lock register */
	ldr	r1,=SLCRlockKey	        	/* set lock key */
	str	r1, [r0]	        	/* lock SLCR */

	ldr	r0,=L2CCCrtl			/* Load L2CC base address base + control register */
	ldr	r1,[r0]				/* read the register */
	mov	r2, #L2CCControl		/* set the enable bit */
	orr	r1,r1,r2
	str	r1, [r0]			/* enable the L2 Caches */
#endif

	mov	r0, r0
	mrc	p15, 0, r1, c1, c0, 2		/* read cp access control register (CACR) into r1 */
	orr	r1, r1, #(0xf << 20)		/* enable full access for p10 & p11 */
	mcr	p15, 0, r1, c1, c0, 2		/* write back into CACR */

	/* enable vfp */
	fmrx	r1, FPEXC			/* read the exception register */
	orr	r1,r1, #FPEXC_EN		/* set VFP enable bit, leave the others in orig state */
	fmxr	FPEXC, r1			/* write back the exception register */

	mrc	p15,0,r0,c1,c0,0		/* flow prediction enable */
	orr	r0, r0, #(0x01 << 11)		/* #0x8000 */
	mcr	p15,0,r0,c1,c0,0

	mrc	p15,0,r0,c1,c0,1		/* read Auxiliary Control Register */
	orr	r0, r0, #(0x1 << 2)		/* enable Dside prefetch */
	orr	r0, r0, #(0x1 << 1)		/* enable L2 Prefetch hint */
	mcr	p15,0,r0,c1,c0,1		/* write Auxiliary Control Register */

	mrs	r0, cpsr			/* get the current PSR */
	bic	r0, r0, #0x100			/* enable asynchronous abort exception */
	msr	cpsr_xsf, r0


	b	_start				/* jump to C startup code */
	and	r0, r0, r0			/* no op */

.Ldone:	b	.Ldone				/* Paranoia: we should never get here */


/*
 *************************************************************************
 *
 * invalidate_dcache - invalidate the entire d-cache by set/way
 *
 * Note: for Cortex-A9, there is no cp instruction for invalidating
 * the whole D-cache. Need to invalidate each line.
 *
 *************************************************************************
 */
invalidate_dcache:
	mrc	p15, 1, r0, c0, c0, 1		/* read CLIDR */
	ands	r3, r0, #0x7000000
	mov	r3, r3, lsr #23			/* cache level value (naturally aligned) */
	beq	finished
	mov	r10, #0				/* start with level 0 */
loop1:
	add	r2, r10, r10, lsr #1		/* work out 3xcachelevel */
	mov	r1, r0, lsr r2			/* bottom 3 bits are the Cache type for this level */
	and	r1, r1, #7			/* get those 3 bits alone */
	cmp	r1, #2
	blt	skip				/* no cache or only instruction cache at this level */
	mcr	p15, 2, r10, c0, c0, 0		/* write the Cache Size selection register */
	isb					/* isb to sync the change to the CacheSizeID reg */
	mrc	p15, 1, r1, c0, c0, 0		/* reads current Cache Size ID register */
	and	r2, r1, #7			/* extract the line length field */
	add	r2, r2, #4			/* add 4 for the line length offset (log2 16 bytes) */
	ldr	r4, =0x3ff
	ands	r4, r4, r1, lsr #3		/* r4 is the max number on the way size (right aligned) */
	clz	r5, r4				/* r5 is the bit position of the way size increment */
	ldr	r7, =0x7fff
	ands	r7, r7, r1, lsr #13		/* r7 is the max number of the index size (right aligned) */
loop2:
	mov	r9, r4				/* r9 working copy of the max way size (right aligned) */
loop3:
	orr	r11, r10, r9, lsl r5		/* factor in the way number and cache number into r11 */
	orr	r11, r11, r7, lsl r2		/* factor in the index number */
	mcr	p15, 0, r11, c7, c6, 2		/* invalidate by set/way */
	subs	r9, r9, #1			/* decrement the way number */
	bge	loop3
	subs	r7, r7, #1			/* decrement the index */
	bge	loop2
skip:
	add	r10, r10, #2			/* increment the cache number */
	cmp	r3, r10
	bgt	loop1

finished:
	mov	r10, #0				/* swith back to cache level 0 */
	mcr	p15, 2, r10, c0, c0, 0		/* select current cache level in cssr */
	dsb
	isb

	bx	lr

.end
/**
* @} End of "addtogroup a9_boot_code".

translation_table.S

/*****************************************************************************/
/**
* @file translation_table.s
*
* @addtogroup a9_boot_code
* @{
* <h2> translation_table.S </h2>
* translation_table.S contains a static page table required by MMU for
* cortex-A9. This translation table is flat mapped (input address = output
* address) with default memory attributes defined for zynq architecture. It
* utilizes short descriptor translation table format with each section defining
* 1MB of memory.
*
* The overview of translation table memory attributes is described below.
*
*|                       | Memory Range            | Definition in Translation Table   |
*|-----------------------|-------------------------|-----------------------------------|
*| DDR                   | 0x00000000 - 0x3FFFFFFF | Normal write-back Cacheable       |
*| PL                    | 0x40000000 - 0xBFFFFFFF | Strongly Ordered                  |
*| Reserved              | 0xC0000000 - 0xDFFFFFFF | Unassigned                        |
*| Memory mapped devices | 0xE0000000 - 0xE02FFFFF | Device Memory                     |
*| Reserved              | 0xE0300000 - 0xE0FFFFFF | Unassigned                        |
*| NAND, NOR             | 0xE1000000 - 0xE3FFFFFF | Device memory                     |
*| SRAM                  | 0xE4000000 - 0xE5FFFFFF | Normal write-back Cacheable       |
*| Reserved              | 0xE6000000 - 0xF7FFFFFF | Unassigned                        |
*| AMBA APB Peripherals  | 0xF8000000 - 0xF8FFFFFF | Device Memory                     |
*| Reserved              | 0xF9000000 - 0xFBFFFFFF | Unassigned                        |
*| Linear QSPI - XIP     | 0xFC000000 - 0xFDFFFFFF | Normal write-through cacheable    |
*| Reserved              | 0xFE000000 - 0xFFEFFFFF | Unassigned                        |
*| OCM                   | 0xFFF00000 - 0xFFFFFFFF | Normal inner write-back cacheable |
*
* @note
*
* For region 0x00000000 - 0x3FFFFFFF, a system where DDR is less than 1GB,
* region after DDR and before PL is marked as undefined/reserved in translation
* table. In 0xF8000000 - 0xF8FFFFFF, 0xF8000C00 - 0xF8000FFF, 0xF8010000 -
* 0xF88FFFFF and 0xF8F03000 to 0xF8FFFFFF are reserved  but due to granual size
* of 1MB, it is not possible to define separate regions for them. For region
* 0xFFF00000 - 0xFFFFFFFF, 0xFFF00000 to 0xFFFB0000 is reserved but due to 1MB
* granual size, it is not possible to define separate region for it
*
******************************************************************************/
#include "xparameters.h"
	.globl  MMUTable

	.section .mmu_tbl,"a"

MMUTable:
	/* Each table entry occupies one 32-bit word and there are
	 * 4096 entries, so the entire table takes up 16KB.
	 * Each entry covers a 1MB section.
	 */
.set SECT, 0

#ifdef XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_START, XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_END, XPAR_PS7_DDR_0_S_AXI_HIGHADDR
.set DDR_SIZE, (DDR_END - DDR_START)+1
.set DDR_REG, DDR_SIZE/0x100000
#else
.set DDR_REG, 0
#endif

.set UNDEF_REG, 0x3FF - DDR_REG

				/*0x00000000 - 0x00100000 (cacheable )*/
.word	SECT + 0x15de6		/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set	SECT, SECT+0x100000

.rept	DDR_REG			/*  (DDR Cacheable) */
.word	SECT + 0x15de6		/* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	UNDEF_REG			/*  (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr


.rept	0x0400			/* 0x40000000 - 0x7fffffff (FPGA slave0) */
.word	SECT + 0xc02		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0400			/* 0x80000000 - 0xbfffffff (FPGA slave1) */
.word	SECT + 0xc02		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0200			/* 0xc0000000 - 0xdfffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x003			/* 0xe0000000 - 0xe02fffff (Memory mapped devices)
				 * UART/USB/IIC/SPI/CAN/GEM/GPIO/QSPI/SD/NAND */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0D			/* 0xe0300000 - 0xe0ffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0010			/* 0xe1000000 - 0xe1ffffff (NAND) */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xe2000000 - 0xe3ffffff (NOR) */
.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xe4000000 - 0xe5ffffff (SRAM) */
.word	SECT + 0xc0e		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0120			/* 0xe6000000 - 0xf7ffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

/* 0xf8000c00 to 0xf8000fff, 0xf8010000 to 0xf88fffff and
   0xf8f03000 to 0xf8ffffff are reserved  but due to granual size of
   1MB, it is not possible to define separate regions for them */

.rept	0x0010			/* 0xf8000000 - 0xf8ffffff (AMBA APB Peripherals) */

.word	SECT + 0xc06		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0030			/* 0xf9000000 - 0xfbffffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x0020			/* 0xfc000000 - 0xfdffffff (Linear QSPI - XIP) */
.word	SECT + 0xc0a		/* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b0 */
.set	SECT, SECT+0x100000
.endr

.rept	0x001F			/* 0xfe000000 - 0xffefffff (unassigned/reserved).
				 * Generates a translation fault if accessed */
.word	SECT + 0x0		/* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set	SECT, SECT+0x100000
.endr

/* 0xfff00000 to 0xfffb0000 is reserved but due to granual size of
   1MB, it is not possible to define separate region for  it

 0xfff00000 - 0xffffffff
   256K OCM when mapped to high address space
   inner-cacheable */
.word	SECT + 0x4c0e		/* S=b0 TEX=b100 AP=b11, Domain=b0, C=b1, B=b1 */
.set	SECT, SECT+0x100000

.end
/**
* @} End of "addtogroup a9_boot_code".
*/

There are a number of preprocessor option dependent code paths in your code, for example using SHAREABLE_DDR. Did you make sure that your startup code matches your hardware? Is your hardware custom, an eval board or a commercial board? If the latter, you should first check with your board manufacturer that the startup code fits your hardware.

Not gone back to the top of the thread the refresh my memory - but if I do recall correctly this is a dual core system where one core boots correctly and the other doesn’t. How is the second core meant to boot? Does it start running its own application out of reset, or is the first core that boots meant to be responsible for booting it? If the latter method is the case then I suspect the misbehaving core is just not being booted correctly. Maybe it is jumping to main() without first running the C-startup code, or maybe the first core to boot is meant to set up the environment for all the cores.

It is custom hardware.

Two ARM Cortex A9 cores are used.
The CPU0 is in charge for starting execution code on CPU1.
FSBL which is located in CPU0 is responsible for booting the system.
CPU1 starts correctly as shown in the screenshot below.

CPU1
Before the main() function starts, the values of ulCriticalNesting and uxCurrentNumberOfTasks
ulCriticalNesting 9999 (0x0000270f)
uxCurrentNumberOfTasks 0 (0x00000000)

well, if that’s the case, you won’t have a choice but wade through the MCU reference manual and write the startup code yourself. Or try all combinations of SHAREABLE_DDR etc. set and unset and hope for one combination to work…

The startup code you try to adapt makes assumptions about the hardware, and it’ll work only for a matching hardware. We (ie somebody who is familar enough with the A9 architecture) could only help you here if you disclosed the entire circuit diagram so we could design the startup code. However, that is a lot of work for which a consultant would charge a significant amount of money. I wouldn’t do that for free.

The other issue, of course, is that obviously your hardware is still in the prototypical stage, so the design must be debugged along with the startup code. There may simply be a wiring bug or some pin incompatibility.

@RAc : Thanks so much for your help and guidance!

1 Like