Sorry - mixing up support threads. On the A9, if you stop on entry to main(), you should be able to inspect the ulCriticalNesting variable - if its not 9999 then the C startup code is not doing what it is supposed to to get the system ready to execute your program.
boot.S
/*****************************************************************************/
/**
* @file boot.S
*
* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
* @{
* <h2> boot.S </h2>
* The boot code performs minimum configuration which is required for an
* application to run starting from processor's reset state. Below is a
* sequence illustrating what all configuration is performed before control
* reaches to main function.
*
* 1. Program vector table base for exception handling
* 2. Invalidate instruction cache, data cache and TLBs
* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
* abort, system)
* 4. Configure MMU with short descriptor translation table format and program
* base address of translation table
* 5. Enable data cache, instruction cache and MMU
* 6. Enable Floating point unit
* 7. Transfer control to _start which clears BSS sections, initializes
* global timer and runs global constructor before jumping to main
* application
*
* ...
*
******************************************************************************/
#include "xparameters.h"
#include "xil_errata.h"
.globl MMUTable
.global _prestart
.global _boot
.global __stack
.global __irq_stack
.global __supervisor_stack
.global __abort_stack
.global __fiq_stack
.global __undef_stack
.global _vector_table
.globl _cpu0_catch
.globl _cpu1_catch
.globl OKToRun
.globl EndlessLoop0
.set PSS_L2CC_BASE_ADDR, 0xF8F02000
.set PSS_SLCR_BASE_ADDR, 0xF8000000
.set RESERVED, 0x0fffff00
.set TblBase , MMUTable
.set LRemap, 0xFE00000F /* set the base address of the peripheral block as not shared */
.set L2CCWay, (PSS_L2CC_BASE_ADDR + 0x077C) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
.set L2CCSync, (PSS_L2CC_BASE_ADDR + 0x0730) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
.set L2CCCrtl, (PSS_L2CC_BASE_ADDR + 0x0100) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
.set L2CCAuxCrtl, (PSS_L2CC_BASE_ADDR + 0x0104) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
.set L2CCTAGLatReg, (PSS_L2CC_BASE_ADDR + 0x0108) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
.set L2CCDataLatReg, (PSS_L2CC_BASE_ADDR + 0x010C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
.set L2CCIntClear, (PSS_L2CC_BASE_ADDR + 0x0220) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
.set L2CCIntRaw, (PSS_L2CC_BASE_ADDR + 0x021C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/
.set SLCRlockReg, (PSS_SLCR_BASE_ADDR + 0x04) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
.set SLCRUnlockReg, (PSS_SLCR_BASE_ADDR + 0x08) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
.set SLCRL2cRamReg, (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
.set SLCRCPURSTReg, (0xF8000000 + 0x244) /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
.set EFUSEStaus, (0xF800D000 + 0x10) /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/
/* workaround for simulation not working when L1 D and I caches,MMU and L2 cache enabled - DT568997 */
.if SIM_MODE == 1
.set CRValMmuCac, 0b00000000000000 /* Disable IDC, and MMU */
.else
.set CRValMmuCac, 0b01000000000101 /* Enable IDC, and MMU */
.endif
.set CRValHiVectorAddr, 0b10000000000000 /* Set the Vector address to high, 0xFFFF0000 */
.set L2CCAuxControl, 0x72360000 /* Enable all prefetching, Cache replacement policy, Parity enable,
Event monitor bus enable and Way Size (64 KB) */
.set L2CCControl, 0x01 /* Enable L2CC */
.set L2CCTAGLatency, 0x0111 /* latency for TAG RAM */
.set L2CCDataLatency, 0x0121 /* latency for DATA RAM */
.set SLCRlockKey, 0x767B /* SLCR lock key */
.set SLCRUnlockKey, 0xDF0D /* SLCR unlock key */
.set SLCRL2cRamConfig, 0x00020202 /* SLCR L2C ram configuration */
/* Stack Pointer locations for boot code */
.set Undef_stack, __undef_stack
.set FIQ_stack, __fiq_stack
.set Abort_stack, __abort_stack
.set SPV_stack, __supervisor_stack
.set IRQ_stack, __irq_stack
.set SYS_stack, __stack
.set vector_base, _vector_table
.set FPEXC_EN, 0x40000000 /* FPU enable bit, (1 << 30) */
.section .boot,"ax"
/* this initializes the various processor modes */
_prestart:
_boot:
/* Test which processor is running and jump to the catch address */
mrc p15,0,r1,c0,c0,5
and r1, r1, #0xf
cmp r1, #0
bne NotCpu0
ldr r0, =_cpu0_catch
b cpuxCont
NotCpu0:
cmp r1, #1
bne EndlessLoop0
ldr r0, =_cpu1_catch
b cpuxCont
EndlessLoop0:
wfe
b EndlessLoop0
/* Jump to address pointed to by cpux_catch */
cpuxCont:
ldr lr, [r0]
bx lr
OKToRun:
mrc p15, 0, r0, c0, c0, 0 /* Get the revision */
and r5, r0, #0x00f00000
and r6, r0, #0x0000000f
orr r6, r6, r5, lsr #20-4
#ifdef CONFIG_ARM_ERRATA_742230
cmp r6, #0x22 /* only present up to r2p2 */
mrcle p15, 0, r10, c15, c0, 1 /* read diagnostic register */
orrle r10, r10, #1 << 4 /* set bit #4 */
mcrle p15, 0, r10, c15, c0, 1 /* write diagnostic register */
#endif
#ifdef CONFIG_ARM_ERRATA_743622
teq r5, #0x00200000 /* only present in r2p* */
mrceq p15, 0, r10, c15, c0, 1 /* read diagnostic register */
orreq r10, r10, #1 << 6 /* set bit #6 */
mcreq p15, 0, r10, c15, c0, 1 /* write diagnostic register */
#endif
/* set VBAR to the _vector_table address in linker script */
ldr r0, =vector_base
mcr p15, 0, r0, c12, c0, 0
/*invalidate scu*/
ldr r7, =0xf8f0000c
ldr r6, =0xffff
str r6, [r7]
/* Invalidate caches and TLBs */
mov r0,#0 /* r0 = 0 */
mcr p15, 0, r0, c8, c7, 0 /* invalidate TLBs */
mcr p15, 0, r0, c7, c5, 0 /* invalidate icache */
mcr p15, 0, r0, c7, c5, 6 /* Invalidate branch predictor array */
bl invalidate_dcache /* invalidate dcache */
/* Disable MMU, if enabled */
mrc p15, 0, r0, c1, c0, 0 /* read CP15 register 1 */
bic r0, r0, #0x1 /* clear bit 0 */
mcr p15, 0, r0, c1, c0, 0 /* write value back */
#ifdef SHAREABLE_DDR
/* Mark the entire DDR memory as shareable */
ldr r3, =0x3ff /* 1024 entries to cover 1G DDR */
ldr r0, =TblBase /* MMU Table address in memory */
ldr r2, =0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
shareable_loop:
str r2, [r0] /* write the entry to MMU table */
add r0, r0, #0x4 /* next entry in the table */
add r2, r2, #0x100000 /* next section */
subs r3, r3, #1
bge shareable_loop /* loop till 1G is covered */
#endif
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the irq stack pointer */
and r2, r1, r0
orr r2, r2, #0x12 /* IRQ mode */
msr cpsr, r2
ldr r13,=IRQ_stack /* IRQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the supervisor stack pointer */
and r2, r1, r0
orr r2, r2, #0x13 /* supervisor mode */
msr cpsr, r2
ldr r13,=SPV_stack /* Supervisor stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Abort stack pointer */
and r2, r1, r0
orr r2, r2, #0x17 /* Abort mode */
msr cpsr, r2
ldr r13,=Abort_stack /* Abort stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the FIQ stack pointer */
and r2, r1, r0
orr r2, r2, #0x11 /* FIQ mode */
msr cpsr, r2
ldr r13,=FIQ_stack /* FIQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Undefine stack pointer */
and r2, r1, r0
orr r2, r2, #0x1b /* Undefine mode */
msr cpsr, r2
ldr r13,=Undef_stack /* Undefine stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the system stack pointer */
and r2, r1, r0
orr r2, r2, #0x1F /* SYS mode */
msr cpsr, r2
ldr r13,=SYS_stack /* SYS stack pointer */
/*set scu enable bit in scu*/
ldr r7, =0xf8f00000
ldr r0, [r7]
orr r0, r0, #0x1
str r0, [r7]
/* enable MMU and cache */
ldr r0,=TblBase /* Load MMU translation table base */
orr r0, r0, #0x5B /* Outer-cacheable, WB */
mcr 15, 0, r0, c2, c0, 0 /* TTB0 */
mvn r0,#0 /* Load MMU domains -- all ones=manager */
mcr p15,0,r0,c3,c0,0
/* Enable mmu, icahce and dcache */
ldr r0,=CRValMmuCac
mcr p15,0,r0,c1,c0,0 /* Enable cache and MMU */
dsb /* dsb allow the MMU to start up */
isb /* isb flush prefetch buffer */
/* Write to ACTLR */
mrc p15, 0, r0, c1, c0, 1 /* Read ACTLR*/
orr r0, r0, #(0x01 << 6) /* set SMP bit */
orr r0, r0, #(0x01 ) /* Cache/TLB maintenance broadcast */
mcr p15, 0, r0, c1, c0, 1 /* Write ACTLR*/
/* Invalidate L2 Cache and enable L2 Cache*/
/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
#if USE_AMP!=1
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
mov r1, #0 /* force the disable bit */
str r1, [r0] /* disable the L2 Caches */
ldr r0,=L2CCAuxCrtl /* Load L2CC base address base + Aux control register */
ldr r1,[r0] /* read the register */
ldr r2,=L2CCAuxControl /* set the default bits */
orr r1,r1,r2
str r1, [r0] /* store the Aux Control Register */
ldr r0,=L2CCTAGLatReg /* Load L2CC base address base + TAG Latency address */
ldr r1,=L2CCTAGLatency /* set the latencies for the TAG*/
str r1, [r0] /* store the TAG Latency register Register */
ldr r0,=L2CCDataLatReg /* Load L2CC base address base + Data Latency address */
ldr r1,=L2CCDataLatency /* set the latencies for the Data*/
str r1, [r0] /* store the Data Latency register Register */
ldr r0,=L2CCWay /* Load L2CC base address base + way register*/
ldr r2, =0xFFFF
str r2, [r0] /* force invalidate */
ldr r0,=L2CCSync /* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
/* Load L2CC base address base + sync register*/
/* poll for completion */
Sync: ldr r1, [r0]
cmp r1, #0
bne Sync
ldr r0,=L2CCIntRaw /* clear pending interrupts */
ldr r1,[r0]
ldr r0,=L2CCIntClear
str r1,[r0]
ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
ldr r1,=SLCRUnlockKey /* set unlock key */
str r1, [r0] /* Unlock SLCR */
ldr r0,=SLCRL2cRamReg /* Load SLCR base address base + l2c Ram Control register */
ldr r1,=SLCRL2cRamConfig /* set the configuration value */
str r1, [r0] /* store the L2c Ram Control Register */
ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
ldr r1,=SLCRlockKey /* set lock key */
str r1, [r0] /* lock SLCR */
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
ldr r1,[r0] /* read the register */
mov r2, #L2CCControl /* set the enable bit */
orr r1,r1,r2
str r1, [r0] /* enable the L2 Caches */
#endif
mov r0, r0
mrc p15, 0, r1, c1, c0, 2 /* read cp access control register (CACR) into r1 */
orr r1, r1, #(0xf << 20) /* enable full access for p10 & p11 */
mcr p15, 0, r1, c1, c0, 2 /* write back into CACR */
/* enable vfp */
fmrx r1, FPEXC /* read the exception register */
orr r1,r1, #FPEXC_EN /* set VFP enable bit, leave the others in orig state */
fmxr FPEXC, r1 /* write back the exception register */
mrc p15,0,r0,c1,c0,0 /* flow prediction enable */
orr r0, r0, #(0x01 << 11) /* #0x8000 */
mcr p15,0,r0,c1,c0,0
mrc p15,0,r0,c1,c0,1 /* read Auxiliary Control Register */
orr r0, r0, #(0x1 << 2) /* enable Dside prefetch */
orr r0, r0, #(0x1 << 1) /* enable L2 Prefetch hint */
mcr p15,0,r0,c1,c0,1 /* write Auxiliary Control Register */
mrs r0, cpsr /* get the current PSR */
bic r0, r0, #0x100 /* enable asynchronous abort exception */
msr cpsr_xsf, r0
b _start /* jump to C startup code */
and r0, r0, r0 /* no op */
.Ldone: b .Ldone /* Paranoia: we should never get here */
/*
*************************************************************************
*
* invalidate_dcache - invalidate the entire d-cache by set/way
*
* Note: for Cortex-A9, there is no cp instruction for invalidating
* the whole D-cache. Need to invalidate each line.
*
*************************************************************************
*/
invalidate_dcache:
mrc p15, 1, r0, c0, c0, 1 /* read CLIDR */
ands r3, r0, #0x7000000
mov r3, r3, lsr #23 /* cache level value (naturally aligned) */
beq finished
mov r10, #0 /* start with level 0 */
loop1:
add r2, r10, r10, lsr #1 /* work out 3xcachelevel */
mov r1, r0, lsr r2 /* bottom 3 bits are the Cache type for this level */
and r1, r1, #7 /* get those 3 bits alone */
cmp r1, #2
blt skip /* no cache or only instruction cache at this level */
mcr p15, 2, r10, c0, c0, 0 /* write the Cache Size selection register */
isb /* isb to sync the change to the CacheSizeID reg */
mrc p15, 1, r1, c0, c0, 0 /* reads current Cache Size ID register */
and r2, r1, #7 /* extract the line length field */
add r2, r2, #4 /* add 4 for the line length offset (log2 16 bytes) */
ldr r4, =0x3ff
ands r4, r4, r1, lsr #3 /* r4 is the max number on the way size (right aligned) */
clz r5, r4 /* r5 is the bit position of the way size increment */
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 /* r7 is the max number of the index size (right aligned) */
loop2:
mov r9, r4 /* r9 working copy of the max way size (right aligned) */
loop3:
orr r11, r10, r9, lsl r5 /* factor in the way number and cache number into r11 */
orr r11, r11, r7, lsl r2 /* factor in the index number */
mcr p15, 0, r11, c7, c6, 2 /* invalidate by set/way */
subs r9, r9, #1 /* decrement the way number */
bge loop3
subs r7, r7, #1 /* decrement the index */
bge loop2
skip:
add r10, r10, #2 /* increment the cache number */
cmp r3, r10
bgt loop1
finished:
mov r10, #0 /* swith back to cache level 0 */
mcr p15, 2, r10, c0, c0, 0 /* select current cache level in cssr */
dsb
isb
bx lr
.end
/**
* @} End of "addtogroup a9_boot_code".
*/
translation_table.s
/*****************************************************************************/
/**
* @file translation_table.s
*
* @addtogroup a9_boot_code
* @{
* <h2> translation_table.S </h2>
* translation_table.S contains a static page table required by MMU for
* cortex-A9. This translation table is flat mapped (input address = output
* address) with default memory attributes defined for zynq architecture. It
* utilizes short descriptor translation table format with each section defining
* 1MB of memory.
*
* The overview of translation table memory attributes is described below.
*
*| | Memory Range | Definition in Translation Table |
*|-----------------------|-------------------------|-----------------------------------|
*| DDR | 0x00000000 - 0x3FFFFFFF | Normal write-back Cacheable |
*| PL | 0x40000000 - 0xBFFFFFFF | Strongly Ordered |
*| Reserved | 0xC0000000 - 0xDFFFFFFF | Unassigned |
*| Memory mapped devices | 0xE0000000 - 0xE02FFFFF | Device Memory |
*| Reserved | 0xE0300000 - 0xE0FFFFFF | Unassigned |
*| NAND, NOR | 0xE1000000 - 0xE3FFFFFF | Device memory |
*| SRAM | 0xE4000000 - 0xE5FFFFFF | Normal write-back Cacheable |
*| Reserved | 0xE6000000 - 0xF7FFFFFF | Unassigned |
*| AMBA APB Peripherals | 0xF8000000 - 0xF8FFFFFF | Device Memory |
*| Reserved | 0xF9000000 - 0xFBFFFFFF | Unassigned |
*| Linear QSPI - XIP | 0xFC000000 - 0xFDFFFFFF | Normal write-through cacheable |
*| Reserved | 0xFE000000 - 0xFFEFFFFF | Unassigned |
*| OCM | 0xFFF00000 - 0xFFFFFFFF | Normal inner write-back cacheable |
*
* @note
*
* For region 0x00000000 - 0x3FFFFFFF, a system where DDR is less than 1GB,
* region after DDR and before PL is marked as undefined/reserved in translation
* table. In 0xF8000000 - 0xF8FFFFFF, 0xF8000C00 - 0xF8000FFF, 0xF8010000 -
* 0xF88FFFFF and 0xF8F03000 to 0xF8FFFFFF are reserved but due to granual size
* of 1MB, it is not possible to define separate regions for them. For region
* 0xFFF00000 - 0xFFFFFFFF, 0xFFF00000 to 0xFFFB0000 is reserved but due to 1MB
* granual size, it is not possible to define separate region for it
*
* ...
******************************************************************************/
#include "xparameters.h"
.globl MMUTable
.section .mmu_tbl,"a"
MMUTable:
/* Each table entry occupies one 32-bit word and there are
* 4096 entries, so the entire table takes up 16KB.
* Each entry covers a 1MB section.
*/
.set SECT, 0
#ifdef XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_START, XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_END, XPAR_PS7_DDR_0_S_AXI_HIGHADDR
.set DDR_SIZE, (DDR_END - DDR_START)+1
.set DDR_REG, DDR_SIZE/0x100000
#else
.set DDR_REG, 0
#endif
.set UNDEF_REG, 0x3FF - DDR_REG
/*0x00000000 - 0x00100000 (cacheable )*/
.word SECT + 0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set SECT, SECT+0x100000
.rept DDR_REG /* (DDR Cacheable) */
.word SECT + 0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept UNDEF_REG /* (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0400 /* 0x40000000 - 0x7fffffff (FPGA slave0) */
.word SECT + 0xc02 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0400 /* 0x80000000 - 0xbfffffff (FPGA slave1) */
.word SECT + 0xc02 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0200 /* 0xc0000000 - 0xdfffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x003 /* 0xe0000000 - 0xe02fffff (Memory mapped devices)
* UART/USB/IIC/SPI/CAN/GEM/GPIO/QSPI/SD/NAND */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0D /* 0xe0300000 - 0xe0ffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0010 /* 0xe1000000 - 0xe1ffffff (NAND) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xe2000000 - 0xe3ffffff (NOR) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xe4000000 - 0xe5ffffff (SRAM) */
.word SECT + 0xc0e /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0120 /* 0xe6000000 - 0xf7ffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
/* 0xf8000c00 to 0xf8000fff, 0xf8010000 to 0xf88fffff and
0xf8f03000 to 0xf8ffffff are reserved but due to granual size of
1MB, it is not possible to define separate regions for them */
.rept 0x0010 /* 0xf8000000 - 0xf8ffffff (AMBA APB Peripherals) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0030 /* 0xf9000000 - 0xfbffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xfc000000 - 0xfdffffff (Linear QSPI - XIP) */
.word SECT + 0xc0a /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x001F /* 0xfe000000 - 0xffefffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
/* 0xfff00000 to 0xfffb0000 is reserved but due to granual size of
1MB, it is not possible to define separate region for it
0xfff00000 - 0xffffffff
256K OCM when mapped to high address space
inner-cacheable */
.word SECT + 0x4c0e /* S=b0 TEX=b100 AP=b11, Domain=b0, C=b1, B=b1 */
.set SECT, SECT+0x100000
.end
/**
* @} End of "addtogroup a9_boot_code".
@rtel : Do you know what causes the problems of memory?
I’m sorry, XuanTran, but I have to leave the discussion here. The recurring pattern among my customers is that FreeRTOS is only used with the ARM Cortex M series whereas prefabbed larger OSs requiring such as Embedded Linux are employed for the A family. Thus, I’m fairly familiar with M but would need (currently unavailable) resources to think myself into the A architecture & idiosyncracies.
Thanks for sharing your code, I hope you’ll find the answer soon - if not here, please share the resolution with us! Best of luck!
@RAc : Thanks for your time and support. You helped me a lot to understand the problem.
Have you tried my suggestion of checking the startup code is working correctly before main() is called?
@rtel :
Yes, I did.
uxCriticalNesting = N/A
uxCurrentNumberOfTasks = 0xa5a5a5a5
Did I do it correctly?
I am using freertos901_xilinx.
uxCurrentNumberOfTasks
is initialized to zero here: FreeRTOS-Kernel/tasks.c at main · FreeRTOS/FreeRTOS-Kernel · GitHub
Your observation indicates that your c startup code is not correctly initializing the data and/or bss section (as suspected by Richard). I’d suggest to get a bare metal project working first and ensure that global variables get initialized correctly.
Thanks.
@aggarg :Thanks for your suggestion. I am curious about why c startup code doesn’t work correctly.
It is hard to say without looking at the startup code - incorrect values of some variables which are exported from the linker scripts can be one reason. Try single stepping your startup code. If you share your startup code, we can try to help.
Thanks.
@aggarg: Can you clarify what I should provide? Are they boot files? Sorry for stupid questions.
It depends on your platform - usually there are some assembly files in your project.
I going to again point that a much easier route will be to start with a working sample project and then add FreeRTOS.
Thanks.
@aggarg : Thanks for your tips!
Sorry, I mixed up the variable uxCriticalNesting and ulCriticalNesting in the previous posts.
Putting a break point at the beginning of main(), values of ulCriticalNesting and uxCurrentNumberOfTasks are as follows:
ulCriticalNesting 0x0000270f
uxCurrentNumberOfTasks 0xa5a5a5a5
I don’t have much knowledge of the startup code of a program. I am not sure, if these following files are correct.
xil-crt0.S
.file "xil-crt0.S"
.section ".got2","aw"
.align 2
.text
.Lsbss_start:
.long __sbss_start
.Lsbss_end:
.long __sbss_end
.Lbss_start:
.long __bss_start
.Lbss_end:
.long __bss_end
.Lstack:
.long __stack
.globl _start
_start:
bl __cpu_init /* Initialize the CPU first (BSP provides this) */
mov r0, #0
/* clear sbss */
ldr r1,.Lsbss_start /* calculate beginning of the SBSS */
ldr r2,.Lsbss_end /* calculate end of the SBSS */
.Lloop_sbss:
cmp r1,r2
bge .Lenclsbss /* If no SBSS, no clearing required */
str r0, [r1], #4
b .Lloop_sbss
.Lenclsbss:
/* clear bss */
ldr r1,.Lbss_start /* calculate beginning of the BSS */
ldr r2,.Lbss_end /* calculate end of the BSS */
.Lloop_bss:
cmp r1,r2
bge .Lenclbss /* If no BSS, no clearing required */
str r0, [r1], #4
b .Lloop_bss
.Lenclbss:
/* set stack pointer */
ldr r13,.Lstack /* stack address */
/* Reset and start Global Timer */
mov r0, #0x0
mov r1, #0x0
#if USE_AMP != 1
bl XTime_SetTime
#endif
#ifdef PROFILING /* defined in Makefile */
/* Setup profiling stuff */
bl _profile_init
#endif /* PROFILING */
/* run global constructors */
bl __libc_init_array
/* make sure argc and argv are valid */
mov r0, #0
mov r1, #0
/* Let her rip */
bl main
/* Cleanup global constructors */
bl __libc_fini_array
#ifdef PROFILING
/* Cleanup profiling stuff */
bl _profile_clean
#endif /* PROFILING */
/* All done */
bl exit
.Lexit: /* should never get here */
b .Lexit
.Lstart:
.size _start,.Lstart-_start
boot.S
/*****************************************************************************/
/**
* @file boot.S
*
* @addtogroup a9_boot_code Cortex A9 Processor Boot Code
* @{
* <h2> boot.S </h2>
* The boot code performs minimum configuration which is required for an
* application to run starting from processor's reset state. Below is a
* sequence illustrating what all configuration is performed before control
* reaches to main function.
*
* 1. Program vector table base for exception handling
* 2. Invalidate instruction cache, data cache and TLBs
* 3. Program stack pointer for various modes (IRQ, FIQ, supervisor, undefine,
* abort, system)
* 4. Configure MMU with short descriptor translation table format and program
* base address of translation table
* 5. Enable data cache, instruction cache and MMU
* 6. Enable Floating point unit
* 7. Transfer control to _start which clears BSS sections, initializes
* global timer and runs global constructor before jumping to main
* application
******************************************************************************/
#include "xparameters.h"
#include "xil_errata.h"
.globl MMUTable
.global _prestart
.global _boot
.global __stack
.global __irq_stack
.global __supervisor_stack
.global __abort_stack
.global __fiq_stack
.global __undef_stack
.global _vector_table
.globl _cpu0_catch
.globl _cpu1_catch
.globl OKToRun
.globl EndlessLoop0
.set PSS_L2CC_BASE_ADDR, 0xF8F02000
.set PSS_SLCR_BASE_ADDR, 0xF8000000
.set RESERVED, 0x0fffff00
.set TblBase , MMUTable
.set LRemap, 0xFE00000F /* set the base address of the peripheral block as not shared */
.set L2CCWay, (PSS_L2CC_BASE_ADDR + 0x077C) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_INVLD_WAY_OFFSET)*/
.set L2CCSync, (PSS_L2CC_BASE_ADDR + 0x0730) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CACHE_SYNC_OFFSET)*/
.set L2CCCrtl, (PSS_L2CC_BASE_ADDR + 0x0100) /*(PSS_L2CC_BASE_ADDR + PSS_L2CC_CNTRL_OFFSET)*/
.set L2CCAuxCrtl, (PSS_L2CC_BASE_ADDR + 0x0104) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_AUX_CNTRL_OFFSET)*/
.set L2CCTAGLatReg, (PSS_L2CC_BASE_ADDR + 0x0108) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_TAG_RAM_CNTRL_OFFSET)*/
.set L2CCDataLatReg, (PSS_L2CC_BASE_ADDR + 0x010C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_DATA_RAM_CNTRL_OFFSET)*/
.set L2CCIntClear, (PSS_L2CC_BASE_ADDR + 0x0220) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_IAR_OFFSET)*/
.set L2CCIntRaw, (PSS_L2CC_BASE_ADDR + 0x021C) /*(PSS_L2CC_BASE_ADDR + XPSS_L2CC_ISR_OFFSET)*/
.set SLCRlockReg, (PSS_SLCR_BASE_ADDR + 0x04) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_LOCK_OFFSET)*/
.set SLCRUnlockReg, (PSS_SLCR_BASE_ADDR + 0x08) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_UNLOCK_OFFSET)*/
.set SLCRL2cRamReg, (PSS_SLCR_BASE_ADDR + 0xA1C) /*(PSS_SLCR_BASE_ADDR + XPSS_SLCR_L2C_RAM_OFFSET)*/
.set SLCRCPURSTReg, (0xF8000000 + 0x244) /*(XPS_SYS_CTRL_BASEADDR + A9_CPU_RST_CTRL_OFFSET)*/
.set EFUSEStaus, (0xF800D000 + 0x10) /*(XPS_EFUSE_BASEADDR + EFUSE_STATUS_OFFSET)*/
/* workaround for simulation not working when L1 D and I caches,MMU and L2 cache enabled - DT568997 */
.if SIM_MODE == 1
.set CRValMmuCac, 0b00000000000000 /* Disable IDC, and MMU */
.else
.set CRValMmuCac, 0b01000000000101 /* Enable IDC, and MMU */
.endif
.set CRValHiVectorAddr, 0b10000000000000 /* Set the Vector address to high, 0xFFFF0000 */
.set L2CCAuxControl, 0x72360000 /* Enable all prefetching, Cache replacement policy, Parity enable,
Event monitor bus enable and Way Size (64 KB) */
.set L2CCControl, 0x01 /* Enable L2CC */
.set L2CCTAGLatency, 0x0111 /* latency for TAG RAM */
.set L2CCDataLatency, 0x0121 /* latency for DATA RAM */
.set SLCRlockKey, 0x767B /* SLCR lock key */
.set SLCRUnlockKey, 0xDF0D /* SLCR unlock key */
.set SLCRL2cRamConfig, 0x00020202 /* SLCR L2C ram configuration */
/* Stack Pointer locations for boot code */
.set Undef_stack, __undef_stack
.set FIQ_stack, __fiq_stack
.set Abort_stack, __abort_stack
.set SPV_stack, __supervisor_stack
.set IRQ_stack, __irq_stack
.set SYS_stack, __stack
.set vector_base, _vector_table
.set FPEXC_EN, 0x40000000 /* FPU enable bit, (1 << 30) */
.section .boot,"ax"
/* this initializes the various processor modes */
_prestart:
_boot:
/* Test which processor is running and jump to the catch address */
mrc p15,0,r1,c0,c0,5
and r1, r1, #0xf
cmp r1, #0
bne NotCpu0
ldr r0, =_cpu0_catch
b cpuxCont
NotCpu0:
cmp r1, #1
bne EndlessLoop0
ldr r0, =_cpu1_catch
b cpuxCont
EndlessLoop0:
wfe
b EndlessLoop0
/* Jump to address pointed to by cpux_catch */
cpuxCont:
ldr lr, [r0]
bx lr
OKToRun:
mrc p15, 0, r0, c0, c0, 0 /* Get the revision */
and r5, r0, #0x00f00000
and r6, r0, #0x0000000f
orr r6, r6, r5, lsr #20-4
#ifdef CONFIG_ARM_ERRATA_742230
cmp r6, #0x22 /* only present up to r2p2 */
mrcle p15, 0, r10, c15, c0, 1 /* read diagnostic register */
orrle r10, r10, #1 << 4 /* set bit #4 */
mcrle p15, 0, r10, c15, c0, 1 /* write diagnostic register */
#endif
#ifdef CONFIG_ARM_ERRATA_743622
teq r5, #0x00200000 /* only present in r2p* */
mrceq p15, 0, r10, c15, c0, 1 /* read diagnostic register */
orreq r10, r10, #1 << 6 /* set bit #6 */
mcreq p15, 0, r10, c15, c0, 1 /* write diagnostic register */
#endif
/* set VBAR to the _vector_table address in linker script */
ldr r0, =vector_base
mcr p15, 0, r0, c12, c0, 0
/*invalidate scu*/
ldr r7, =0xf8f0000c
ldr r6, =0xffff
str r6, [r7]
/* Invalidate caches and TLBs */
mov r0,#0 /* r0 = 0 */
mcr p15, 0, r0, c8, c7, 0 /* invalidate TLBs */
mcr p15, 0, r0, c7, c5, 0 /* invalidate icache */
mcr p15, 0, r0, c7, c5, 6 /* Invalidate branch predictor array */
bl invalidate_dcache /* invalidate dcache */
/* Disable MMU, if enabled */
mrc p15, 0, r0, c1, c0, 0 /* read CP15 register 1 */
bic r0, r0, #0x1 /* clear bit 0 */
mcr p15, 0, r0, c1, c0, 0 /* write value back */
#ifdef SHAREABLE_DDR
/* Mark the entire DDR memory as shareable */
ldr r3, =0x3ff /* 1024 entries to cover 1G DDR */
ldr r0, =TblBase /* MMU Table address in memory */
ldr r2, =0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
shareable_loop:
str r2, [r0] /* write the entry to MMU table */
add r0, r0, #0x4 /* next entry in the table */
add r2, r2, #0x100000 /* next section */
subs r3, r3, #1
bge shareable_loop /* loop till 1G is covered */
#endif
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the irq stack pointer */
and r2, r1, r0
orr r2, r2, #0x12 /* IRQ mode */
msr cpsr, r2
ldr r13,=IRQ_stack /* IRQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the supervisor stack pointer */
and r2, r1, r0
orr r2, r2, #0x13 /* supervisor mode */
msr cpsr, r2
ldr r13,=SPV_stack /* Supervisor stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Abort stack pointer */
and r2, r1, r0
orr r2, r2, #0x17 /* Abort mode */
msr cpsr, r2
ldr r13,=Abort_stack /* Abort stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the FIQ stack pointer */
and r2, r1, r0
orr r2, r2, #0x11 /* FIQ mode */
msr cpsr, r2
ldr r13,=FIQ_stack /* FIQ stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the Undefine stack pointer */
and r2, r1, r0
orr r2, r2, #0x1b /* Undefine mode */
msr cpsr, r2
ldr r13,=Undef_stack /* Undefine stack pointer */
bic r2, r2, #(0x1 << 9) /* Set EE bit to little-endian */
msr spsr_fsxc,r2
mrs r0, cpsr /* get the current PSR */
mvn r1, #0x1f /* set up the system stack pointer */
and r2, r1, r0
orr r2, r2, #0x1F /* SYS mode */
msr cpsr, r2
ldr r13,=SYS_stack /* SYS stack pointer */
/*set scu enable bit in scu*/
ldr r7, =0xf8f00000
ldr r0, [r7]
orr r0, r0, #0x1
str r0, [r7]
/* enable MMU and cache */
ldr r0,=TblBase /* Load MMU translation table base */
orr r0, r0, #0x5B /* Outer-cacheable, WB */
mcr 15, 0, r0, c2, c0, 0 /* TTB0 */
mvn r0,#0 /* Load MMU domains -- all ones=manager */
mcr p15,0,r0,c3,c0,0
/* Enable mmu, icahce and dcache */
ldr r0,=CRValMmuCac
mcr p15,0,r0,c1,c0,0 /* Enable cache and MMU */
dsb /* dsb allow the MMU to start up */
isb /* isb flush prefetch buffer */
/* Write to ACTLR */
mrc p15, 0, r0, c1, c0, 1 /* Read ACTLR*/
orr r0, r0, #(0x01 << 6) /* set SMP bit */
orr r0, r0, #(0x01 ) /* Cache/TLB maintenance broadcast */
mcr p15, 0, r0, c1, c0, 1 /* Write ACTLR*/
/* Invalidate L2 Cache and enable L2 Cache*/
/* For AMP, assume running on CPU1. Don't initialize L2 Cache (up to Linux) */
#if USE_AMP!=1
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
mov r1, #0 /* force the disable bit */
str r1, [r0] /* disable the L2 Caches */
ldr r0,=L2CCAuxCrtl /* Load L2CC base address base + Aux control register */
ldr r1,[r0] /* read the register */
ldr r2,=L2CCAuxControl /* set the default bits */
orr r1,r1,r2
str r1, [r0] /* store the Aux Control Register */
ldr r0,=L2CCTAGLatReg /* Load L2CC base address base + TAG Latency address */
ldr r1,=L2CCTAGLatency /* set the latencies for the TAG*/
str r1, [r0] /* store the TAG Latency register Register */
ldr r0,=L2CCDataLatReg /* Load L2CC base address base + Data Latency address */
ldr r1,=L2CCDataLatency /* set the latencies for the Data*/
str r1, [r0] /* store the Data Latency register Register */
ldr r0,=L2CCWay /* Load L2CC base address base + way register*/
ldr r2, =0xFFFF
str r2, [r0] /* force invalidate */
ldr r0,=L2CCSync /* need to poll 0x730, PSS_L2CC_CACHE_SYNC_OFFSET */
/* Load L2CC base address base + sync register*/
/* poll for completion */
Sync: ldr r1, [r0]
cmp r1, #0
bne Sync
ldr r0,=L2CCIntRaw /* clear pending interrupts */
ldr r1,[r0]
ldr r0,=L2CCIntClear
str r1,[r0]
ldr r0,=SLCRUnlockReg /* Load SLCR base address base + unlock register */
ldr r1,=SLCRUnlockKey /* set unlock key */
str r1, [r0] /* Unlock SLCR */
ldr r0,=SLCRL2cRamReg /* Load SLCR base address base + l2c Ram Control register */
ldr r1,=SLCRL2cRamConfig /* set the configuration value */
str r1, [r0] /* store the L2c Ram Control Register */
ldr r0,=SLCRlockReg /* Load SLCR base address base + lock register */
ldr r1,=SLCRlockKey /* set lock key */
str r1, [r0] /* lock SLCR */
ldr r0,=L2CCCrtl /* Load L2CC base address base + control register */
ldr r1,[r0] /* read the register */
mov r2, #L2CCControl /* set the enable bit */
orr r1,r1,r2
str r1, [r0] /* enable the L2 Caches */
#endif
mov r0, r0
mrc p15, 0, r1, c1, c0, 2 /* read cp access control register (CACR) into r1 */
orr r1, r1, #(0xf << 20) /* enable full access for p10 & p11 */
mcr p15, 0, r1, c1, c0, 2 /* write back into CACR */
/* enable vfp */
fmrx r1, FPEXC /* read the exception register */
orr r1,r1, #FPEXC_EN /* set VFP enable bit, leave the others in orig state */
fmxr FPEXC, r1 /* write back the exception register */
mrc p15,0,r0,c1,c0,0 /* flow prediction enable */
orr r0, r0, #(0x01 << 11) /* #0x8000 */
mcr p15,0,r0,c1,c0,0
mrc p15,0,r0,c1,c0,1 /* read Auxiliary Control Register */
orr r0, r0, #(0x1 << 2) /* enable Dside prefetch */
orr r0, r0, #(0x1 << 1) /* enable L2 Prefetch hint */
mcr p15,0,r0,c1,c0,1 /* write Auxiliary Control Register */
mrs r0, cpsr /* get the current PSR */
bic r0, r0, #0x100 /* enable asynchronous abort exception */
msr cpsr_xsf, r0
b _start /* jump to C startup code */
and r0, r0, r0 /* no op */
.Ldone: b .Ldone /* Paranoia: we should never get here */
/*
*************************************************************************
*
* invalidate_dcache - invalidate the entire d-cache by set/way
*
* Note: for Cortex-A9, there is no cp instruction for invalidating
* the whole D-cache. Need to invalidate each line.
*
*************************************************************************
*/
invalidate_dcache:
mrc p15, 1, r0, c0, c0, 1 /* read CLIDR */
ands r3, r0, #0x7000000
mov r3, r3, lsr #23 /* cache level value (naturally aligned) */
beq finished
mov r10, #0 /* start with level 0 */
loop1:
add r2, r10, r10, lsr #1 /* work out 3xcachelevel */
mov r1, r0, lsr r2 /* bottom 3 bits are the Cache type for this level */
and r1, r1, #7 /* get those 3 bits alone */
cmp r1, #2
blt skip /* no cache or only instruction cache at this level */
mcr p15, 2, r10, c0, c0, 0 /* write the Cache Size selection register */
isb /* isb to sync the change to the CacheSizeID reg */
mrc p15, 1, r1, c0, c0, 0 /* reads current Cache Size ID register */
and r2, r1, #7 /* extract the line length field */
add r2, r2, #4 /* add 4 for the line length offset (log2 16 bytes) */
ldr r4, =0x3ff
ands r4, r4, r1, lsr #3 /* r4 is the max number on the way size (right aligned) */
clz r5, r4 /* r5 is the bit position of the way size increment */
ldr r7, =0x7fff
ands r7, r7, r1, lsr #13 /* r7 is the max number of the index size (right aligned) */
loop2:
mov r9, r4 /* r9 working copy of the max way size (right aligned) */
loop3:
orr r11, r10, r9, lsl r5 /* factor in the way number and cache number into r11 */
orr r11, r11, r7, lsl r2 /* factor in the index number */
mcr p15, 0, r11, c7, c6, 2 /* invalidate by set/way */
subs r9, r9, #1 /* decrement the way number */
bge loop3
subs r7, r7, #1 /* decrement the index */
bge loop2
skip:
add r10, r10, #2 /* increment the cache number */
cmp r3, r10
bgt loop1
finished:
mov r10, #0 /* swith back to cache level 0 */
mcr p15, 2, r10, c0, c0, 0 /* select current cache level in cssr */
dsb
isb
bx lr
.end
/**
* @} End of "addtogroup a9_boot_code".
translation_table.S
/*****************************************************************************/
/**
* @file translation_table.s
*
* @addtogroup a9_boot_code
* @{
* <h2> translation_table.S </h2>
* translation_table.S contains a static page table required by MMU for
* cortex-A9. This translation table is flat mapped (input address = output
* address) with default memory attributes defined for zynq architecture. It
* utilizes short descriptor translation table format with each section defining
* 1MB of memory.
*
* The overview of translation table memory attributes is described below.
*
*| | Memory Range | Definition in Translation Table |
*|-----------------------|-------------------------|-----------------------------------|
*| DDR | 0x00000000 - 0x3FFFFFFF | Normal write-back Cacheable |
*| PL | 0x40000000 - 0xBFFFFFFF | Strongly Ordered |
*| Reserved | 0xC0000000 - 0xDFFFFFFF | Unassigned |
*| Memory mapped devices | 0xE0000000 - 0xE02FFFFF | Device Memory |
*| Reserved | 0xE0300000 - 0xE0FFFFFF | Unassigned |
*| NAND, NOR | 0xE1000000 - 0xE3FFFFFF | Device memory |
*| SRAM | 0xE4000000 - 0xE5FFFFFF | Normal write-back Cacheable |
*| Reserved | 0xE6000000 - 0xF7FFFFFF | Unassigned |
*| AMBA APB Peripherals | 0xF8000000 - 0xF8FFFFFF | Device Memory |
*| Reserved | 0xF9000000 - 0xFBFFFFFF | Unassigned |
*| Linear QSPI - XIP | 0xFC000000 - 0xFDFFFFFF | Normal write-through cacheable |
*| Reserved | 0xFE000000 - 0xFFEFFFFF | Unassigned |
*| OCM | 0xFFF00000 - 0xFFFFFFFF | Normal inner write-back cacheable |
*
* @note
*
* For region 0x00000000 - 0x3FFFFFFF, a system where DDR is less than 1GB,
* region after DDR and before PL is marked as undefined/reserved in translation
* table. In 0xF8000000 - 0xF8FFFFFF, 0xF8000C00 - 0xF8000FFF, 0xF8010000 -
* 0xF88FFFFF and 0xF8F03000 to 0xF8FFFFFF are reserved but due to granual size
* of 1MB, it is not possible to define separate regions for them. For region
* 0xFFF00000 - 0xFFFFFFFF, 0xFFF00000 to 0xFFFB0000 is reserved but due to 1MB
* granual size, it is not possible to define separate region for it
*
******************************************************************************/
#include "xparameters.h"
.globl MMUTable
.section .mmu_tbl,"a"
MMUTable:
/* Each table entry occupies one 32-bit word and there are
* 4096 entries, so the entire table takes up 16KB.
* Each entry covers a 1MB section.
*/
.set SECT, 0
#ifdef XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_START, XPAR_PS7_DDR_0_S_AXI_BASEADDR
.set DDR_END, XPAR_PS7_DDR_0_S_AXI_HIGHADDR
.set DDR_SIZE, (DDR_END - DDR_START)+1
.set DDR_REG, DDR_SIZE/0x100000
#else
.set DDR_REG, 0
#endif
.set UNDEF_REG, 0x3FF - DDR_REG
/*0x00000000 - 0x00100000 (cacheable )*/
.word SECT + 0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set SECT, SECT+0x100000
.rept DDR_REG /* (DDR Cacheable) */
.word SECT + 0x15de6 /* S=b1 TEX=b101 AP=b11, Domain=b1111, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept UNDEF_REG /* (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0400 /* 0x40000000 - 0x7fffffff (FPGA slave0) */
.word SECT + 0xc02 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0400 /* 0x80000000 - 0xbfffffff (FPGA slave1) */
.word SECT + 0xc02 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0200 /* 0xc0000000 - 0xdfffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x003 /* 0xe0000000 - 0xe02fffff (Memory mapped devices)
* UART/USB/IIC/SPI/CAN/GEM/GPIO/QSPI/SD/NAND */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0D /* 0xe0300000 - 0xe0ffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0010 /* 0xe1000000 - 0xe1ffffff (NAND) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xe2000000 - 0xe3ffffff (NOR) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xe4000000 - 0xe5ffffff (SRAM) */
.word SECT + 0xc0e /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0120 /* 0xe6000000 - 0xf7ffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
/* 0xf8000c00 to 0xf8000fff, 0xf8010000 to 0xf88fffff and
0xf8f03000 to 0xf8ffffff are reserved but due to granual size of
1MB, it is not possible to define separate regions for them */
.rept 0x0010 /* 0xf8000000 - 0xf8ffffff (AMBA APB Peripherals) */
.word SECT + 0xc06 /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b0, B=b1 */
.set SECT, SECT+0x100000
.endr
.rept 0x0030 /* 0xf9000000 - 0xfbffffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x0020 /* 0xfc000000 - 0xfdffffff (Linear QSPI - XIP) */
.word SECT + 0xc0a /* S=b0 TEX=b000 AP=b11, Domain=b0, C=b1, B=b0 */
.set SECT, SECT+0x100000
.endr
.rept 0x001F /* 0xfe000000 - 0xffefffff (unassigned/reserved).
* Generates a translation fault if accessed */
.word SECT + 0x0 /* S=b0 TEX=b000 AP=b00, Domain=b0, C=b0, B=b0 */
.set SECT, SECT+0x100000
.endr
/* 0xfff00000 to 0xfffb0000 is reserved but due to granual size of
1MB, it is not possible to define separate region for it
0xfff00000 - 0xffffffff
256K OCM when mapped to high address space
inner-cacheable */
.word SECT + 0x4c0e /* S=b0 TEX=b100 AP=b11, Domain=b0, C=b1, B=b1 */
.set SECT, SECT+0x100000
.end
/**
* @} End of "addtogroup a9_boot_code".
*/
There are a number of preprocessor option dependent code paths in your code, for example using SHAREABLE_DDR. Did you make sure that your startup code matches your hardware? Is your hardware custom, an eval board or a commercial board? If the latter, you should first check with your board manufacturer that the startup code fits your hardware.
Not gone back to the top of the thread the refresh my memory - but if I do recall correctly this is a dual core system where one core boots correctly and the other doesn’t. How is the second core meant to boot? Does it start running its own application out of reset, or is the first core that boots meant to be responsible for booting it? If the latter method is the case then I suspect the misbehaving core is just not being booted correctly. Maybe it is jumping to main() without first running the C-startup code, or maybe the first core to boot is meant to set up the environment for all the cores.
It is custom hardware.
Two ARM Cortex A9 cores are used.
The CPU0 is in charge for starting execution code on CPU1.
FSBL which is located in CPU0 is responsible for booting the system.
CPU1 starts correctly as shown in the screenshot below.
CPU1
Before the main() function starts, the values of ulCriticalNesting and uxCurrentNumberOfTasks
ulCriticalNesting 9999 (0x0000270f)
uxCurrentNumberOfTasks 0 (0x00000000)
well, if that’s the case, you won’t have a choice but wade through the MCU reference manual and write the startup code yourself. Or try all combinations of SHAREABLE_DDR etc. set and unset and hope for one combination to work…
The startup code you try to adapt makes assumptions about the hardware, and it’ll work only for a matching hardware. We (ie somebody who is familar enough with the A9 architecture) could only help you here if you disclosed the entire circuit diagram so we could design the startup code. However, that is a lot of work for which a consultant would charge a significant amount of money. I wouldn’t do that for free.
The other issue, of course, is that obviously your hardware is still in the prototypical stage, so the design must be debugged along with the startup code. There may simply be a wiring bug or some pin incompatibility.
@RAc : Thanks so much for your help and guidance!