0

I have an STM32H750 ARM Cortex M7 processor and am compiling C code using STM32CubeIDE. I want to have default code in FLASH that is able to run the application by itself but which can also load an updated application to RAM and run the update from RAM. Eventually the update will have different initialized variables and jump tables so my thinking is that it would make sense for the branch from FLASH to RAM to be in the startup code before that initialization takes place. However, for the moment, the default and update source code are identical except for the update branch-to address (more on that later). Eventually this has to work with RDP2 security, but for the moment RDP2 is not set.

The startup code looks for a magic cookie set by successfully loading the update and branches to the ContinueInit address in the update. I have not included the reset and interrupt vectors.

/*
 * Reset_Handler
 *
 * Entry point taken after reset (GNU as, Thumb-2, Cortex-M7).
 *
 *   1. Load the stack pointer and enable the RAM1-3 clocks.
 *   2. Read the reset status register, then clear it.
 *   3. If the reset status equals BOOT_EXT_RESET_FLAGS and the word at
 *      magic_cookie equals BOOT_COOKIE_UPDATE, overwrite the cookie and
 *      jump to UpdateContinueInit (an absolute address supplied by the
 *      linker script).
 *   4. Otherwise fall into ContinueInit: copy .data, zero .bss, then call
 *      SystemInit, __libc_init_array and main.
 *
 * In:     nothing
 * Out:    never returns
 * Clobb:  r0-r3, flags, plus whatever the called C functions clobber
 *
 * NOTE: UpdateContinueInit is a hard-coded address in the linker scripts.
 * Any change to the code in front of ContinueInit (including the jump
 * sequence below) moves ContinueInit, so that address must be re-read from
 * the map file of the image being jumped to.
 */
  .equ  BOOT_RCC_AHB2ENR,     0x580244dc  /* RAM1-3 clock enable register (RCC_AHB2ENR in RM0433) */
  .equ  BOOT_SRAM123_CLK_EN,  0xe0000000  /* value that turns on the RAM1-3 clocks                */
  .equ  BOOT_RCC_RSR,         0x580244d0  /* reset status register (RCC_RSR in RM0433)            */
  .equ  BOOT_EXT_RESET_FLAGS, 0x00460000  /* reset status value treated as "external reset"       */
  .equ  BOOT_RSR_CLEAR,       0x00010000  /* value written to clear the reset status flags        */
  .equ  BOOT_COOKIE_UPDATE,   0x12345678  /* cookie value meaning "run the update"                */
  .equ  BOOT_COOKIE_CONSUMED, 0x00100010  /* value stored once the cookie has been acted upon     */

Reset_Handler:
  ldr   sp, =_estack      /* set stack pointer */

  ldr   r1, =BOOT_RCC_AHB2ENR
  ldr   r2, =BOOT_SRAM123_CLK_EN
  str   r2, [r1]    /* turn on the RAM1-3 clocks - this is essential!   */

  ldr   r0, =BOOT_RCC_RSR
  ldr   r1, [r0]    /* get the value in the reset status register       */
  ldr   r2, =BOOT_EXT_RESET_FLAGS
  cmp   r2, r1      /* compare reset status to external reset value     */
  ldr   r2, =BOOT_RSR_CLEAR
  str   r2, [r0]    /* clear the reset status register; ldr/str leave   */
                    /* the flags from the cmp above untouched           */
  bne   ContinueInit    /* use default code if not from external reset      */

  ldr   r0, =magic_cookie
  ldr   r1, [r0]    /* get the value in the magic cookie                */
  ldr   r2, =BOOT_COOKIE_UPDATE
  cmp   r2, r1      /* compare magic cookie to update value             */
  bne   ContinueInit    /* use default code if no cookie match              */

  ldr   r2, =BOOT_COOKIE_CONSUMED
  str   r2, [r0]    /* clear the magic cookie so the next reset runs the default */

  /*
   * Jump to the other image through a register with bit 0 forced to 1.
   * UpdateContinueInit is a plain absolute symbol (even value, not typed as
   * a Thumb function), so a direct "b" leaves the instruction-set state of
   * the far jump up to a linker-generated veneer. Cortex-M only executes
   * Thumb code; branching with bit 0 clear raises a UsageFault (INVSTATE).
   */
  ldr   r0, =UpdateContinueInit
  orr   r0, r0, #1
  bx    r0

ContinueInit:
/* Copy the data segment initializers from code to SRAM.          */
/* r0 = _sdata, r1 = byte offset, r2 = current destination,       */
/* r3 = scratch / _edata.                                         */
  movs  r1, #0
  b  LoopCopyDataInit   /* test first so an empty .data copies nothing */
CopyDataInit:
  ldr   r3, =_sidata
  ldr   r3, [r3, r1]    /* r3 = word at _sidata + offset (load image)  */
  str   r3, [r0, r1]    /* store it at _sdata + offset (run address)   */
  adds  r1, r1, #4
LoopCopyDataInit:
  ldr   r0, =_sdata
  ldr   r3, =_edata
  adds  r2, r0, r1
  cmp   r2, r3
  bcc   CopyDataInit    /* unsigned: continue while _sdata + offset < _edata */
  ldr   r2, =_sbss
  b     LoopFillZerobss

/* Zero fill the bss segment. r2 = write pointer, r3 = 0 / _ebss. */
FillZerobss:
  movs  r3, #0
  str   r3, [r2], #4    /* post-increment store */
LoopFillZerobss:
  ldr   r3, = _ebss
  cmp   r2, r3
  bcc   FillZerobss     /* unsigned: continue while r2 < _ebss */

/* Call the clock system initialization function. */
  bl    SystemInit

/* Call static constructors */
  bl    __libc_init_array

/* branch to the default main program */
  bl    main

/* main is not expected to return. If it does, LR no longer holds a   */
/* meaningful return address, so park here instead of executing bx lr. */
  b     .

Separate .ld files govern the linking of default and update. The FLASH ContinueInit address appears in the update .ld file to maintain similarity between default and update.

/* Default Entry Point */
ENTRY(Reset_Handler)

/* Highest address of the user mode stack */
_estack = 0x20020000;    /* end of DTCMRAM */
/* Generate a link error if heap and stack don't fit into RAM */
_Min_Heap_Size  = 0x400; /* required amount of heap  */
_Min_Stack_Size = 0x800; /* required amount of stack */

/* 1mS counter location used by ISR */
uwTick = 0x20000000;
/* Fixed-address word tested by Reset_Handler to decide whether to run the update */
magic_cookie  = 0x20000004;
/* Absolute address Reset_Handler jumps to when the cookie matches.          */
/* Hard-coded: it has to be kept in step with the address of ContinueInit in */
/* the image being jumped to (check that image's map file after each build). */
UpdateContinueInit = 0x24014572;

/* Specify the memory areas */
/* DTCMRAM starts at 0x20000008: the two words below it are the fixed  */
/* locations assigned to uwTick (0x20000000) and magic_cookie          */
/* (0x20000004), so the linker never allocates anything on top of them. */
/* 0x20000008 + 0x1fff8 = 0x20020000 = _estack.                        */
MEMORY
{
  FLASH (rx)     : ORIGIN = 0x08000000, LENGTH = 128K
  DTCMRAM (xrw)  : ORIGIN = 0x20000008, LENGTH = 0x1fff8
  RAM123 (xrw)   : ORIGIN = 0x30000000, LENGTH = 288K
}

/* Define output sections */
/* Default image: code and constants are placed in FLASH, .data runs  */
/* from DTCMRAM with its load image in FLASH, .bss lives in RAM123.    */
SECTIONS
{
  /* The startup code goes first into FLASH */
  .isr_vector :
  {
    . = ALIGN(4);
    KEEP(*(.isr_vector)) /* Startup code */
    . = ALIGN(4);
  } >FLASH

  /* The program code and other data goes into FLASH */
  .text :
  {
    . = ALIGN(4);
    *(.text)           /* .text sections (code) */
    *(.text*)          /* .text* sections (code) */
    *(.glue_7)         /* glue arm to thumb code */
    *(.glue_7t)        /* glue thumb to arm code */
    *(.eh_frame)

    KEEP (*(.init))
    KEEP (*(.fini))

    . = ALIGN(4);
    _etext = .;        /* define a global symbol at end of code */
  } >FLASH

  /* Constant data goes into FLASH */
  .rodata :
  {
    . = ALIGN(4);
    *(.rodata)         /* .rodata sections (constants, strings, etc.) */
    *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
    . = ALIGN(4);
  } >FLASH

  .ARM.extab   : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >FLASH
  .ARM : {
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
  } >FLASH

  .preinit_array     :
  {
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
  } >FLASH
  .init_array :
  {
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
  } >FLASH
  .fini_array :
  {
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
  } >FLASH

  /* used by the startup to initialize data */
  _sidata = LOADADDR(.data);

  /* Initialized data sections goes into RAM, load LMA copy after code. */
  /* The startup code copies from _sidata to _sdata.._edata.            */
  .data :
  {
    . = ALIGN(4);
    _sdata = .;        /* create a global symbol at data start */
    *(.data)           /* .data sections */
    *(.data*)          /* .data* sections */

    . = ALIGN(4);
    _edata = .;        /* define a global symbol at data end */
  } >DTCMRAM AT> FLASH

  /* Uninitialized data section */
  . = ALIGN(4);
  .bss :
  {
    /* This is used by the startup in order to initialize the .bss section */
    _sbss = .;         /* define a global symbol at bss start */
    __bss_start__ = _sbss;
    *(.bss)
    *(.bss*)
    *(COMMON)

    . = ALIGN(4);
    _ebss = .;         /* define a global symbol at bss end */
    __bss_end__ = _ebss;
  } >RAM123

  /* User_heap_stack section, used to check that there is enough RAM left */
  ._user_heap_stack :
  {
    . = ALIGN(8);
    PROVIDE ( end = . );
    PROVIDE ( _end = . );
    . = . + _Min_Heap_Size;
    . = . + _Min_Stack_Size;
    . = ALIGN(8);
  } >DTCMRAM

  /* Remove information from the standard libraries */
  /DISCARD/ :
  {
    libc.a ( * )
    libm.a ( * )
    libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}
/* Update Entry Point */
ENTRY(Reset_Handler)

/* Highest address of the user mode stack */
_estack = 0x20020000;    /* end of RAM */
/* Generate a link error if heap and stack don't fit into RAM */
_Min_Heap_Size  = 0x400; /* required amount of heap  */
_Min_Stack_Size = 0x800; /* required amount of stack */

/* 1mS counter location used by ISR */
uwTick = 0x20000000;
/* Fixed-address word tested by Reset_Handler; same location as in the default image */
magic_cookie = 0x20000004;
/* Branch target used by Reset_Handler when the cookie matches. In the update */
/* image this is a FLASH address, kept so that the update and default sources */
/* stay identical; it is hard-coded and must track the FLASH image's map file. */
UpdateContinueInit = 0x08014572;

/* Specify the memory areas */
/* DTCMRAM starts at 0x20000008, leaving the two words below it for the */
/* fixed locations uwTick (0x20000000) and magic_cookie (0x20000004).   */
/* 0x20000008 + 0x1fff8 = 0x20020000 = _estack.                         */
MEMORY
{
   DTCMRAM (xrw)  : ORIGIN = 0x20000008, LENGTH = 0x1fff8
   AXIRAM (xrw)   : ORIGIN = 0x24000000, LENGTH = 0x80000
   RAM123 (xrw)   : ORIGIN = 0x30000000, LENGTH = 288K
}

/* Define output sections */
/* Update image: vectors, code, constants and the .data load image are */
/* all placed in AXIRAM; .data runs from DTCMRAM; .bss lives in RAM123. */
/* NOTE: the order of the input-section patterns below determines code  */
/* addresses, and another image jumps to a hard-coded address in this   */
/* one, so do not reorder or merge patterns without re-checking the map. */
SECTIONS
{
  /* The startup code goes first into AXIRAM */
  .isr_vector :
  {
    . = ALIGN(4);
    KEEP(*(.isr_vector)) /* Startup code */
    . = ALIGN(4);
  } >AXIRAM

  /* The program code and other data goes into AXIRAM */
  .text :
  {
    . = ALIGN(4);
    *(.text)           /* .text sections (code) */
    *(.text*)          /* .text* sections (code) */
    *(.glue_7)         /* glue arm to thumb code */
    *(.glue_7t)        /* glue thumb to arm code */
    *(.eh_frame)

    KEEP (*(.init))
    KEEP (*(.fini))

    . = ALIGN(4);
    _etext = .;        /* define a global symbol at end of code */
  } >AXIRAM

  /* Constant data goes into AXIRAM */
  .rodata :
  {
    . = ALIGN(4);
    *(.rodata)         /* .rodata sections (constants, strings, etc.) */
    *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
    . = ALIGN(4);
  } >AXIRAM

  /* Exception-handling tables; __exidx_start/__exidx_end bracket the index */
  .ARM.extab   : { *(.ARM.extab* .gnu.linkonce.armextab.*) } >AXIRAM
  .ARM : {
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
  } >AXIRAM

  /* Constructor/destructor pointer tables, each bracketed by start/end symbols */
  .preinit_array     :
  {
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
  } >AXIRAM
  .init_array :
  {
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
  } >AXIRAM
  .fini_array :
  {
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
  } >AXIRAM

  /* used by the startup to initialize data: load address of .data */
  _sidata = LOADADDR(.data);

  /* Initialized data sections goes into RAM, load LMA copy after code. */
  /* Runs from DTCMRAM, load image stored in AXIRAM; the startup code   */
  /* copies from _sidata to _sdata.._edata.                             */
  .data : 
  {
    . = ALIGN(4);
    _sdata = .;        /* create a global symbol at data start */
    *(.data)           /* .data sections */
    *(.data*)          /* .data* sections */

    . = ALIGN(4);
    _edata = .;        /* define a global symbol at data end */
  } >DTCMRAM AT> AXIRAM

  
  /* Uninitialized data section */
  . = ALIGN(4);
  .bss :
  {
    /* This is used by the startup in order to initialize the .bss section */
    _sbss = .;         /* define a global symbol at bss start */
    __bss_start__ = _sbss;
    *(.bss)
    *(.bss*)
    *(COMMON)

    . = ALIGN(4);
    _ebss = .;         /* define a global symbol at bss end */
    __bss_end__ = _ebss;
  } >RAM123

  /* User_heap_stack section, used to check that there is enough RAM left. */
  /* It holds no input sections; it only advances the location counter by  */
  /* the minimum heap and stack sizes inside DTCMRAM.                      */
  ._user_heap_stack :
  {
    . = ALIGN(8);
    PROVIDE ( end = . );
    PROVIDE ( _end = . );
    . = . + _Min_Heap_Size;
    . = . + _Min_Stack_Size;
    . = ALIGN(8);
  } >DTCMRAM

  

  /* Remove information from the standard libraries */
  /DISCARD/ :
  {
    libc.a ( * )
    libm.a ( * )
    libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}

(It looks like stackoverflow has added some backward slashes.)

Not only does the update code always lock up - it locks up in different places. By setting different bits in the magic cookie and examining them after the next reset I have seen it lock up on the branch to UpdateContinueInit, before it finished with the update's .bss, on the branch to or inside the update's __libc_init_array, and sometime after branching to the update's main.

When running the default's main I have tried a long branch to a small subroutine in the update and that works. I have checked the list files and setting the vector table offset register in SystemInit is correct in both cases. Trying to examine the fault registers starting at 0xe000dc28 hasn't indicated anything.

The default in FLASH runs fine by itself when not trying to branch to the update. I am confident that the default loads the update into RAM properly. The update runs properly if I set the boot address register to the update's isr_vector, write the update to RAM via JTAG, and reset the processor.

I am hoping that someone has successfully done this before and can point me to the nuance of this processor that I am missing.

Thanks.

  • 1
    https://www.google.com/search?q=vtor – artless noise Dec 05 '22 at 23:40
  • There is a typo in my update .ld posting. DTCMRAM (xrw) : ORIGIN = 0x20000008, LENGTH = 0x1fff8 – obiewhistler Dec 06 '22 at 19:19
  • so are you trying to have a flash program and a ram program? and I assume you are just building them separately, and then copy and jump? and is that working or not? it only takes a few dozen lines to demonstrate that problem if that is your problem. or are you past that and into exceptions in ram which is just a vtor thing? 0x20000008 obviously cannot be the base address to change the vector table to if that is where you are stuck. please create a minimal example of the problem and explain more and show debug. – old_timer Dec 07 '22 at 01:00
  • I am trying to load and run a RAM update program instead of the FLASH default program. Yes, I am building them separately. Right now I am compiling the same source for both so the RAM update has exception vectors in RAM at 0x24000000 in the same way that the FLASH default has exception vectors in FLASH at 0x08000000. In fact, WinMerge shows the only hex file differences are 0x08 or 0x24 in address MSBs. VTOR is set properly in both cases. The load to RAM works - CRC verified - but the branch to RAM goes off into the weeds. Fixing the AXI read errata does not help. – obiewhistler Dec 07 '22 at 18:12

0 Answers0