0

I am trying to execute a simple code from RAM, but for some reason the program halts/throws hard fault. I am using CCMRAM for my data, heap and stack while SRAM1 for executing code. Here is my linker script and startup file.

LinkerScript.ld

/*
******************************************************************************
**
**  File        : LinkerScript.ld
**
**  Author      : Auto-generated by Ac6 System Workbench
**
**  Abstract    : Linker script for STM32F407ZETx Device from STM32F4 series
**                112Kbytes SRAM1
**                16Kbytes  SRAM2
**                64Kbytes  CCMRAM
**                512Kbytes ROM
**
**                Set heap size, stack size and stack location according
**                to application requirements.
**
**                Set memory bank area and size if external memory is used.
**
**  Target      : STMicroelectronics STM32
**
**  Distribution: The file is distributed �as is,� without any warranty
**                of any kind.
**
*****************************************************************************
** @attention
**
** <h2><center>&copy; COPYRIGHT(c) 2019 Ac6</center></h2>
**
** Redistribution and use in source and binary forms, with or without modification,
** are permitted provided that the following conditions are met:
**   1. Redistributions of source code must retain the above copyright notice,
**      this list of conditions and the following disclaimer.
**   2. Redistributions in binary form must reproduce the above copyright notice,
**      this list of conditions and the following disclaimer in the documentation
**      and/or other materials provided with the distribution.
**   3. Neither the name of Ac6 nor the names of its contributors
**      may be used to endorse or promote products derived from this software
**      without specific prior written permission.
**
** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
** DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
** FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
** DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
** SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
** CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**
*****************************************************************************
*/

/* Entry Point */
ENTRY(Reset_Handler)

/* Highest address of the user mode stack */
/*_estack = 0x2001C000;    /* start of SRAM2 */
_estack = 0x10010000;    /* end of CCMRAM */

_Min_Heap_Size = 0;      /* required amount of heap  */
_Min_Stack_Size = 0x400; /* required amount of stack */

/* Memories definition */
MEMORY
{
  SRAM1   (xrw) : ORIGIN = 0x20000000,  LENGTH = 112K
  SRAM2   (xrw) : ORIGIN = 0x2001C000,  LENGTH = 16K
  FLASH   (rx)  : ORIGIN = 0x08000000,  LENGTH = 512K
  CCMRAM  (rw)  : ORIGIN = 0x10000000,  LENGTH = 64K
}

/* Sections */
SECTIONS
{
  /* The startup code into ROM memory */
  .isr_vector :
  {
    . = ALIGN(4);
    KEEP(*(.isr_vector)) /* Startup code */
    . = ALIGN(4);
  } >FLASH

  /*sram1 section for code and data*/
  _sisram1 = LOADADDR(.sram1);
  .sram1 :
  {
    . = ALIGN(4);
    _s_sram1 = .;
    *(.sram1);
    *(.sram1*);
    . = ALIGN(4);
    _e_sram1 = .;
  } > SRAM1 AT >FLASH
  
   /*sram2 section for code and data*/
  _sisram2 = LOADADDR(.sram2);
  .sram2 :
  {
    . = ALIGN(4);
    _s_sram2 = .;
    *(.sram2);
    *(.sram2*);
    . = ALIGN(4);
    _e_sram2 = .;
  } > SRAM2 AT >FLASH
  
  /* The program code and other data into FLASH memory */
  .text :
  {
    . = ALIGN(4);
    *(.text)           /* .text sections (code) */
    *(.text*)          /* .text* sections (code) */
    *(.glue_7)         /* glue arm to thumb code */
    *(.glue_7t)        /* glue thumb to arm code */
    *(.eh_frame)

    KEEP (*(.init))
    KEEP (*(.fini))

    . = ALIGN(4);
    _etext = .;        /* define a global symbols at end of code */
  } >FLASH

  /* Constant data into FLASH memory*/
  .rodata :
  {
    . = ALIGN(4);
    *(.rodata)         /* .rodata sections (constants, strings, etc.) */
    *(.rodata*)        /* .rodata* sections (constants, strings, etc.) */
    . = ALIGN(4);
  } >FLASH

  .ARM.extab   : { 
    . = ALIGN(4);
    *(.ARM.extab* .gnu.linkonce.armextab.*)
    . = ALIGN(4);
  } > FLASH
  
  .ARM : {
    . = ALIGN(4);
    __exidx_start = .;
    *(.ARM.exidx*)
    __exidx_end = .;
    . = ALIGN(4);
  } >FLASH

  .preinit_array     :
  {
    . = ALIGN(4);
    PROVIDE_HIDDEN (__preinit_array_start = .);
    KEEP (*(.preinit_array*))
    PROVIDE_HIDDEN (__preinit_array_end = .);
    . = ALIGN(4);
  } >FLASH
  
  .init_array :
  {
    . = ALIGN(4);
    PROVIDE_HIDDEN (__init_array_start = .);
    KEEP (*(SORT(.init_array.*)))
    KEEP (*(.init_array*))
    PROVIDE_HIDDEN (__init_array_end = .);
    . = ALIGN(4);
  } >FLASH
  
  .fini_array :
  {
    . = ALIGN(4);
    PROVIDE_HIDDEN (__fini_array_start = .);
    KEEP (*(SORT(.fini_array.*)))
    KEEP (*(.fini_array*))
    PROVIDE_HIDDEN (__fini_array_end = .);
    . = ALIGN(4);
  } >FLASH

  /* Used by the startup to initialize data */
  _sidata = LOADADDR(.data);

  /* Initialized data sections into RAM memory */
  .data : 
  {
    . = ALIGN(4);
    _sdata = .;        /* create a global symbol at data start */
    *(.data)           /* .data sections */
    *(.data*)          /* .data* sections */

    . = ALIGN(4);
    _edata = .;        /* define a global symbol at data end */
  } >CCMRAM AT> FLASH

  
  /* Uninitialized data section into RAM memory */
  . = ALIGN(4);
  .bss :
  {
    /* This is used by the startup in order to initialize the .bss secion */
    _sbss = .;         /* define a global symbol at bss start */
    __bss_start__ = _sbss;
    *(.bss)
    *(.bss*)
    *(COMMON)

    . = ALIGN(4);
    _ebss = .;         /* define a global symbol at bss end */
    __bss_end__ = _ebss;
  } >CCMRAM

  /* User_heap_stack section, used to check that there is enough RAM left */
  ._user_heap_stack :
  {
    . = ALIGN(8);
    PROVIDE ( end = . );
    PROVIDE ( _end = . );
    . = . + _Min_Heap_Size;
    . = . + _Min_Stack_Size;
    . = ALIGN(8);
  } >CCMRAM

  

  /* Remove information from the compiler libraries */
  /DISCARD/ :
  {
    libc.a ( * )
    libm.a ( * )
    libgcc.a ( * )
  }

  .ARM.attributes 0 : { *(.ARM.attributes) }
}

And here is part of my startup file

/**
  ******************************************************************************
  * @file      startup_stm32.s dedicated to STM32F407ZETx device
  * @author    Ac6
  * @version   V1.0.0
  * @date      2019-03-30
  ******************************************************************************
  */

.syntax unified
.cpu cortex-m4
.fpu softvfp
.thumb

.global g_pfnVectors
.global Default_Handler

/* start address for the initialization values of the .data section.
defined in linker script */
.word _sidata
/* start address for the .data section. defined in linker script */
.word _sdata
/* end address for the .data section. defined in linker script */
.word _edata
/* start address for the .bss section. defined in linker script */
.word _sbss
/* end address for the .bss section. defined in linker script */
.word _ebss

/**
 * @brief  This is the code that gets called when the processor first
 *          starts execution following a reset event. Only the absolutely
 *          necessary set is performed, after which the application
 *          supplied main() routine is called.
 * @param  None
 * @retval : None
*/

  .section .text.Reset_Handler
  .weak Reset_Handler
  .type Reset_Handler, %function
Reset_Handler:
  ldr   r0, =_estack
  mov   sp, r0          /* set stack pointer */

/* Copy the data segment initializers from flash to SRAM */
  ldr r0, =_sdata
  ldr r1, =_edata
  ldr r2, =_sidata
  movs r3, #0
  b LoopCopyDataInit

CopyDataInit:
  ldr r4, [r2, r3]
  str r4, [r0, r3]
  adds r3, r3, #4

LoopCopyDataInit:
  adds r4, r0, r3
  cmp r4, r1
  bcc CopyDataInit

/* Zero fill the bss segment. */
  ldr r2, =_sbss
  ldr r4, =_ebss
  movs r3, #0
  b LoopFillZerobss

FillZerobss:
  str  r3, [r2]
  adds r2, r2, #4

LoopFillZerobss:
  cmp r2, r4
  bcc FillZerobss

///////////// Following blocks are for SRAM1 /////////////////////
// Copy from flash to SRAM1
  ldr r0, =_s_sram1
  ldr r1, =_e_sram1
  ldr r2, =_sisram1
  movs r3, #0
  b  LoopCopySRAM1Init

CopySRAM1Init:
    ldr r4, [r2, r3]
    str r4, [r0, r3]
    adds r3, r3, #4

LoopCopySRAM1Init:
    adds r4, r0, r3
    cmp r4, r1
    bcc CopySRAM1Init
// End of data copy from Flash to SRAM1

// Zero fill the SRAM1 segment.
ldr r2, =_s_sram1
b LoopFillZeroSRAM1

FillZeroSRAM1:
  movs r3, #0
  str r3, [r2]
  adds r2, r2, #4
LoopFillZeroSRAM1:
  ldr r3, = _e_sram1
  cmp r2, r3
  bcc FillZeroSRAM1
//////////////// End of SRAM1 Blocks /////////////////

///////////// Following blocks are for SRAM2 /////////////////////
// Copy from flash to SRAM2
  ldr r0, =_s_sram2
  ldr r1, =_e_sram2
  ldr r2, =_sisram2
  movs r3, #0
  b  LoopCopySRAM2Init

CopySRAM2Init:
    ldr r4, [r2, r3]
    str r4, [r0, r3]
    adds r3, r3, #4

LoopCopySRAM2Init:
    adds r4, r0, r3
    cmp r4, r1
    bcc CopySRAM2Init
// End of data copy from Flash to SRAM2

// Zero fill the SRAM2 segment.
ldr r2, =_s_sram2
b LoopFillZeroSRAM2

FillZeroSRAM2:
  movs r3, #0
  str r3, [r2]
  adds r2, r2, #4
LoopFillZeroSRAM2:
  ldr r3, = _e_sram2
  cmp r2, r3
  bcc FillZeroSRAM2
//////////////// End of SRAM2 Blocks /////////////////

/* Call the clock system intitialization function.*/
  bl  SystemInit
/* Call static constructors */
  bl __libc_init_array
/* Call the application's entry point.*/
  bl main

LoopForever:
    b LoopForever

Can anybody please help me if there is any bug here. Lets say, for now we are only trying to run code from SRAM1 since that is the one which is connected to I-bus and D-bus ports of the Cortex M4 on this MCU. With respect to the picture below, I would expect bl to be jumping to beginning of SRAM1.. Can you please shed some light on it:

Debug Perspective

STM32F407ZET6 AHB Bus Matrix

Debug Perspective Updated

Debug 2

Debug 3

The hardfault

  • From your linker script, the code that needs to be located in SRAM should be in sections .sram1 or .sram2. How do you tell the linker some of the functions must be in section .sramx ? By default every function will be placed in .text or .text.function – Guillaume Petitjean Jul 01 '22 at 13:24
  • Also if you execute step by step (asm instruction per asm instruction) you will understand better what's going on. The branch instruction that you're showing is juming in Flash memory (0x0800 0000) but it's only a veneer, continue to execute and check whether it jumps to RAM and whether the code is correctly loaded in RAM – Guillaume Petitjean Jul 01 '22 at 13:26
  • So, with further debugging it shows it does reach the code in RAM and does execute. But at some point in time inside veneer of GPIO_Toggle call it throws the hardfault with one of the co-processor instruction. I have no idea what this co-processor instruction is trying to do. With the new information, can you guide little more please? – Sheikh Muhammad Junaid Aslam Jul 02 '22 at 08:49
  • Also, I dont know why it tries to read memory outside the ram memory range while moving contents of the address at r0 which apparently contains the address 0x10010004 while i have simply passed nullptr to this function call. – Sheikh Muhammad Junaid Aslam Jul 02 '22 at 08:51
  • Not directly relevant (or maybe it is?). But whenever you do something with moving executable code around, you need to take special care about interrupt vector table. Are you 100% sure it's placed correctly and interrupts are correctly invoked? – Ilya Jul 02 '22 at 08:58
  • At least the hardfault is getting invoked... And for this experiment, i am not moving all the code to RAM. It is just one function for the sake of experiment. – Sheikh Muhammad Junaid Aslam Jul 02 '22 at 09:00
  • It's difficult to help you of you don't provide more detail. Show the disassembly window at the instruction that triggers the hardfault for example. I don't know what you mean by co-processor instruction. Do you mean enabling the FPU ? – Guillaume Petitjean Jul 04 '22 at 12:28
  • I have put that problem aside for now. But, i have a greater problem to be resolved now. When I use different memory segments for global data, then the amount of used flash increases by the size of data which is placed in ram segments. Why is that so? I mean if a data is read only, then it makes sense that it takes some space in flash, but if a data is unintialized just like a little memory pool then why the flash code size increases by the size of that data (lets say 1kb) when clearly that should go to SRAM1 or SRAM2 etc. – Sheikh Muhammad Junaid Aslam Jul 11 '22 at 22:08
  • The [ALIGN() statements](https://stackoverflow.com/questions/8458084/align-in-linker-scripts/72942905#72942905) at the start are not doing anything. I would put all the VMA!=LMA sections at the end with '.data'. You have an extra complication of alignment of the LMA (flash source to copy). Also, you need to set attributes (for example CONTENTS, ALLOC, LOAD, READONLY, DATA if the inputs don't have these). Supply `objdump -h` output. – artless noise Jul 27 '22 at 13:12
  • See also: [STM32 startup](https://stackoverflow.com/questions/58902628/what-are-data-segment-initializers/71849963#71849963) for better code than the supplied version. It is extremely in-efficient loop copies. – artless noise Jul 27 '22 at 13:15
  • Thanks a lot for a nice reference. I will definitely make use of it. – Sheikh Muhammad Junaid Aslam Jul 28 '22 at 08:23

1 Answers1

0

According to the screen captures, your code is jumping to SRAM (as expected) but there is no code in SRAM (MOV R0 R0 indicates the memory is filled with 0s) hence the hardFault.

You have to copy from Flash to RAM the code that you want to execute in RAM.

Guillaume Petitjean
  • 2,408
  • 1
  • 21
  • 47
  • Yes, it was solved indeed. i could successfully do that using my C implementation of the reset function. It was strange that the same code in assembly was having issues. may be due to optimization etc. in C i deliberately turned off the optimizations. – Sheikh Muhammad Junaid Aslam Aug 08 '22 at 10:43
  • There is no optimization by the compiler in assembly since the assembly ... well ...is not compiled :) – Guillaume Petitjean Aug 08 '22 at 13:56
  • Indeed at assembler level no high level optimizations. I don't know then why it was not working. Very weird. I copied that code from the application notes of STM. I did not have enough time to work on that so did a quick implementation in C and was working good. so that was sufficient. – Sheikh Muhammad Junaid Aslam Aug 09 '22 at 07:46