0

I was working on a project when I came across this behavior, which I can't quite understand.

Context

  • I have a function like:

    float SCL_calculate(AVG_struct_type* data)

    The inner workings of this function are not relevant; the function outputs a float correctly (it has already been fully debugged).

  • I have a global byte array (declared as `char` here) defined like this:

    char output_buff[9] = {0x0};

  • I'm trying to write a float into this array, starting at index [1]

  • All this in the context of embedded systems

  • I'm using a STM32F411CEU6

The problem

Originally I had this code:

*( (float *) (&output_buff[1]) ) = SCL_calculate(&Voltage);

but when I used this, the UC (microcontroller) jumped into the HardFault_Handler while trying to write into the array. If I instead do:

float data;
data = SCL_calculate(&Voltage);
*( (float *) (&output_buff[1]) ) = data;

it works just fine.

My question

Why does one version jump into the HardFault_Handler while the other does not?

minimal reproducible example

Here is a minimal reproducible example. I left all STM32 device configuration at its defaults and removed all the generated comments and functions to make it easier to read.


float SCL_calculate(void);

/* Minimal reproduction: stores the same float into output_buff at byte
 * offset 1 in two different ways and then idles forever. */
int main(void)
{
  HAL_Init();
  float data = SCL_calculate( );
  /* Variant 1: the result is first held in the local `data`, then written
   * through a float pointer obtained by casting &output_buff[1]. */
  *( (float *) (&output_buff[1]) ) = data; //NO ERROR
  /* Variant 2: the call result is written through the same cast pointer
   * directly. NOTE(review): &output_buff[1] is a char address, so it is not
   * guaranteed to satisfy the alignment of float. */
  *( (float *) (&output_buff[1]) ) = SCL_calculate( ); //ERROR
  /* Nothing left to do: spin forever. */
  while (1)
  {
  }
}

/* Stand-in for the real calculation: always yields the same fixed value. */
float SCL_calculate(void)
{
    const float fixed_result = 12.34f;

    return fixed_result;
}

Here is the full main.c file

/* USER CODE BEGIN Header */
/**
  ******************************************************************************
  * @file           : main.c
  * @brief          : Main program body
  ******************************************************************************
  * @attention
  *
  * Copyright (c) 2023 STMicroelectronics.
  * All rights reserved.
  *
  * This software is licensed under terms that can be found in the LICENSE file
  * in the root directory of this software component.
  * If no LICENSE file comes with this software, it is provided AS-IS.
  *
  ******************************************************************************
  */
/* USER CODE END Header */
/* Includes ------------------------------------------------------------------*/
#include "main.h"

/* Private includes ----------------------------------------------------------*/
/* USER CODE BEGIN Includes */

/* USER CODE END Includes */

/* Private typedef -----------------------------------------------------------*/
/* USER CODE BEGIN PTD */

/* USER CODE END PTD */

/* Private define ------------------------------------------------------------*/
/* USER CODE BEGIN PD */
/* USER CODE END PD */

/* Private macro -------------------------------------------------------------*/
/* USER CODE BEGIN PM */

/* USER CODE END PM */

/* Private variables ---------------------------------------------------------*/

/* USER CODE BEGIN PV */

/* USER CODE END PV */

/* Private function prototypes -----------------------------------------------*/
void SystemClock_Config(void);
/* USER CODE BEGIN PFP */
char output_buff[9] = {0x0};
/* USER CODE END PFP */

/* Private user code ---------------------------------------------------------*/
/* USER CODE BEGIN 0 */
float SCL_calculate( void );
/* USER CODE END 0 */

/**
  * @brief  The application entry point.
  *
  * Calls HAL_Init() and SystemClock_Config(), then stores the result of
  * SCL_calculate() into output_buff at byte offset 1 in two different ways,
  * and finally idles in an empty infinite loop.
  * @retval int (not reached in practice: the function ends in an endless loop)
  */
int main(void)
{
  /* USER CODE BEGIN 1 */

  /* USER CODE END 1 */

  /* MCU Configuration--------------------------------------------------------*/

  /* Reset of all peripherals, Initializes the Flash interface and the Systick. */
  HAL_Init();

  /* USER CODE BEGIN Init */

  /* USER CODE END Init */

  /* Configure the system clock */
  SystemClock_Config();

  /* USER CODE BEGIN SysInit */

  /* USER CODE END SysInit */

  /* Initialize all configured peripherals */
  /* USER CODE BEGIN 2 */
  float data = SCL_calculate(  );
  /* Variant 1: value goes through the local `data` before being written via
   * a float pointer cast from &output_buff[1]. */
  *( (float *) (&output_buff[1]) ) = data; //NO ERROR
  /* Variant 2: the call result is written through the same cast pointer
   * directly. NOTE(review): &output_buff[1] is a char address, so it is not
   * guaranteed to satisfy the alignment of float. */
  *( (float *) (&output_buff[1]) ) = SCL_calculate(  ); //ERROR
  /* USER CODE END 2 */

  /* Infinite loop */
  /* USER CODE BEGIN WHILE */
  while (1)
  {
    /* USER CODE END WHILE */

    /* USER CODE BEGIN 3 */
  }
  /* USER CODE END 3 */
}

/**
  * @brief  System clock setup based on the HSI oscillator.
  *
  * Enables the PWR clock and selects regulator voltage scale 1, requests the
  * HSI oscillator (default calibration, PLL state RCC_PLL_NONE), then selects
  * HSI as the SYSCLK source with the AHB, APB1 and APB2 dividers all set to 1
  * and flash latency 0. Any HAL call that does not return HAL_OK ends up in
  * Error_Handler().
  * @retval None
  */
void SystemClock_Config(void)
{
  RCC_OscInitTypeDef osc_cfg = {0};
  RCC_ClkInitTypeDef clk_cfg = {0};

  /* Main internal regulator output voltage. */
  __HAL_RCC_PWR_CLK_ENABLE();
  __HAL_PWR_VOLTAGESCALING_CONFIG(PWR_REGULATOR_VOLTAGE_SCALE1);

  /* Oscillator parameters handed to HAL_RCC_OscConfig(). */
  osc_cfg.OscillatorType      = RCC_OSCILLATORTYPE_HSI;
  osc_cfg.HSIState            = RCC_HSI_ON;
  osc_cfg.HSICalibrationValue = RCC_HSICALIBRATION_DEFAULT;
  osc_cfg.PLL.PLLState        = RCC_PLL_NONE;
  if (HAL_OK != HAL_RCC_OscConfig(&osc_cfg))
  {
    Error_Handler();
  }

  /* CPU, AHB and APB bus clock parameters handed to HAL_RCC_ClockConfig(). */
  clk_cfg.ClockType      = RCC_CLOCKTYPE_HCLK | RCC_CLOCKTYPE_SYSCLK
                         | RCC_CLOCKTYPE_PCLK1 | RCC_CLOCKTYPE_PCLK2;
  clk_cfg.SYSCLKSource   = RCC_SYSCLKSOURCE_HSI;
  clk_cfg.AHBCLKDivider  = RCC_SYSCLK_DIV1;
  clk_cfg.APB1CLKDivider = RCC_HCLK_DIV1;
  clk_cfg.APB2CLKDivider = RCC_HCLK_DIV1;
  if (HAL_OK != HAL_RCC_ClockConfig(&clk_cfg, FLASH_LATENCY_0))
  {
    Error_Handler();
  }
}

/* USER CODE BEGIN 4 */
/* Placeholder for the real calculation: returns one arbitrary constant. */
float SCL_calculate(void)
{
    const float placeholder = 12.34f; /* arbitrary number */

    return placeholder;
}
/* USER CODE END 4 */

/**
  * @brief  Error trap: calls __disable_irq() and then spins in place.
  * @retval None (control never leaves the loop)
  */
void Error_Handler(void)
{
  /* USER CODE BEGIN Error_Handler_Debug */
  /* User can add his own implementation to report the HAL error return state */
  __disable_irq();
  for (;;)
  {
  }
  /* USER CODE END Error_Handler_Debug */
}

#ifdef  USE_FULL_ASSERT
/**
  * @brief  Receives the source file name and line number at which an
  *         assert_param error occurred. Currently a no-op placeholder.
  * @param  file: pointer to the source file name
  * @param  line: source line number of the assert_param error
  * @retval None
  */
void assert_failed(uint8_t *file, uint32_t line)
{
  /* USER CODE BEGIN 6 */
  /* User can add his own implementation to report the file name and line number,
     ex: printf("Wrong parameters value: file %s on line %d\r\n", file, line) */
  (void)file; /* intentionally unused until a reporter is added */
  (void)line;
  /* USER CODE END 6 */
}
#endif /* USE_FULL_ASSERT */


  • The C code itself is functionally identical. If there's a problem, it may be an alignment issue: does the UC have an alignment requirement for floats? Does `output_buff` have the same memory alignment in the 2 cases? – QF0 Jan 17 '23 at 20:35
  • You need to provide a [mcve]. Your code might have issues elsewhere. – Eugene Sh. Jan 17 '23 at 20:38
  • 1
    FWIW, `*( (float *) (&output_buff[1]) ) = ..` is a violation of the *strict aliasing rule*, and might very well end up as a misaligned access. The difference between your two versions might shift `output_buff` in memory, changing its alignment. – Eugene Sh. Jan 17 '23 at 20:48
  • 1
    You can work around this issue by doing something like `memcpy(&output_buff[1], &data, sizeof data);` – Eugene Sh. Jan 17 '23 at 20:50
  • @EugeneSh. I added a minimal reproducible example, hope it helps! – Electromosaw Jan 17 '23 at 20:59
  • @EugeneSh. I don't quite understand what the aliasing rule is. I think this is the first time I've read about it; maybe I had heard of it, but in my native language. I will google it, but do you have any advice on where to study it? – Electromosaw Jan 17 '23 at 21:01
  • 1
    `number = *((float *) &data);` - this does not make sense. `data` is a single-byte variable that can be allocated at any alignment. You are trying to read a whole float from its address, which can result in both a misaligned access and a memory access violation. – Eugene Sh. Jan 17 '23 at 21:02
  • 1
    Regarding strict aliasing: https://stackoverflow.com/questions/98650/what-is-the-strict-aliasing-rule – Eugene Sh. Jan 17 '23 at 21:03
  • @EugeneSh. it does not, the original function is kinda large, so I basically just did a function that return any random number, I didn't quite bother in that function having sense, I understand that could be confusing tho, I will change it, (I'll keep returning a random float number tho) – Electromosaw Jan 17 '23 at 21:05
  • @Electromosaw what micro? – 0___________ Jan 17 '23 at 21:49
  • @EugeneSh. it is quite tricky in F4. Core does not require aligned access, but FPU does. And it can only happen if you do not enable optimizations :) – 0___________ Jan 17 '23 at 22:13
  • The rule of thumb is to _never_ do any wild and crazy pointer casts in C unless you have quite in-depth knowledge about the language. There's a lot of pitfalls, alignment and strict aliasing are just two of them. – Lundin Jan 18 '23 at 10:08
  • @Lundin is there any book you would recommend for gaining deeper knowledge of the C language? – Electromosaw Jan 18 '23 at 13:35
  • 1
    @Electromosaw Depends on how deep :) The most up to date one is [Gustedt - Modern C](https://gustedt.gitlabpages.inria.fr/modern-c/). It's somewhat advanced too, as C books go (and available for free as pdf). The MISRA C and CERT C coding standards are also good to read just as learning material even if you don't plan to actually use them. – Lundin Jan 18 '23 at 14:29

2 Answers2

2

Why one way it jumps into the HardFault_Handler and the other way not?

Let's compile the code:

output_buff:
main:
        push    {r4, r7, lr}
        sub     sp, sp, #12
        add     r7, sp, #0
        bl      SCL_calculate
        vstr.32 s0, [r7, #4]
        ldr     r2, .L3
        ldr     r3, [r7, #4]      @ float
        str     r3, [r2]  @ float
        ldr     r4, .L3
        bl      SCL_calculate
        vmov.f32        s15, s0
        vstr.32 s15, [r4]
.L2:
        b       .L2
.L3:
        .word   output_buff+1
SCL_calculate:
        push    {r7}
        add     r7, sp, #0
        ldr     r3, .L7
        vmov    s15, r3
        vmov.f32        s0, s15
        mov     sp, r7
        ldr     r7, [sp], #4
        bx      lr
.L7:
        .word   1095069860

The first store uses the `str` instruction, which does not require aligned access.

str     r3, [r2]  @ float

The second store uses the FPU instruction `vstr.32` (I assume standard Cube settings), and FPU instructions require aligned access.

vstr.32 s15, [r4]

That is the reason why the first one works, and the second does not. It can only happen if you do not enable the optimizations (-O3 version below):

main:
        ldr     r3, .L4
        ldr     r2, .L4+4
        str     r2, [r3, #1]      @ unaligned
.L2:
        b       .L2
.L4:
        .word   .LANCHOR0
        .word   1095069860
SCL_calculate:
        vldr.32 s0, .L7
        bx      lr
.L7:
        .word   1095069860
output_buff:

How to prevent problems? Simply do not use pointer punning.

/*
 * STORE(dest, src, type)
 *
 * Evaluates `src` exactly once as a value of `type`, then copies its bytes to
 * the address of `dest` with memcpy. Because the copy is byte-wise, `dest`
 * does not have to be suitably aligned for `type`, and no object is accessed
 * through a pointer of an incompatible type.
 *
 * The scratch variable uses a deliberately unusual name: with a common name
 * such as `temp`, a call like STORE(buf[1], temp, float) would expand to a
 * self-assignment of the (uninitialised) inner variable instead of reading
 * the caller's `temp`.
 *
 * Requires <string.h> for memcpy.
 */
#define STORE(dest, src, type)                                        \
  do {                                                                \
    type store_scratch_ = (src);                                      \
    memcpy(&(dest), &store_scratch_, sizeof(store_scratch_));         \
  } while (0)

float SCL_calculate(void);
uint8_t output_buff[100];

/* Performs both stores into output_buff at byte offset 1 through STORE,
 * once from a named local and once straight from the call, then idles. */
int main(void)
{
  const float data = SCL_calculate();

  STORE(output_buff[1], data, float);            /* from the local copy */
  STORE(output_buff[1], SCL_calculate(), float); /* from the call result */

  for (;;)
  {
  }
}

/* Dummy producer used by the example: hands back one fixed float. */
float SCL_calculate(void)
{
    const float sample = 12.34f;

    return sample;
}

Calls to memcpy will be optimized out even if optimizations are not enabled.

output_buff:
main:
        push    {r7, lr}
        sub     sp, sp, #16
        add     r7, sp, #0
        bl      SCL_calculate
        vstr.32 s0, [r7, #12]
        ldr     r3, [r7, #12]     @ float
        str     r3, [r7, #8]      @ float
        ldr     r3, [r7, #8]
        ldr     r2, .L3
        str     r3, [r2, #1]      @ unaligned
        bl      SCL_calculate
        vmov.f32        s15, s0
        vstr.32 s15, [r7, #4]
        ldr     r3, [r7, #4]
        ldr     r2, .L3
        str     r3, [r2, #1]      @ unaligned
.L2:
        b       .L2
.L3:
        .word   output_buff
SCL_calculate:
        push    {r7}
        add     r7, sp, #0
        ldr     r3, .L7
        vmov    s15, r3
        vmov.f32        s0, s15
        mov     sp, r7
        ldr     r7, [sp], #4
        bx      lr
.L7:
        .word   1095069860

https://godbolt.org/z/37nY8Wbe9

Using memcpy also prevents another problem: if you port the code to, for example, a Cortex-M0, the compiler will actually call memcpy or use byte-sized instructions, because that core requires aligned access.

https://godbolt.org/z/9eo89anqa

0___________
  • 60,014
  • 4
  • 34
  • 74
  • `typedef union { float flt [n]; uint8_t raw [sizeof(float[n])];` and union type punning is another possible work-around, which gives a tiny performance benefit over `memcpy`. – Lundin Jan 18 '23 at 10:11
  • @Lundin the only problem is that if you want unaligned access you need to memcpy to union. So the code will be exactly the same – 0___________ Jan 19 '23 at 11:16
-1

You're writing a float (32-bit) into the second ([1]) element of char (8-bit) array. So if your array started at address 0x20000000, then you're writing a 32-bit (4-byte) value into 0x20000001. This is an alignment issue. You can write a 32-bit data block only to 32-bit aligned memory address, 0x20000000 or 0x20000004 or 0x20000008 and so on. Similarly, a 16-bit value should be 2-byte aligned and should not be partially in one 32-bit group and partially in another one.

Ilya
  • 992
  • 7
  • 14
  • It is F4 and does not need data to be aligned – 0___________ Jan 17 '23 at 21:51
  • @0___________ your statement is incorrect. PM0214 STM32F4 Programming Manual, page 65, section 3.3.5 Alignment. It doesn't require alignment only for integer loads. It explicitly states that all other instructions other than listed there must be aligned or else they cause UsageFault (which escalates to HardFault). Float is loaded via FPU's VLDR instruction (disassembled an example), which must therefore be aligned. – Ilya Jan 18 '23 at 09:15