I am assuming you are talking about ARM
architecture:
- compile the code with
__attribute__((always_inline));
on all related functions and compile with -fpic -fPIC
read here for more info.
- disassembling it and put it as-is on SRAM, e.g at adress
0xd1001000
- reserve
{r4-r15}
on SRAM.
- set
pc
to 0xd1001000
and sp
properly to point the stack.
- restore
{r4-r15}
- jump back to DDR.
You can look at here for a good resource of how to use the right gcc flags.
here is a refernce from uboot - it doesn't jump back to the initial place:
/*
* void relocate_code (addr_sp, gd, addr_moni)
*
* This "function" does not return, instead it continues in RAM
* after relocating the monitor code.
*
*/
.globl relocate_code
relocate_code:
mov r4, r0 /* save addr_sp */
mov r5, r1 /* save addr of gd */
mov r6, r2 /* save addr of destination */
/* Set up the stack */
stack_setup:
mov sp, r4
adr r0, _start
cmp r0, r6
moveq r9, #0 /* no relocation. relocation offset(r9) = 0 */
beq clear_bss /* skip relocation */
mov r1, r6 /* r1 <- scratch for copy_loop */
ldr r3, _image_copy_end_ofs
add r2, r0, r3 /* r2 <- source end address */
copy_loop:
ldmia r0!, {r9-r10} /* copy from source address [r0] */
stmia r1!, {r9-r10} /* copy to target address [r1] */
cmp r0, r2 /* until source end address [r2] */
blo copy_loop
#ifndef CONFIG_SPL_BUILD
/*
* fix .rel.dyn relocations
*/
ldr r0, _TEXT_BASE /* r0 <- Text base */
sub r9, r6, r0 /* r9 <- relocation offset */
ldr r10, _dynsym_start_ofs /* r10 <- sym table ofs */
add r10, r10, r0 /* r10 <- sym table in FLASH */
ldr r2, _rel_dyn_start_ofs /* r2 <- rel dyn start ofs */
add r2, r2, r0 /* r2 <- rel dyn start in FLASH */
ldr r3, _rel_dyn_end_ofs /* r3 <- rel dyn end ofs */
add r3, r3, r0 /* r3 <- rel dyn end in FLASH */
fixloop:
ldr r0, [r2] /* r0 <- location to fix up, IN FLASH! */
add r0, r0, r9 /* r0 <- location to fix up in RAM */
ldr r1, [r2, #4]
and r7, r1, #0xff
cmp r7, #23 /* relative fixup? */
beq fixrel
cmp r7, #2 /* absolute fixup? */
beq fixabs
/* ignore unknown type of fixup */
b fixnext
fixabs:
/* absolute fix: set location to (offset) symbol value */
mov r1, r1, LSR #4 /* r1 <- symbol index in .dynsym */
add r1, r10, r1 /* r1 <- address of symbol in table */
ldr r1, [r1, #4] /* r1 <- symbol value */
add r1, r1, r9 /* r1 <- relocated sym addr */
b fixnext
fixrel:
/* relative fix: increase location by offset */
ldr r1, [r0]
add r1, r1, r9
fixnext:
str r1, [r0]
add r2, r2, #8 /* each rel.dyn entry is 8 bytes */
cmp r2, r3
blo fixloop
b clear_bss
_rel_dyn_start_ofs:
.word __rel_dyn_start - _start
_rel_dyn_end_ofs:
.word __rel_dyn_end - _start
_dynsym_start_ofs:
.word __dynsym_start - _start
#endif /* #ifndef CONFIG_SPL_BUILD */
clear_bss:
#ifdef CONFIG_SPL_BUILD
/* No relocation for SPL */
ldr r0, =__bss_start
ldr r1, =__bss_end__
#else
ldr r0, _bss_start_ofs
ldr r1, _bss_end_ofs
mov r4, r6 /* reloc addr */
add r0, r0, r4
add r1, r1, r4
#endif
mov r2, #0x00000000 /* clear */
clbss_l:str r2, [r0] /* clear loop... */
add r0, r0, #4
cmp r0, r1
bne clbss_l
/*
* We are done. Do not return, instead branch to second part of board
* initialization, now running from RAM.
*/
jump_2_ram:
/*
* If I-cache is enabled invalidate it
*/
#ifndef CONFIG_SYS_ICACHE_OFF
mcr p15, 0, r0, c7, c5, 0 @ invalidate icache
mcr p15, 0, r0, c7, c10, 4 @ DSB
mcr p15, 0, r0, c7, c5, 4 @ ISB
#endif
ldr r0, _board_init_r_ofs
adr r1, _start
add lr, r0, r1
add lr, lr, r9
/* setup parameters for board_init_r */
mov r0, r5 /* gd_t */
mov r1, r6 /* dest_addr */
/* jump to it ... */
mov pc, lr
_board_init_r_ofs:
.word board_init_r - _start