1

The following is the assembly I have used in an attempt to print to console:

# Question's original attempt: tries to print 42 with a single ecall, then exits.
# NOTE(review): the working version later on this page spells the directive
# `.global _start`; confirm that the undotted form assembles as intended.
global _start

_start:
  addi   a0, x0, 1          # a0 = 1 (first ecall argument)
  addi   a1, x0, 42         # a1 = 42, the value the author hopes to see printed
  addi   a7, x0, 63         # a7 = call number 63; the answer below gives 64 as pk's SYS_write,
                            # so this is not a write (presumably SYS_read -- TODO confirm)
  ecall

  addi   a0, x0, 0          # a0 = 0 (exit status)
  addi   a7, x0, 93         # a7 = 93, the exit call number
  ecall

.data
num:                        # one byte of data; nothing in the code above references it
  .byte 6  

I compiled with

riscv64-unknown-elf-as  -o example.o  example.S
riscv64-unknown-elf-ld  -o example  example.o

and run using spike and proxy kernel

spike pk example

No output is generated.

This works on https://www.kvakil.me/venus/ with

  # Venus simulator variant: here the call selector travels in a0 rather than a7.
  addi   a0, x0, 1          # a0 = 1: presumably Venus's print-integer call ID (the text says this prints 42)
  addi   a1, x0, 42         # a1 = 42, the integer to print
  ecall

and prints 42.

Also, if I wanted to print the contents of num in the data segment, how would I go about it?

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
code4eva
  • 33
  • 1
  • 5

2 Answers

2

I managed a solution from Peter Cordes' answer. I am posting the implementation here in case someone needs it and for my own reference.

UPDATE:

Steps:

  1. Given a signed number, take its absolute value; if the number is negative, record that fact in a variable.
  2. Choose the end address of a buffer; the digits are stored backwards from that address, as described in the next step.
  3. Perform repeated division by 10 and store each remainder (as an ASCII digit) in the next lower memory location.
  4. If the number is negative, add '-' at the beginning.
  5. Get the length by subtracting the start address from the end address, then make the write system call.

System calls can be found here.

C code logically mirrors the assembly

#include <unistd.h>

/*
 * Write `num` in signed decimal, followed by a newline, to file descriptor 1.
 *
 * The text is built backwards from the end of a local buffer, so no digit
 * count is needed up front: `p` ends up pointing at the first character.
 *
 * The magnitude is held in an unsigned long so that LONG_MIN can be negated
 * without signed overflow, and the buffer is sized from sizeof(long) so the
 * longest possible output ('-' + every digit + '\n') always fits.
 */
void num_print(long num){
    const unsigned long base = 10;

    /* 3 characters per byte over-estimates the decimal digits of any long;
     * the extra 2 bytes hold the optional '-' and the trailing '\n'. */
    char string[3 * sizeof(long) + 2];
    char* end = string + sizeof string - 1;
    char* p   = end;
    *p = '\n';

    int sign_bit = num < 0;
    unsigned long magnitude = (unsigned long)num;
    if (sign_bit)
        magnitude = 0UL - magnitude;   /* well-defined even for LONG_MIN */

    /* do/while so that 0 still produces the single digit '0'. */
    do {
        *(--p) = (char)('0' + magnitude % base);
        magnitude /= base;
    } while (magnitude);

    if (sign_bit)
        *(--p) = '-';

    /* end - p counts the characters before the newline; +1 includes it. */
    size_t len = (size_t)(end - p) + 1;
    write(1, p, len);
}

/* Driver: print a positive, a negative and a zero sample, one per line. */
int main(){
    int samples[] = {1234567, -1234567, 0};
    const int *cur  = samples;
    const int *stop = samples + sizeof samples / sizeof samples[0];

    /* Each int is widened to long when passed to num_print. */
    while (cur != stop)
        num_print(*cur++);

    return 0;
}

RISC-V assembly

.global _start

.text
# Program entry: walk the three doublewords at `arr`, calling num_print on each,
# then jump to `exit`.
# Register roles: s1 = base address of arr, s2 = element count, s3 = index i,
# s4/s5 = scratch for the element address.
_start:
    la      s1, arr                 # s1 = &arr[0]
    li      s2, 3                   # s2 = number of elements

    addi    sp, sp, -8              # reserve one 8-byte stack slot
    sd      ra, 0(sp)               # park ra there; `exit` reloads it
    li      s3, 0                   # i = 0
    j       loop_test               # check the bound before the first pass

print_next:
    slli    s4, s3, 3               # s4 = i * 8, byte offset of element i
    add     s5, s1, s4              # s5 = &arr[i]
    ld      a0, 0(s5)               # a0 = arr[i], the argument
    jal     ra, num_print           # num_print(arr[i])
    addi    s3, s3, 1               # i++

loop_test:
    blt     s3, s2, print_next      # keep going while i < count
    j       exit                    # all elements printed
 
# num_print: write the signed 64-bit value in a0 as decimal text plus '\n'
# to file descriptor 1, using call number 64 (pk SYS_write).
#   In:       a0 = number to print
#   Clobbers: a0, a1, a2, a7, t0-t4 (a0 holds whatever the ecall leaves there)
#   Saved:    s0 is stored on entry and reloaded before returning
# Frame (40 bytes): 32(sp) = saved s0, 24(sp) = the '\n' byte; the digits and
# the optional '-' are stored downwards from 23(sp).
num_print:
    addi    sp, sp, -40             # open the frame
    sd      s0, 32(sp)              # save caller's s0
    addi    s0, sp, 40              # s0 = frame base (sp on entry)

    li      t0, 0                   # t0 = sign flag, 0 = non-negative
    li      t1, 10                  # t1 = base for remu/divu
    addi    t2, s0, -16             # t2 = fixed end of the text (newline slot)
    mv      a1, t2                  # a1 = cursor, moves down as text grows

    li      t3, '\n'
    sb      t3, 0(a1)               # text always ends with a newline

    bgez    a0, next_digit          # non-negative: use the value as-is
    neg     a0, a0                  # a0 = 0 - a0, magnitude treated as unsigned
    li      t0, 1                   # remember to emit '-'

next_digit:
    remu    t3, a0, t1              # t3 = value % 10
    addi    t3, t3, '0'             # digit -> ASCII
    addi    a1, a1, -1              # step the cursor down one byte
    sb      t3, 0(a1)               # store the digit
    divu    a0, a0, t1              # value /= 10
    bnez    a0, next_digit          # an unsigned quotient by 10 is never negative
                                    # as signed, so "!= 0" is the same test as "> 0"

    beqz    t0, emit                # no sign needed
    li      t3, '-'
    addi    a1, a1, -1              # make room in front of the digits
    sb      t3, 0(a1)               # store '-'

emit:
    sub     t4, t2, a1              # t4 = characters before the newline
    addi    a2, t4, 1               # a2 = length including the newline
    li      a0, 1                   # a0 = file descriptor 1
    li      a7, 64                  # a7 = pk SYS_write
    ecall                           # write(1, a1, a2)

    ld      s0, 32(sp)              # restore caller's s0
    addi    sp, sp, 40              # close the frame

    ret
 
# Program exit: undo the 8-byte push made in _start, then terminate with status 0.
exit:
    ld      ra, 0(sp)               # reload the ra saved by _start
    addi    sp, sp, 8               # release that stack slot

    li      a0, 0                   # a0 = exit status 0
    li      a7, 93                  # a7 = exit call number
    ecall                           # exit(0)

# Sample inputs for the print loop: a positive, a negative and a zero value.
# The loop in _start reads them with `ld` at an 8-byte stride and a count of 3.
.data
arr:
  .dword  12345670, -12345670, 0
code4eva
  • 33
  • 1
  • 5
  • *Get the number's number of digits -- length. Necessary to figure out the memory offset.* - not really, you can just start storing at the *end* of the buffer, as shown in [How do I print an integer in Assembly Level Programming without printf from the c library?](https://stackoverflow.com/a/46301894). When you're done, you have a pointer to the start of the number, wherever that is, which you can pass to `write(fd, buf, len)`. You can calculate the length by subtracting end-start. – Peter Cordes Apr 12 '21 at 01:10
  • It's very wasteful to do the division twice; if you needed the digit-string to start at a specific position in memory (i.e. you weren't just going to pass a pointer to a write system call), copy them after you generate either backwards or right-justified to the end of a buffer. – Peter Cordes Apr 12 '21 at 01:10
  • If you're going to actually branch on `num < 0`, negate it with `0 - num`, i.e. `sub t0, zero, t0`. You only need that branchless `abs` implementation if you're going to avoid branching, e.g. with `signbit = num<0`, e.g. `slt`. Or better with `sra t2, t0, 31` to get a 0 / non-zero result (which you also want as the mask for the xor/sub 2's complement identity abs trick). – Peter Cordes Apr 12 '21 at 01:13
  • It's weird that you use `s0..2` as temporary registers. Those ABI names correspond to the calling convention: `s` registers are "saved", i.e. you should save/restore them if you modify them at all, and `t` registers are temporaries that your caller doesn't care if you modify. (You exit with a system call because `_start` isn't a function, but you're not gaining anything from using call-preserved registers here since you don't make any calls yourself.) – Peter Cordes Apr 12 '21 at 01:18
  • Your C uses `printf("%s")` but your asm uses `write(1, buf, len)`. That's weird; your asm wastes a couple instructions 0-terminating the string. Hopefully you're not actually writing the `'\0'` byte as part of the length, so the kernel doesn't care if it's there or not. So you should just remove `sb t6, 0(t5) # store null to address`. (Also, it's ASCII NUL, not null, and there was no reason to copy a zero to t6, you could have done `sb zero, 0(t5)`. You sometimes use `zero` and sometimes `x0`. Those are the same register in RISC-V; use the same name for consistency. – Peter Cordes Apr 12 '21 at 01:23
  • Also, comment your ecall instructions with something like `write(1, ptr, len)` to remind yourself what the values meant. You could also optimize by using `a1` instead of `t5` in the first place so the pointer would be where you wanted it. – Peter Cordes Apr 12 '21 at 01:25
  • That's a lot better. There are still some RISC-V-specific optimizations you can do, like putting `divu` and `remu` next to each other so they can fuse into a single division operation that produces both outputs. (https://riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf#page=45). (Apparently with `divu` first, which is inconvenient and might require an extra `mov` unless you unroll the loop, if you want to do that optimization). And yes, you should be using `divu`. `abs(LONG_MIN)` doesn't fit in a signed integer: you need to treat your abs result as unsigned. – Peter Cordes Apr 12 '21 at 20:01
  • Your assembler hopefully supports character constants like `'0'` and maybe `'\n'`; C certainly does, so use stuff like `*--p = '-';` instead of writing the decimal values of their ASCII codes as magic numbers that need comments to explain. Especially if you're going to put `10` into a register 2 separate times, once as the base and once as a newline. Writing one of them as a `10` and the other as `addi t4, zero, '\n'` can justify that redundancy instead of just doing `sb t2, 0(a1)` if you don't want to explain that optimization in comments. – Peter Cordes Apr 12 '21 at 20:04
  • Also note that hardware division is slow compared to multiply, and you can use a multiplicative inverse to do exact division by 10. That makes the asm less "obvious", and is something a C compiler will do for you. But see [Integer-to-ASCII algorithm (x86 assembly)](https://codereview.stackexchange.com/q/142842) for an x86 example, and [Why does GCC use multiplication by a strange number in implementing integer division?](https://stackoverflow.com/q/41183935) for how it works. See https://godbolt.org/z/5Mcsra1qq for clang output for your C source tidied up a little. – Peter Cordes Apr 12 '21 at 20:09
  • I am grateful for the help. Replacing division with multiplication by 0.1 has had its hiccups -- 128 bit processing in C is quite the headache since I am using a 64 bit machine and my hardware is actually x86, the risc-v is a simulator. I will probably revisit this later on. – code4eva Apr 22 '21 at 09:35
  • Most of your changes mostly make sense I think (looping over args, and do{}while() is an improvement), but `(num ^ -1) + 1;` bithack 2's complement is worse for no reason; use `num = -num` like a normal person, as we discussed earlier. (Or `0UL - num` to avoid signed-overflow UB; that might even work right for LONG_MIN since you use `unsigned base`.) Also, your C is missing indenting for loop and if bodies. – Peter Cordes Apr 22 '21 at 09:36
  • You normally don't actually write out a multiplicative inverse in C; that's what compilers are for: just do `/` and `%` by `10UL` in C, and leave the `mulhu` for hand-written asm (using constants you get from compiler output; no need to spend time calculating it by hand for any given divisor.) But you can if you want with GNU C `unsigned __int128`, which is supported on all(?) 64-bit GCC/clang targets including x86-64. [Getting the high part of 64 bit integer multiplication](https://stackoverflow.com/q/28868367) – Peter Cordes Apr 22 '21 at 09:45
1

System calls depend on the environment. "Toy" systems like Venus or RARS have their own set of toy system calls that do things like print an integer.

In a real-world system like GNU/Linux, true system calls that you can access with ecall can only copy bytes to a file descriptor. If you want to output text, you need to create text in memory in user-space and pass a pointer to a write system call.

Spike + pk is apparently more like Linux, with a POSIX write(2) system call, not like those toy system-call environments where you could pass an integer directly to a print-int ecall. https://www.reddit.com/r/RISCV/comments/dagvzr/where_do_i_find_the_list_of_stdio_system_etc/ has some examples and links. Notably https://github.com/riscv/riscv-pk/blob/master/pk/syscall.h where we find #define SYS_write 64 as the call number (goes in a7) for a write system call.

A write system-call takes args: write(int fd, const void *buf, size_t count).

Formatting a binary integer into an ASCII string is something that library functions like printf will do. Toy systems don't have a library, so they just put a few useful functions as system calls. And if you want control over stuff like leading zeros or padding to a fixed width, you have to write it yourself. But on a system like Spike-pk, you only have simple Unix-like system calls and (perhaps?) no library at all, so you have to always do it yourself.

With just Linux / Unix / Spike-pk system calls, you'll want to do repeated division by 10 to get the decimal digits of a binary integer, as in "How do I print an integer in Assembly Level Programming without printf from the c library?", which shows C and x86-64 assembly for Linux:

/*
 * Render `val` in decimal, writing the digits backwards so that the last
 * digit lands at p_end[-1]. Nothing is written at or after p_end, and no
 * terminator is added.
 *
 * Returns a pointer to the leading digit; the text length is p_end minus
 * the returned pointer, ready to hand to write(1, ptr, len).
 */
char *itoa_end(unsigned long val, char *p_end) {
  enum { RADIX = 10 };
  char *cursor = p_end;

  /* The body runs before the zero test, so val == 0 still yields "0". */
  for (;;) {
    unsigned long digit = val % RADIX;
    val /= RADIX;
    *--cursor = (char)('0' + digit);
    if (val == 0)
      break;
  }

  return cursor;
}

Translate to RISC-V assembly (or compile with gcc or clang, e.g. via https://godbolt.org/). Reserving a small buffer on the stack is convenient.

Also, if I wanted to print the contents of num in the data segment, how would I go about it?

lw the number into a register, then do the same thing as above.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847