1

I'm reading Programming from the Ground Up, and read the Chapter 5's Using Files in a Program., code in Page 63.

I tried to let this 32bit code convert to 64bit code.

Below is my code.

# system call numbers
.equ SYS_OPEN, 2
.equ SYS_WRITE, 1
.equ SYS_READ, 0
.equ SYS_CLOSE, 3
.equ SYS_EXIT, 60

.equ O_RDONLY, 0
.equ O_CREAT_WRONLY_TRUNC, 03101

# standard file descriptors
.equ STDIN, 0
.equ STDOUT, 1
.equ STDERR, 2

# system call interrupt
.equ LINUX_SYSCALL, 0x80
.equ END_OF_FILE, 0

.equ NUMBER_ARGUMENTS, 2

.section .bss
.equ BUFFER_SIZE, 500
.lcomm BUFFER_DATA, BUFFER_SIZE

.section .text

# STACK POSITIONS
.equ ST_SIZE_RESERVE, 16
.equ ST_FD_IN, -8
.equ ST_FD_OUT, -16
# main function arguments
.equ ST_ARGC, 0     # Number of arguments
.equ ST_ARGV_0, 8   # Name of program
.equ ST_ARGV_1, 16  # Input file name
.equ ST_ARGV_2, 24  # Output file name

.globl _start
_start:

    ### INITIALIZE PROGRAM ###
    # save the stack pointer
    movq %rsp, %rbp
    
    # Allocate space for our file descriptors on the stack
    subq $ST_SIZE_RESERVE, %rsp

open_files:
open_fd_in:
    ### OPEN INPUT FILE ###
    # open syscall
    movq $SYS_OPEN, %rax
    # input filename into %rbx
    movq ST_ARGV_1(%rbp), %rbx

    # read-only flag
    movq $O_RDONLY, %rcx
    # this doesn't really matter for reading
    movq $0666, %rdx
    # call Liunx
    int $LINUX_SYSCALL

store_fd_in:
    # save the given file descriptor
    movq %rax, ST_FD_IN(%rbp)

open_fd_out:
    ### OPEN OUTPUT FILE ###
    # open the file
    movq $SYS_OPEN, %rax
    # output filename into %rbx
    movq ST_ARGV_2(%rbp), %rbx
    # flags for writing to the file
    movq $O_CREAT_WRONLY_TRUNC, %rcx
    # mode for new file (if it's created)
    movq $0666, %rdx
    # call Linux
    int $LINUX_SYSCALL

store_fd_out:
    # store the file descriptor here
    movq %rax, ST_FD_OUT(%rbp)

    ### BEGIN MAIN LOOP ###
read_loop_begin:
    ### READ IN A BLOCK FROM THE INPUT FILE ###
    movq $SYS_READ, %rax
    # get the input descriptor
    movq ST_FD_IN(%rbp), %rbx
    # the location to read into
    movq $BUFFER_DATA, %rcx
    # the size of the buffer
    movq $BUFFER_SIZE, %rdx
    # Size of buffer read is returned in %rax
    int $LINUX_SYSCALL
    
    ### EXIT IF WE'RE REACHED THE END ###
    # check for end of file marker
    cmpq $END_OF_FILE, %rax
    # if found or on error, go to the end
    jle end_loop

continue_read_loop:
    ### CONVERT THE BLOCK TO UPPER CASE ###
    pushq $BUFFER_DATA  # loction of buffer
    pushq %rax      # size of the buffer
    callq convert_to_upper  
    popq %rax       # get the size back
    addq $8, %rsp       # restore %rsp

    ### WRITE THE BLOCK OUT TO THE OUTPUT FILE ###
    # size of the buffer
    movq %rax, %rdx
    movq $SYS_WRITE, %rax
    # file to use
    movq ST_FD_OUT(%rbp), %rbx
    # location of the buffer
    movq $BUFFER_DATA, %rcx
    int $LINUX_SYSCALL
    
    ### CONTINUE THE LOOP ###
    jmp read_loop_begin

end_loop:
    ### CLOSE THE FILES ###
    movq $SYS_CLOSE, %rax
    movq ST_FD_OUT(%rbp), %rbx
    int $LINUX_SYSCALL
    
    movq $SYS_CLOSE, %rax
    movq ST_FD_IN(%rbp), %rbx
    int $LINUX_SYSCALL

### EXIT ###
    movq $SYS_EXIT, %rax
    movq $0, %rbx
    int $LINUX_SYSCALL
    

### CONSTANTS ###
# The lower boundary of our search
.equ LOWERCASE_A, 'a'
# The lower boundary of our search
.equ LOWERCASE_Z, 'z'
# Conversion between upper and lower case
.equ UPPER_CONVERSION, 'A' - 'a'


### STACK STUFF ###
.equ ST_BUFFER_LEN, 24  # length of buffer
.equ ST_BUFFER, 32  # actual buffer

convert_to_upper:
    pushq %rbp
    movq %rsp, %rbp

### SET UP VARIABLES ###
    movq ST_BUFFER(%rbp), %rax
    movq ST_BUFFER_LEN(%rbp), %rbx
    movq $0, %rdi

    # if a buffer with zero length was given to us, just leave
    cmpq $0, %rbx
    je end_convert_loop
    
convert_loop:
    # get the current byte
    movb (%rax, %rdi, 1), %cl
    
    # go to the next byte unless it is between  
    # 'a' and 'z'
    cmpb $LOWERCASE_A, %cl
    jl next_byte
    cmpb $LOWERCASE_Z, %cl
    jg next_byte

    # otherwise convert the byte to uppercase
    addb $UPPER_CONVERSION, %cl
    # and store it back
    movb %cl, (%rax, %rdi, 1)

next_byte:
    incq %rdi   # next byte
    cmpq %rdi, %rbx # continue unless we've reached the end
    jne convert_loop

end_convert_loop:
    # no return value, just leave
    movq %rbp, %rsp
    popq %rbp
    retq

I run this code, it will appear Segmentation fault (core dumped), and then I tried to using gdb to found errors.

Below are I have problems!

  1. In the beginning, it was not creating files, and then I found system call numbers is changed, it was not applicable my ubuntu amd 64bit. finally, I examined this file, then modified my code. Now, I wanna know latest system call numbers about amd64. http://www.x86-64.org can't visit!!
  2. Why is executing int $LINUX_SYSCALL appear [Detaching after fork from child process ...], please use b 61 and b 78 set breakpoint!
  3. The last question is this post title, in code read_loop_begin section, cmpq $END_OF_FILE, %rax appear signal SIGTTIN, Stopped. Delete breakpoint before set, and use b 99 reset breakpoint, and then use r to rerun program! And then the program execution sequence is not in line with my expectations, why is not straight into continue_read_loop section after cmp executed, rather than straight into convert_to_upper section, and then appear Cannot find bounds of current function

Finally, if there are any logical errors, etc, please point them out! I'm deeply grateful. :-)

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
OnlyWick
  • 342
  • 2
  • 10
  • 1
    You have an extremely weird mix of 32 bit and 64 bit system call conventions. Familiarize yourself with how to do system calls on amd64 Linux. You do these with the `syscall` instruction and arguments in `rdi`, `rsi`, `rdx`, ... Curiously, your system call numbers are correct for 64 bit code, but you use the 32 bit system call interface through `int $0x80`. This interface has different system call numbers, so your code ends up doing random unrelated system calls. No idea where you got this from. But on the other hand, nicely commented code with meaningful names! – fuz Jun 23 '22 at 17:47
  • 2
    *I tried to let this 32bit code convert to 64bit code.* - Have you read a tutorial on x86-64 system calls with `syscall`? If no, do that. Random guessing at how to port a 32-bit tutorial to 64-bit is unlikely to get far. And use `strace`; with a new enough kernel and strace version, it can correctly decode `int 0x80` system calls made from 64-bit processes. See [What happens if you use the 32-bit int 0x80 Linux ABI in 64-bit code?](https://stackoverflow.com/q/46087730) . Call number for `int 0x80` are always the same, it's always the 32-bit ABI, that's why you shouldn't use it in 64-bit. – Peter Cordes Jun 23 '22 at 18:19
  • *`cmpq $END_OF_FILE, %rax` appear `signal SIGTTIN, Stopped`* - Well that doesn't make any sense. Executing a user-space instruction can't make a system call, unless it's `int` or `syscall` (or `sysenter`). – Peter Cordes Jun 23 '22 at 18:21
  • 1
    @Peter, the signal is delivered at the IP following the system call. The preceding instruction is a system call, and the parameters are bogus because of the other errors. – prl Jun 23 '22 at 19:25
  • 1
    @prl: Oh, yeah that must be what's going on. GDB shows you what instruction you're stopped at, which RIP points to. The one that will execute *next* if you `stepi`. I think we've had Q&As before about that confusion; it makes the question title very weird. Perhaps my edit distorted it, and it was supposed to say "why does executing up to cmpq produce SIGTTIN?" But that's not the useful instruction to mention... – Peter Cordes Jun 23 '22 at 19:30
  • 2
    And BTW, trying to port a tutorial to another OS or bitness rarely goes well if you're still just learning the things the tutorial was trying to teach for the system it was written for. You have to already understand in general how to write working code for both systems (original and new), and know which things should change and which should stay the same. – Peter Cordes Jun 23 '22 at 19:33
  • 2
    Frequently, beginners don't know that their 64 bit system can in fact run 32 bit code which is the reason why they try to convert things. – Jester Jun 23 '22 at 19:40
  • @PeterCordes Thanks, What is the `bitness`? – OnlyWick Jun 24 '22 at 02:26
  • 1
    "bitness" is whether you're writing code for 32-bit mode, 64-bit mode, or even 16-bit mode. (16-bit mode tends to go with different OSes than 32 or 64-bit code.) It's not a standard word, or at least I thought it wasn't but there's an entry for it on https://en.wiktionary.org/wiki/bitness (in the context of describing CPUs / architectures, rather than what mode you're in on an x86-64 CPU.) – Peter Cordes Jun 24 '22 at 02:28
  • @PeterCordes Thanks, I think I probably know why. I haven't learned os yet, so I don't understand about `user-space instruction` details. – OnlyWick Jun 24 '22 at 02:36
  • @Jester But it means need extra options, and mostly 64bit in future, [Building 32bit code on a 64bit system](https://stackoverflow.com/questions/36861903/assembling-32-bit-binaries-on-a-64-bit-system-gnu-toolchain/36901649#36901649) – OnlyWick Jun 24 '22 at 03:14

0 Answers0