-1

I have to write an assembly program using YASM for the i386 architecture (32 bits) that receives a text as a parameter and returns the same text with each line numbered.

Example:

00 this is what the final text should look like

01 all lines numbered

02 and the last line should have the amount of total lines

03 Total lines:3.

; $ yasm -f elf enum.asm
; $ ld -o enum enum.o
; $ ./enum

%define stdout 1

; Linux i386 (int 0x80) system-call numbers used throughout this file.
%define sys_exit   1
%define sys_read   3
%define sys_write  4
%define sys_open   5
%define sys_close  6
%define sys_create 8            ; creat(2)

section .data
    ; Paths handed to sys_open / sys_create. The kernel reads a path up to
    ; the first NUL byte, so each one must be explicitly terminated.
    file_name db 'test.txt', 0
    new_file db 'resultado.txt', 0
    ; Line-number prefix written by imprimirMensaje; numL is its byte length.
    num db "00: "
    numL equ $ - num
    ; Number of bytes requested per sys_read (a dword variable, not a constant).
    bufferEntradaL dd 1
    ; Newline character.
    salto db 0xa


section .bss
    ; Every variable that is stored from / loaded into a 32-bit register
    ; needs a full dword; a 2-byte slot would let the store spill into the
    ; next variable.
    descriptorEntrada resd 1    ; input file descriptor
    bufferEntrada resb 2        ; read buffer (sys_read is asked for [bufferEntradaL] bytes)
    descriptorSalida resd 1     ; output file descriptor
    punteroBuffer resd 1        ; saved buffer pointer (written from ECX)
    cant resd 1                 ; saved byte count (written from EAX)

section .text

global _start

abrirArchivoLectura:
    ; Opens file_name for reading via sys_open.
    ; Out:      EAX = file descriptor, or a negative value on failure
    ;           (the caller checks the sign).
    ; Clobbers: EBX, ECX.
    mov EAX, sys_open           ; system-call number
    mov ECX, 0                  ; flags = 0: read-only
    mov EBX, file_name          ; address of the path string
    int 80h                     ; enter the kernel
    ret

abrirArchivoEscritura:
    ; Opens new_file for writing via sys_open.
    ; Out:      EAX = file descriptor, or a negative value on failure.
    ; Clobbers: EBX, ECX.
    mov EAX, sys_open           ; system-call number
    mov ECX, 1                  ; flags = 1: write-only
    mov EBX, new_file           ; address of the path string
    int 80h                     ; enter the kernel
    ret

crearArchivoEscritura:
    ; Creates (or truncates) new_file via sys_create.
    ; Out:      EAX = file descriptor, or a negative value on failure.
    ; Clobbers: EBX, ECX.
    mov EAX, sys_create         ; system-call number
    mov EBX, new_file           ; address of the path string
    ; The second argument of creat is the permission mode, not an open flag.
    ; A mode of 1 would create the file as --------x, so use rw-r--r--.
    mov ECX, 0x1A4              ; 0644 octal
    int 80h                     ; enter the kernel
    ret

leerArchivo:
    ; Reads from the input file into bufferEntrada via sys_read.
    ; In:       [descriptorEntrada] = open input descriptor
    ; Out:      EAX = sys_read result, ECX = address of bufferEntrada
    ; Clobbers: EBX, EDX.
    mov EAX,  sys_read              ; system-call number
    mov EBX,  [descriptorEntrada]   ; file descriptor
    mov ECX,  bufferEntrada         ; destination buffer
    ; bufferEntradaL is a dword variable: load its value. Using the bare
    ; symbol would pass its address as the byte count.
    mov EDX,  [bufferEntradaL]      ; number of bytes to request
    int 80h                         ; enter the kernel
    ret

imprimirMensaje:
    ; Writes the numL bytes starting at num to stdout, then jumps to
    ; salirSinError instead of returning to the caller.
    ; NOTE(review): salirSinError is not defined in the visible source;
    ; confirm it exists elsewhere.
    ; Clobbers: eax, ebx, ecx, edx.
    mov edx, numL               ; byte count
    mov ecx, num                ; address of the bytes to write
    mov ebx, stdout             ; destination descriptor
    mov eax, sys_write          ; system-call number
    int 80h                     ; enter the kernel
    jmp salirSinError           ; leave through the no-error exit path


imprimirSaltoPantalla:
    ; Writes the single byte stored at salto (a newline) to stdout.
    ; Clobbers: eax, ebx, ecx, edx.
    mov edx, 1                  ; exactly one byte
    mov ecx, salto              ; address of the newline byte
    mov ebx, stdout             ; destination descriptor
    mov eax, sys_write          ; system-call number
    int 80h                     ; enter the kernel
    ret


cerrarArchivoEntrada:
    ; Closes the input file whose descriptor is stored in [descriptorEntrada].
    ; Clobbers: eax, ebx.
    mov ebx, [descriptorEntrada]    ; input descriptor to close
    mov eax, sys_close              ; system-call number
    int 0x80                        ; enter the kernel
    ret

cerrarArchivoSalida:
    ; Closes the output file whose descriptor is stored in [descriptorSalida].
    ; Clobbers: eax, ebx.
    mov ebx, [descriptorSalida]     ; output descriptor to close
    mov eax, sys_close              ; system-call number
    int 0x80                        ; enter the kernel
    ret

leerHastaSaltoLinea:
    ; Keeps reading input until a newline byte has been read, or until
    ; sys_read reports end of file (0) or an error (negative).
    ; In:       EAX = result of the preceding sys_read
    ;           ECX = address of the buffer that sys_read filled
    ; Out:      [cant] = last sys_read result, [punteroBuffer] = buffer address
    ; Clobbers: EAX, EBX, ECX, EDX (through leerArchivo).
.siguiente:
    mov [punteroBuffer], ECX        ; remember where the data lives
    mov [cant], EAX                 ; remember how many bytes arrived
    test EAX, EAX
    jle .fin                        ; 0 = end of file, negative = error
    cmp byte [ECX], 0xa             ; compare the byte read, not the variables' addresses
    je .fin                         ; reached the end of the line
    call leerArchivo                ; fetch more input; leaves EAX/ECX set for the next pass
    jmp .siguiente
.fin:
    ret



_start:
    ; Program entry point: opens the input file, reads up to the first
    ; newline, closes the file and exits with status 0.
    call abrirArchivoLectura        ; EAX = descriptor, or negative on failure
    test EAX,EAX                    ; a negative result means the open failed
    js salirErrorArchivoEntrada     ; exit with the input-file error status
    mov [descriptorEntrada],EAX     ; keep the input descriptor
    call leerArchivo                ; read from the input file
    call leerHastaSaltoLinea
    call cerrarArchivoEntrada       ; release the input descriptor

    ; Exit explicitly so the success path does not fall through into the
    ; error handler that follows.
    mov EAX, sys_exit
    xor EBX, EBX                    ; exit status 0
    int 0x80



salirErrorArchivoEntrada:
    ; Terminates the process with exit status 2, the status this program
    ; uses for an input-file error.
    mov ebx, 2                  ; exit status
    mov eax, sys_exit           ; system-call number
    int 80h                     ; enter the kernel
RusoMM
  • 1
  • 1
  • 3
    Can you edit your question to show how far you've made it so far? Post whatever code you've written and explain specifically where you're stuck or what's erroring out. – Robert Townley Nov 21 '17 at 19:46
  • Are you allowed to use libc functions like `fgets` and `printf`? You're clearly on Linux (from the `int 0x80` system calls) so normally libc is available. If so, you could just read one line at a time and print it back out with an asm implementation of `printf("%d %s", linenum++, buf);` – Peter Cordes Nov 22 '17 at 02:40
  • Use `resd 1` to reserve space for one dword. `resb 2` only reserves 16 bits. – Peter Cordes Nov 22 '17 at 23:28

1 Answers1

0

One obvious algorithm if you're just using read/write system calls directly is to allocate a 2nd buffer (large enough to hold the result even if every byte of input is a newline). Pages that you never touch don't really count as used, so it's fine to have a very large BSS.

In a loop:

  • do {
  • format the current line-number counter as a string, into the current position of the output buffer, plus a trailing space or tab. (NASM/YASM example: How do I print an integer in Assembly Level Programming without printf from the c library? It is easy to port from x86-64 to i386. But it would be more efficient to avoid re-doing the div-by-10 stuff every time, and just increment the least-significant digit until it's > '9', then redo the formatting.)
  • copy bytes from the input buffer up to and including the next newline ('\n' = 0xa. YASM doesn't support NASM's backtick-string / character literals that process C-style escape sequences). Also make sure you stop at the end of the buffer if it doesn't end with a newline. (You could check for this before the loop and append one if there isn't one, so simplifying your loop).
  • } while(input_pos < end_of_input)

When you're done, find the length of the result by subtracting the start of the output buffer from the current position, and use that for a sys_write.

If you want to support files larger than your input/output buffer, remember whether you were at the end of a line or not when looping back to do another sys_read. (Instead of actually copying that partial line back to the beginning of the input buffer. The copying strategy would fail with a line longer than your buffer size.)

Don't forget that sys_read and sys_write can return early, having read or written fewer bytes than you asked, even if there are more bytes to read/write. (e.g. if you're reading from a pipe, including a named pipe, or if a signal came in during the system call). IIRC, the libc wrapper functions might handle retrying.


An alternative might be to scan through counting newlines, then work from the end of the buffer to expand it in-place. This would be slightly more cache-efficient, but it has the downside of needing an initial scan to count newlines to figure out how much space you'll need to leave (and what line number to count down from).

My first suggestion has the advantage that you only touch each input character once, instead of twice for this one. If you use a small-ish buffer that fits in L1D cache (like 16kiB), then expanding it in-place from the end might be worth considering, but it's more complex.

OTOH, if you're really trying to optimize for efficiency, you could maybe use vmsplice(2) to "gift" some pages to the kernel, into a temporary pipe and from there splice them into a regular file. So the physical pages you wrote end up as part of the pagecache with zero-copy. This might have more overhead than just making write() system calls, though.


Both of the previous methods have the advantage of only making one large write() system call for a whole buffer. It would also be possible to make an inefficient program that copies line-number + a line into a tmp buffer and uses sys_write on that, or even worse sys_write the line number text and then sys_write the line in-place in the input buffer.

IDK if that's any easier to implement, because you still have to get all the byte-counts right, and it sucks for performance. A sys_write is pretty slow compared to copying a few bytes.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
  • Hi, thank you very much for the help! Since I'm new to assembly I am having a lot of issues copying each line into a different file. Can you give me an example of code? Thanks! – RusoMM Nov 22 '17 at 16:35
  • @RusoMM: You don't have even the start of an implementation of a loop that looks at the data to debug, and SO isn't a code-writing service. See https://meta.stackoverflow.com/questions/334822/how-do-i-ask-and-answer-homework-questions. I already linked you complete working code for formatting numbers into ASCII strings. – Peter Cordes Nov 22 '17 at 23:30