0

I am a beginner in ASM and I am trying to solve the next problem: A text file is given. Read the content of the file, determine the uppercase letter with the highest frequency and display the letter along with its frequency on the screen. The name of text file is defined in the data segment.

I have no idea how to count the frequency of the letters. This is what I have so far:

`

bits 32

global start

extern exit, fopen, fread, fclose, printf
import exit msvcrt.dll
import fopen msvcrt.dll
import fread msvcrt.dll
import fclose msvcrt.dll
import printf msvcrt.dll

segment data use32 class=data
    file_name db "ana.txt", 0   ; filename to be read
    access_mode db "r", 0       ; file access mode:
                                ; r - opens a file for reading. The file must exist.
    file_descriptor dd -1       ; variable to hold the file descriptor
    len equ 100                 ; maximum number of characters to read
    text times (len+1) db 0     ; string to hold the text which is read from file
    format db "We've read %d chars from file. The text is: %s", 0
    v times 30 db 0 
    vec times 100 db 0
    m db 0
    aux db 0

segment code use32 class=code
    start:
        ; call fopen() to create the file
        ; fopen() will return a file descriptor in the EAX or 0 in case of error
        ; eax = fopen(file_name, access_mode)
        push dword access_mode     
        push dword file_name
        call [fopen]
        add esp, 4*2                ; clean-up the stack

        mov [file_descriptor], eax  ; store the file descriptor returned by fopen

        ; check if fopen() has successfully created the file (EAX != 0)
        cmp eax, 0
        je final

        ; read the text from file using fread()
        ; after the fread() call, EAX will contain the number of chars we've read 
        ; eax = fread(text, 1, len, file_descriptor)
        push dword [file_descriptor]
        push dword len
        push dword 1
        push dword text        
        call [fread]
        add esp, 4*4
        
        ;https://stackoverflow.com/questions/30383575/counting-character-frequencies-in-an-array-of-characters-x86-assembly
        mov ECX,EAX  ; ECX-lenght
        mov [m], EAX ; m-lenght
        mov ESI, -1
        mov EDI, 0
        jecxz endloop
        myloop:
            inc ESI
            mov AL, [text+ESI]
            ;lodsd  ; The character is stored in AL
            cmp AL, "A"  ; compare character and "A"
            JB notcapitala  ;jump to notcapitala if input<A
            cmp AL, "Z"   ; compare character and "Z"
            JA notcapitalz  ; jump to notcapitalz if input>Z
            
            
            noB:
            notcapitala:
            notcapitalz:
        loop myloop
        endloop:
        ; display the number of chars we've read and the text
        ; printf(format, eax, text)
        push dword v
        push dword [aux]
        push dword format
        call [printf]
        add esp, 4*3

        ; call fclose() to close the file
        ; fclose(file_descriptor)
        push dword [file_descriptor]
        call [fclose]
        add esp, 4

      final:

        ; exit(0)
        push dword 0
        call [exit]

`

  • The normal histogram algorithm is `if (c is uppercase) ++counts[ c-'A' ]`, then scan for the max count. Where `c is uppercase` can be `sub eax, 'A'` / `cmp eax, 25` / `ja not_upper`. [What is the idea behind ^= 32, that converts lowercase letters to upper and vice versa?](https://stackoverflow.com/a/54585515) / [double condition checking in assembly](https://stackoverflow.com/q/5196527) – Peter Cordes Nov 30 '22 at 21:22
  • You even linked [Counting character frequencies in an array of characters - x86 Assembly](https://stackoverflow.com/q/30383575) but don't seem to have implemented it – Peter Cordes Nov 30 '22 at 21:27
  • BTW, your current branching with separate `cmp/jb` / `cmp/ja` isn't wrong. Just less efficient, especially when you already want the index into the alphabet as an integer. You also only need one label at the same place, both branches can jump to the same place. – Peter Cordes Nov 30 '22 at 21:36

0 Answers0