1

I've been working on the project. The main goal is to calculate how many words do not contain letters 'B' nor 'C'. Given file has [0;1000] lines. Every line contains 6 columns.

he first two columns contain string with [1; 20] characters. Characters could be letters, numbers, and whitespaces.

3-5 columns contain integers in the range [-100; 100]. 6th column contain real numbers in range [-9.99; 9.99] with only two digits after decimal point.

Each section I separated by a semicolon ';'.

FILE EXAMPLE:

helloA;lB;lC;lD;lE;lF
A11;bas morning;0;0;5;1.15
B12; Hello WoRlD;-100;11;78;1.33
B11;table;10;0;55;-2.44
C1;OakWood;0;8;17;3.77

TASK: calculate how many words (word is one or more symbols without ' '(space)) in the first two columns do not contain letters 'B' or 'C'. And print that integer number.

I have dealt with the most part of the task. I already did File's name reading from command line, reading the file, priting the integer out. But I have stuck on one thing. I don't really get it how to check every word one by one. I almost every time get a wrong answer.

Input: ( a A a a aba aca;BA A C BA a;1;1;1;1.00) - without brackets.
Output: 6

MY CODE SO FAR

org 100h

%include 'yasmmac.inc'

section .text

    startas:
        macPutString 'Write outpu file name', crlf, '$'
        
        ; Reading file from command line
        .commandLine:
            mov bx, 82h
            mov si, 0h
            jmp .checkInputFile

        ; Turns it into ASCIIZ
        .checkInputFile:
            mov cl, [bx+si]
            cmp cl, 20h
            jl .addZero
            inc si
            jmp .checkInputFile
        
        .addZero:
            mov byte [bx+si], 0h
            xor si, si
        
            ; Saving writing file
        mov al, 128         
        mov dx, writingFile
        call procGetStr     
        macNewLine
        
        ; Open reading file
        mov dx, bx
        call procFOpenForReading
        jnc .writingFileIsOpened
        macPutString 'Error while opening reading file', '$'
        exit
        
        ; Atidarome rasymo faila
        .writingFileIsOpened:
            mov [readingDescriptor], bx
            mov dx, writingFile
            call procFCreateOrTruncate
            jnc .openWritingFile
            macPutString 'Error while opening writing file', '$'
            jmp .writingError
        
        ; Save writing descriptor
        .openWritingFile:
            mov [writingDescriptor], bx
            
            
        ; Reads first line
        call procReadLine
    
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

        ; Main loop
        .whileNotEndOfFile:
            xor di, di
            xor si, si
            call procReadLine
            
            ; Check first two columns
            ;mov al, ';'

        mov di, line
        mov al, [di]
        cmp al, ' '
        je .nextWord
        cmp al, ';'
        je .nextWord
            
            ; Checking words
            .checkWord:
                mov al, [di]
                inc di
                
                cmp al, byte 'B'
                je .nextWord
                cmp al, byte 'b'
                je .nextWord
                cmp al, byte 'C'
                je .nextWord
                cmp al, byte 'c'
                je .nextWord
                
                cmp al, byte ' '
                je .addNumber
                cmp al, byte ';'
                je .semicolon
                jmp .checkWord

            .nextWord:
                call procNextWord
                jmp .checkWord

            .semicolon:
                call procAddNumber
                inc si
                cmp si, 0x2
                je .skipLine
                jmp .nextWord

            .addNumber:
                call procAddNumber
                jmp .nextWord
            
            ; If this is not the end of file, repeat loop
            .skipLine:
            cmp [readLastLine], byte 0
            je .whileNotEndOfFile
            
            ; Hexadecimal convertion to decimal
           mov dx, lineCount
           mov ax, [lineCount]
           call procUInt16ToStr
           call procPutStr
           macNewLine
           mov si, dx


           .writingToFile:
           lodsb
           cmp al, 0
           jne .writingToFile
           sub si, dx
           lea cx, [si-1]
           mov bx, [writingDescriptor]
           mov ah, 40h
           int 21h

        
        
        ; Closing Files
        .end:
            mov bx, [writingDescriptor]
            call procFClose
        
        .writingError:
            mov bx, [readingDescriptor]
            call procFClose
        
        macPutString 'Program ends', crlf, '$'

        exit
        
%include 'yasmlib.asm'

; void procReadLine()
; Read line to buffer ‘eilute’
procReadLine:
    push ax
    push bx
    push cx
    push si
    
    mov bx, [readingDescriptor]
    mov si, 0


    .loop:
        call procFGetChar
    
        ; End if the end of file or error
        cmp ax, 0
        je .endOfFile
        jc .endOfFile
        
        ; Putting symbol to buffer
        mov [line+si], cl
        inc si
    
        ; Check if there is \n?
        cmp cl, 0x0A
        je .endOfLine
    
        jmp .loop
        
        
    .endOfFile:
        mov [readLastLine], byte 1
    .endOfLine:
    
    mov [line+si], byte '$'
    mov [lineLenght], si
    
    pop si
    pop cx
    pop bx
    pop ax
    ret

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
    
procAddNumber:
    push si
    push ax
    push bx
    push cx
    push dx
    
    ;lineCount++
    mov ax, word [lineCount]
    inc ax
    mov [lineCount], ax
    
    pop dx
    pop cx
    pop bx
    pop ax
    pop si
    ret

procNextWord:

    .loop:
        inc di
        mov al, [di]
        cmp al, byte ' '
        je .loop
        jmp .t

    .t:
        cmp al, byte ';'
        je .t2
        jmp .return

    .t2:
        inc di
        mov al, [di]
        cmp al, byte ' '
        je .t2
        jmp .return

    .return: 
        ret


section .data
        
    readingDescriptor:
        dw 0000
        
    writingFile:
        times 128 db 00
        
    writingDescriptor:
        dw 0000
        
    readLastLine:
        db 00
        
    line:
        db 64
        times 66 db '$'
        
    lineLenght:
        dw 0000
    
    lineCount:
        times 128 db 00

GITHUB: yasmmac.inc/yasmlib.asm

Any help will be appreaciated.

Kurbamit
  • 89
  • 8

2 Answers2

3

Completely starting over on the thinking process that is needed to solve the main task, I came up with the following:

  • Before checking the character in a word, I skip all the leading whitespace (if present at all)
  • If the leading whitespace ends with a semicolon, it is rather the trailing whitespace from the last word in the current column
  • A word ends when finding a space or a semicolon
  • .checkWord does not early-out on finding an invalid character {BbCc}, but sets a flag to 0
; Main loop
.whileNotEndOfFile:
  call procReadLine
  mov  si, 2       ; Do 2 columns
  mov  di, line
.skipSpaces:
  mov  al, [di]
  inc  di
  cmp  al, ' '
  je   .skipSpaces
  cmp  al, ';'
  je   .q3         ; It's trailing whitespace
  dec  di
.checkWord:
  mov  bx, 1       ; Assuming it will be a 'good' word
.q1:
  mov  al, [di]
  inc  di
  cmp  al, ' '
  je   .q2
  cmp  al, ';'
  je   .q2
  or   al, 32      ; LCase
  cmp  al, 'b'
  jb   .q1
  cmp  al, 'c'
  ja   .q1
  xor  bx, bx      ; One or more invalid chars in current word
  jmp  .q1
     
.q2:
  add  [lineCount], bx  ; BX=[0,1] Counting the 'good' words
  cmp  al, ';'
  jne  .skipSpaces
.q3:
  dec  si          ; Next column ?
  jnz  .skipSpaces
.skipLine:
  cmp  [readLastLine], byte 0
  je   .whileNotEndOfFile

The label lineCount is no longer particularly good for counting valid words, wouldn't you say?

Sep Roland
  • 33,889
  • 7
  • 43
  • 76
  • 2
    `or al, 32` makes a letter *lower* case. (As I'm sure you know, but for future readers: [What is the idea behind ^= 32, that converts lowercase letters to upper and vice versa?](https://stackoverflow.com/a/54585515)). – Peter Cordes Nov 18 '22 at 09:16
  • 1
    That sounds like a sensible algorithm. You might try using a bitmap as a lookup table instead of a sequence of compares, but that only works well in 32 or 64-bit mode (`bt` or `mov`+`shr`+`test al,1`); a 16-bit register isn't wide enough even to check the whole alphabet. And we expect most characters to not match any of the special values, so a 4-bit hash or bloom filter probably doesn't help, unless we can rule out the possibility of it being one of the stop characters more quickly. And that's more algorithmic complexity than necessary – Peter Cordes Nov 18 '22 at 09:18
  • 1
    @Sep Roland, Thank you very much for Your help. I got the idea, of how I can do it, and I DID IT. Thanks to you. So my new task was that "good" words are words that do not contain letters from the alphabet to 'k', lower case, and upper case. I did that with your algorithm. Thank you very much. – Kurbamit Nov 18 '22 at 13:09
1
    mov al, [di]
    cmp al, ' '         *
    je .nextWord        *
    cmp al, ';'         *
    je .nextWord        *
.checkWord:
    mov al, [di]
    inc di

You are losing characters from the string!
Consider eg. "mouse bat" where .checkWord will fetch the space character and leave DI pointing at the "b", but then procNextWord starts by incrementing DI, effectively erasing that "b" from inspection. The rest of the program only sees "at" and thinks it's a 'good' word.

procNextWord:
        dec  di         ; Correcting DEC
    .t1:
        inc  di
        mov  al, [di]
        cmp  al, ' '
        je   .t1
        cmp  al, ';'
        jne  .return
    .t2:
        inc  di
        mov  al, [di]
        cmp  al, ' '
        je   .t2
    .return: 
        ret

If you apply the proposed correction, then also change the lines that I marked with asterisks and write:

    cmp  al, ' '
    je   .nextWordEx
    cmp  al, ';'
    je   .nextWordEx

    ...

.nextWordEx:
    inc  di
.nextWord:
    call procNextWord
    jmp  .checkWord

[edit]

When you found an unsuitable character in a word, you need to skip the remainder of that word and that is not something the procNextWord was build for! You need another code:

  cmp  al, 'B'
  je   .skipRemaining
  cmp  al, 'b'
  je   .skipRemaining
  cmp  al, 'C'
  je   .skipRemaining
  cmp  al, 'c'
  je   .skipRemaining
                
  cmp  al, ' '
  je   .addNumber
  cmp  al, ';'
  je   .semicolon
  jmp  .checkWord

.skipRemaining:
  call procSkipRemaining
  jmp  .checkWord

  ...

procSkipRemaining:
  mov  al, [di]
  cmp  al, " "
  je   .return
  cmp  al, ";"
  je   .return
  inc  di
  jmp  procSkipRemaining
.return:
  ret
Sep Roland
  • 33,889
  • 7
  • 43
  • 76
  • Yeah, I got why I need "dec di". So I would not lose some parts of the string. But somehow this brings me another problem. Eg. the word "bbaa", program counts this word as a "good" word. And if I change last word's symbol to 'b', it doesn't count that word. It looks like it only checks the last symbol of the word. – Kurbamit Nov 17 '22 at 19:48
  • 1
    @Kurbamit See the edit to the answer. – Sep Roland Nov 17 '22 at 20:13
  • Thank you very much. I know that I might be asking too much and You have already helped me so much. I did try to count everything and write it down, in my counting all of this should work, but for some reason, the program is not paying attention to all the cmp ('B' 'b' 'C' 'c') and still counting these (that have letters 'b' 'c') words like "good" words. I am getting out of my mind by trying to solve this. – Kurbamit Nov 17 '22 at 20:51
  • 1
    @Kurbamit I don't see how eliminating 'B' would work and 'b' would not. Run your tests from an inputfile with just 2 lines: the headers lines and just 1 data line. – Sep Roland Nov 17 '22 at 20:57
  • Sorry, it was my bad. I meant all the letters 'B' 'b' 'C' 'c'. The program just ignores them for some reason. It just counts them as normal. I ran the test file with 1 data line. a;b;1;1;1;1.00 and the answer is 2. – Kurbamit Nov 17 '22 at 21:17
  • [github](https://github.com/Kurbamit/test/blob/main/test.asm) – Kurbamit Nov 17 '22 at 21:27
  • I put my code in github. I was looking and trying to understand, why this program don't do anything with words, that contains 'B' 'b' 'C' or 'c'. In my head, this should skip that word, because I am comparing with them and if it is equal, I am trying to skip that word. But that doesn't work for some reason. – Kurbamit Nov 17 '22 at 21:52
  • 1
    @Kurbamit See my second answer for a radically new approach. Sure hope this works (I don't actually *run* this code) – Sep Roland Nov 17 '22 at 22:14