0

I'm trying to calculate the value of sum of two linked list each representing a number. Every link in a list contains 5 bytes. The first byte representing the value of the link (4 bits for representing an hexa digit and 4 zeroes) and the other 4 bytes is the address of the next link in the list. Also I allocated bytes called "stack" holds the pointers to the first links of every linked list. The first link in every list is the less significant digit of the number it is represented. I'm trying to calculate the value on the top two lists in my stack. I iterate them both and considerate the carry, then every iterate push the result to the stack and eventually pop the results from the stack and create new list with the value of the sum.

Code snipped:

my auxiliary functions:

section .data

    firstLink: dd 0
    stackSize: dd 0
    stackCurSizeBytes: dd 0
    stackPointer: dd 0
    stackCurSize: dd 0
    nextLink: dd 0
    data: db 0
    carry: db 0
    numPush: dd 0 

%macro addNewLink 1
    mov [add], %1
    push 5
    call malloc
    add esp, 4

    mov bl, [add]
    mov [eax], byte bl 
    mov edx, [firstLink]
    mov [eax+1], edx
    mov [firstLink], eax
    mov [add], byte 0

%endmacro


%macro getNext 1 ; get pointer to link and return pointer to next link
    mov eax, %1
    mov eax, [eax+1]
    mov [nextLink], eax
%endmacro

%macro pushLinkedList 0
    mov edx, [firstLink]
    mov ecx, [stackPointer]
    add ecx, [stackCurSizeBytes]
    mov [ecx], edx
    inc dword [stackCurSize]
    add [stackCurSizeBytes], dword 4
%endmacro

%macro getLinkData 1 ; gets pointer to link and return its data (byte)
    mov ebx, %1
    mov ebx, [ebx]
    mov [data], dword ebx
%endmacro

sum function:

ratorPlus:
    cmp [stackCurSize], dword 1
    jle  argumentError

    mov edx, [stackPointer] ;edx has the top operand pointer
    add edx, [stackCurSizeBytes]
    sub edx, 4
    mov edx, [edx]

    dec dword [stackCurSize] ; dec stack
    sub [stackCurSizeBytes], dword 4 ; dec stack

    mov ecx, [stackPointer] ;ecx has the second top operand pointer
    add ecx, [stackCurSizeBytes]
    sub ecx, 4
    mov ecx, [ecx]

    dec dword [stackCurSize] ; dec stack
    sub [stackCurSizeBytes], dword 4 ; dec stack

    mov [firstLink], dword 0    ;initialize function params
    mov [numPush], dword 0
    mov [carry], byte 0

    checkLen3:
        cmp edx, dword 0x00
        je endFirstNumber3
        cmp ecx, dword 0x00
        je SecondEndedFirstNot3
        ;both numbers still not null
        getLinkData edx

        mov ebx, [data] ;ebx is the first list data
        mov [tempreg], dword ebx
        getLinkData ecx

        mov ebx, [tempreg]
        mov eax, [data] ;eax is the second list data

        add ebx, eax    ;preform plus and and stores the result in ebx
        add ebx, [carry]

        cmp ebx, 0xF
        jle noCary1
        mov [carry], byte 1
        sub ebx, 0x10
        jmp endCary1
        noCary1:
        mov [carry], byte 0
        jmp endCary1
        endCary1:
        push ebx
        inc dword [numPush]
        getNext edx
        mov edx, [nextLink]
        getNext ecx
        mov ecx, [nextLink]
        jmp checkLen3

    endFirstNumber3:    
    cmp ecx, dword 0x00
    je BothEnded3
    ;second is longer
        getLinkData ecx
        mov eax, [data] ;eax is the data
        add eax, [carry]
        cmp eax, 0xF
        jle noCary2
        mov [carry], byte 1
        sub eax, 0x10
        jmp endCary2
        noCary2:
        mov [carry], byte 0
        jmp endCary2
        endCary2:
        push eax
        inc dword [numPush]
        getNext ecx
        mov ecx, [nextLink]
        jmp checkLen3

    SecondEndedFirstNot3: ;first s longer
        getLinkData edx
        mov eax, [data] ;eax is the data
        add eax, [carry]
        cmp eax, 0xF
        jle noCary3
        mov [carry], byte 1
        sub eax, 0x10
        jmp endCary3
        noCary3:
        mov [carry], byte 0
        jmp endCary3
        endCary3:
        push eax
        inc dword [numPush]
        getNext edx
        mov edx, [nextLink]

        jmp checkLen3

    BothEnded3:

        cmp [carry], byte 1 ;both numbers ended
        jne BothEnded2Loop
        push 1
        inc dword [numPush]
        BothEnded2Loop:
        cmp [numPush], dword 0
        je endRatorPlus
        pop eax
        addNewLink al
        dec dword [numPush]
        jmp BothEnded2Loop

    endRatorPlus:
        mov [numPush], dword 0
        mov [carry], dword 0
        pushLinkedList
        jmp running

I think the logic here is pretty simple but somehow it doesn't work well, I also tried to debug it using gdb but then wierd things happens.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
  • So, what weird thing happened? – Jester May 15 '20 at 23:03
  • I excpect the carry to be either 0 or 1 but somehow it gets garbage values. Also the registers before I push them to the stack contains garbage values. – Avi Ferdman May 15 '20 at 23:07
  • 1
    If carry is not 0 or 1 that means something has overwritten it. Put a write watch on it and/or single step the program to see where it goes wrong. This is a prime candidate: `mov [data], dword ebx`. Note your `data` is `db` not `dd`. – Jester May 15 '20 at 23:12
  • Example for a wired thing is when I write: mov eax,0 add eax, [data] ;eax is the second list data I see when I debug it that eax is 0 and on the second line it sets the first byte correct because [data] is one byte but also all the rest of the register get garbage values. the same things happens when I use: mov eax,0 mov eax, [data] ;eax is the second list data – Avi Ferdman May 15 '20 at 23:37
  • 1
    That is because you allocated 1 byte for `data` not 4, as I already pointed out. `add eax, [data]` will fetch 4 bytes, it doesn't know that you only allocated/filled 1. – Jester May 15 '20 at 23:48
  • It happens also when I write: mov eax, 0 mov al, [data] does it also explain it? – Avi Ferdman May 15 '20 at 23:53
  • 1
    That has no effect on the top bits of `eax` so whatever was in there stays in there. If you zeroed the full `eax` then that should work as far as the `mov` is considered. – Jester May 15 '20 at 23:55
  • I don't see the point of having `data` and `carry` in memory at all; you should have enough registers for two pointers and a sum / carry-in/out, with plenty left over for temporaries if you need more than 4 ever. Storing / reloading from static storage just makes your code more complicated. – Peter Cordes May 16 '20 at 00:01
  • And BTW, I thought an array of decimal digits was [the worst imaginable way to store a BigInteger](//codereview.stackexchange.com/a/237764). But this is several times worse for efficiency and performance just to iterate over, and at least an order of magnitude worse when calling `malloc` to allocate new storage for a result, once per 4 result bits. I don't see why a linked list with separate allocations for each digit would be useful; are you doing to remove a random digit from the middle of the number in O(1) time more often than you add or copy? I guess as an exercise in using linked lists – Peter Cordes May 16 '20 at 03:15
  • yes, it is an exercise in assembly, the demand was to use those kind of linked lists – Avi Ferdman May 16 '20 at 09:02
  • Using O(n) space on the stack seems weird to me. If you have to use linked lists, it would make more sense to allocate nodes as you calculate them. – Peter Cordes May 16 '20 at 14:51

1 Answers1

2

db 0 is only 1 byte, but ebx and eax are dword registers. You're overwriting later data in memory with your dword store operations, and / or loading garbage into the high bytes with dword loads. Use movzx eax, byte [symbol] to load and zero-extend a byte. Use mov [symbol], al to store a byte. There are several places in your code where the comments say "byte" and the code explicitly specifies "dword"...


Just for the record, a container of single digits is just about the worst imaginable data structure for a BigInteger. Using a linked list instead of array makes this much worse, and is very silly because most operations are going to allocate or free all the nodes for a number at once, so having them as separate allocations with pointers wastes a huge amount of time and space.

That said, using the callstack to hold all N digits temporarily won't work if your numbers are truly huge, and it's unnecessary work.

Also, your code is overcomplicated. There's no need for any static storage at all, and barely any stack space. (Just one pointer). You can malloc on the fly as you calculate new digits.

I was curious what a more efficient version could look like, so I wrote one. Untested.

;; UNTESTED
extern malloc

;; Node *llplus(const Node *L1, const Node *L2);
;; returns a pointer to the head of a newly-allocated linked list
global llplus
llplus:
    push    ebp
    push    ebx                 ; save call-preserved regs for locals that survive malloc
    push    esi
    push    edi
    mov     edi, [esp+16 + 4]        ; Node *L1
    mov     esi, [esp+16 + 8]        ; Node *L2
    sub     esp, 12                  ; align the stack for malloc, space for an arg a [esp] and locals

    xor     ebx, ebx                 ; carry  from last iteration
    lea     ebp, [esp+7]             ; dummy first node will get a pointer to the first real malloced node at [esp+8]

.mainloop:
    mov     dword [esp], 5
    call    malloc              ; malloc(5)
    mov     [ebp+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    add     bl, [edi]
    add     bl, [esi]           ; sum = carry + L1->val + L2->val
    movzx   eax, bl
    shr     bl, 4               ; carry for next time = sum>>4
    and     al, 0xf
    mov     [ebp+0], al         ; tail->val = digit = sum & 0xf

    mov     edi, [edi+1]        ; L1 = L1->next
    mov     esi, [esi+1]
    test    edi, edi
    jz     .L1_end
    test    esi, esi
    jnz    .mainloop
    ; else fall through
.L2_end:                        ; do{ propagate carry to the end of L1
    ; another version of the same loop that only reads from one linked list
    mov     dword [esp], 5
    call    malloc
    mov     [ebp+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    add     bl, [edi]           ; carry += L1->val
    movzx   eax, bl
    shr     bl, 4               ; carry for next time = sum>>4
    and     al, 0xf
    mov     [ebp+0], al         ; tail->val = digit = sum & 0xf

    mov     edi, [edi+1]
    test    edi, edi
    jnz     .L2_end             ; }while(L1 = L1->next);

    test    bl, bl
    jnz    .final_carry_out

.both_end:
    mov     dword [ebp+1], 0    ; tail->next = NULL
    mov     eax, [esp+8]        ; return dummy_head->next = first result node.
    add     esp, 12
    pop     edi
    pop     esi
    pop     ebx
    pop     ebp
    ret

.L1_end:
    test  esi, esi
    jz    .both_end

    mov   edi, esi       ; L1 = L2
    jmp   .L2_end        ; and use the other cleanup

.final_carry_out:
    mov     dword [esp], 5
    call    malloc
    mov     [ebp+1], eax      ; tail->next = p
    mov     byte [eax], bl    ; p->val = carry = 1
    mov     ebp, eax          ; tail = p
    jmp     .both_end

The L2_end loop repeats most of the same code as the main loop. We could maybe avoid it if we had a dummy linked list node that pointed to itself and had val = 0. Then end conditions could check for NULL or that dummy node and jump back in to the main loop. That might or might not save overall static code size, but would make branching more complicated.


To avoid needing all 4 call-preserved registers (including EBP) for locals, we could leave the carry between iterations in memory. If malloc is very fast, that might create a bottleneck on store / reload latency, though. (Then EBX would be free to hold an output pointer).

Note that adding two 4-bit values (even with carry in) can produce at most a 5-bit value, so the carry out is the 1<<4 bit of the result.

.mainloop:
    mov     dword [esp], 5
    call    malloc              ; malloc(5)

    btr     [ebx], 4            ; CF = bit 4 of tail->val, and clear it
    mov     [ebx+1], eax        ; tail->next = p
    mov     ebp, eax            ; tail = p

    movzx   eax, [edi]
    adc     al, [esi]           ; sum = CF + L1->val + L2->val
    mov     [ebp+0], al         ; tail->val = sum = carry:digit, carry to be cleared later


    ...  ; after loops

    btr    byte [ebx], 4
    jc   .final_carry_out

btr is Bit Test and Reset. It might cause store-forwarding stalls here if it does a dword load and store after the previous iteration only did a byte store. If you actually cared about performance you wouldn't be doing anything like this in the first place, or you'd be using EBP for another pointer. Or writing 64-bit code which isn't so register-poor.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847