[EDITED]
After running the code using the gdb debugger, movapd
really wasn't the issue (thanks to everyone who pointed that out). Going line by line, the fault is in the second comparison being made between xmm7
and xmm0
registers. Flow control looks like this:
everything up to checking which radius is larger is fine;
comisd xmm1, xmm3
compares accordingly
(p $xmm1.v2_double $1 = {10, 0} p $xmm3.v2_double $2 = {2, 0}
)
concludes that xmm1 is greater and jumps to the first
label
from there things go wrong when comisd
somehow deduces that xmm7
, is greater than xmm1
(p $xmm7.v2_double $3 = {1.4142135623730951, 1}
p $xmm1.v2_double $4 = {10, 0}
)
(Also I'm pretty sure that the values in the curly brackets of the print command in gdb are in reversed order. What I mean is in xmm1
10 is actually in the lower quadword and zero in the higher, so I couldn't make sense of how is 1.41 greater than 10)
This is the assembly code
#extern int circles(int n, double* cr);
.intel_syntax noprefix
.data
three: .int 3
.text
.global circles
circles:
enter 0, 0
mov rax, 1
cpuid
test rdx, 0x2000000
jz notSupported
mov rbx, rsp
and rsp, 0xfffffffffffffff0
sub rsp, 512
fxsave [rsp]
xor r10, r10 #pair counter
xor r8, r8 #outter loop counter
xor r9, r9 #inner loop counter
mov rax, rdi
mul dword ptr three
sub rax, 3
#rax value 3n-3 since our step is length of 3
pivotCircle:
cmp r8, rax
je done
#this is the line where it goes wrong
movupd xmm0, [rsi + 8*r8] #first circle center
movsd xmm1, [rsi + 8*r8 + 16] #radius first circle
mov r9, r8
nextCircle:
add r9, 3
cmp r9, rax
jg nextPivot
movupd xmm2, [rsi + 8*r9] #second circle center
movsd xmm3, [rsi + 8*r9 + 16] #second circle radius
#calculating distance between the centers
movapd xmm7, xmm0
subpd xmm7, xmm2
mulpd xmm7, xmm7
movapd xmm2, xmm7
shufpd xmm2, xmm2, 0b11
addsd xmm7, xmm2
sqrtsd xmm7, xmm7 # |c1 - c2| in xmm7
#checking which radius is bigger
comisd xmm1, xmm3
jge first
jmp second
first: #first one greater
comisd xmm7, xmm1
jge nextCircle
movsd xmm6, xmm1
subsd xmm6, xmm7
comisd xmm3, xmm6
jg nextCircle
jmp found
second: #second one greater
comisd xmm7, xmm3
jge nextCircle
movsd xmm6, xmm3
subsd xmm6, xmm7
comisd xmm1, xmm6
jg nextCircle
found:
inc r10
jmp nextCircle
nextPivot:
add r8, 3
jmp pivotCircle
done:
fxrstor [rsp]
mov rsp, rbx
mov rax, r10
leave
ret
notSupported:
mov rdi, 1
call exit
This is the main.c file. Elements in the array come in groups of three (a, b, radius).
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
extern int circles(int n, double* cr);
int main(int argc, char const *argv[])
{
int n;
double* cr;
scanf("%d", &n);
assert(n > 0);
cr = malloc(n * sizeof(double) * 3);
assert(cr != NULL);
for (int i = 0; i < n; i++)
{
scanf("%lf%lf%lf", cr+i*3, cr+i*3+1, cr+i*3+2);
assert(*(cr+3*i+2) > 0);
}
printf("%d\n", circles(n, cr));
return 0;
}