I'm implementing 3*3 matrix and 3*1 matrix multiplication using RISC-V assembly language, with GNU C inline asm.
// description: matrix multiply with two-level for loop
#include<stdio.h>
int main()
{
int f,i=0;
int h[9]={0}, x[3]={0}, y[3]={0};
FILE *input = fopen("../input/3.txt","r");
for(i = 0; i<9; i++) fscanf(input, "%d", &h[i]);
for(i = 0; i<3; i++) fscanf(input, "%d", &x[i]);
for(i = 0; i<3; i++) fscanf(input, "%d", &y[i]);
fclose(input);
int *p_x = &x[0] ;
int *p_h = &h[0] ;
int *p_y = &y[0] ;
for (i = 0 ; i < 3; i++)
{
p_x = &x[0] ;
/*
for (f = 0 ; f < 3; f++)
*p_y += *p_h++ * *p_x++ ;
*/
for (f = 0 ; f < 3; f++){
asm volatile (
"addi t0, zero, 2\n\t"
"bne t0, %[f], Multi\n\t"
"mul t1, %[sp_h], %[sp_x]\n\t"
"add %[sp_y], %[sp_y], t1\n\t"
"addi %[p_h], %[p_h], 4\n\t"
"addi %[p_x], %[p_x], 4\n\t"
"addi %[p_y], %[p_y], 4\n\t"
"beq zero, zero, Exit\n\t"
"Multi: \n\t"
"mul t1, %[sp_h], %[sp_x]\n\t"
"add %[sp_y], %[sp_y], t1\n\t"
"addi %[p_h], %[p_h], 4\n\t"
"addi %[p_x], %[p_x], 4\n\t"
"Exit: \n\t"
:[p_y] "+&r"(p_y),
[p_x] "+&r"(p_x),
[p_h] "+&r"(p_h),
[sp_y] "+&r"(*p_y)
:[sp_h] "r"(*p_h),
[sp_x] "r"(*p_x),
[f] "r"(f)
:"t0","t1"
);
printf("x value=%d, h value=%d, y value=%d, y address=%d\n", *p_x, *p_h, *p_y, p_y);
}
}
p_y = &y[0];
for(i = 0; i<3; i++)
printf("%d \n", *p_y++);
return(0) ;
}
I want to transfer this comment
for (f = 0 ; f < 3; f++)
*p_y += *p_h++ * *p_x++ ;
p_y++;
into asm volatile(...), but above code in asm I encounter this problem:
my problem
It seems that p_y
address is added correct, and my multiplication is correct, but my value store into memory is wrong. It will store into memory too fast, and now my answer adds previous answer together. Above code answer is 5 28 69
Could anybody help me?
I have edited the "+r"
to "+&r"
and add the clobbers to the code, but it doesn't work.
And by the way, i get right answer from this code:
for (f = 0 ; f < 3; f++)
asm volatile ("mul %[tmp], %[sp_h], %[sp_x]\n\t"
"add %[sp_y], %[sp_y], %[tmp]\n\t"
"addi %[p_x], %[p_x], 4\n\t"
"addi %[p_h], %[p_h], 4\n\t"
:[tmp] "=r"(tmp),
[p_x] "+r"(p_x),
[p_h] "+r"(p_h),
[sp_y] "+r"(*p_y)
:[sp_h] "r"(*p_h),
[sp_x] "r"(*p_x)
);
p_y++;
I expect
14
32
50
for the answer. And here is input data:
1 2 3 4 5 6 7 8 9
1 2 3
0 0 0