0

this question is a follow up to: Finding an efficient shift/add/LEA instruction sequence to multiply by a given constant (avoiding MUL/IMUL) I'm learning assembly and I encountered this exercise: I need to write a c file that receives as argument (argv[1]) an integer number that is a constant that we should multiply by, and the c file creates an assembly file that multiplies some parameter x (received in %rdi) in that constant. It should be efficient in terms of lines of assembly code and follow these rules:

1) if k is 0,1,-1 no need to multiply (obviously)
2) if k is 3,5,9 I then need to replace the multiplication command in LEA.
3) if k is 3*2^n, 5*2^n, 9*2^n then I need to replace the multiplication command in LEA and shifting.
4) all other cases suppose to work also of course.

The automatic checker tells me my code is not efficient enough for some reason although it does produce the right results (at least from what I've checked) so can anyone please help me and tell me how to reduce the assembly lines in the file that is created by my code?

My code:

#include <stdio.h>
#include <stdlib.h>
#include<stdbool.h>
#include<math.h>

int charToInt(const char * c);
unsigned charToUnsigned(const char * c);

int main (int argc, char *argv[])
{

  char *p;
  FILE * fPtr;
  fPtr = fopen("mult.s", "w");

  fputs( ".section .text\n", fPtr );
  fputs( ".globl   mult\n", fPtr );
  fputs( "mult:  \n", fPtr );

  int k = charToInt(argv[1]);
  if (k == 0) {
    fputs("\txor\t%eax,%eax\n", fPtr);
  } else if (k == 1) {
    fputs("\tmov\t%edi,%eax\n", fPtr);
  } else if (k == -1) {
    fputs("\tmov\t%edi,%eax\n", fPtr);
    fputs("\tneg\t%eax\n", fPtr);
  }
  else if(k==3)
  {
    fputs("\tleal\t(%edi,%edi,2),%eax\n", fPtr);
  }
  else if(k==5)
  {
    fputs("\tleal\t(%edi,%edi,4),%eax\n", fPtr);
  }
  else if(k==9)
  {
    fputs("\tleal\t(%edi,%edi,8),%eax\n", fPtr);
  } else {
    int count_zeros = __builtin_ctz(k);//check the number of zeros
    int arg = k >> count_zeros;
    if (arg == 3){
      fputs("\tleal\t(%edi,%edi,2),%eax\n", fPtr);
      fputs("\tshl\t$", fPtr);
      fprintf(fPtr, "%d,", count_zeros);
      fputs("%eax\n", fPtr);
    }
    else if (arg == 5){
      fputs("\tleal\t(%edi,%edi,4),%eax\n", fPtr);
      fputs("\tshl\t$", fPtr);
      fprintf(fPtr, "%d,", count_zeros);
      fputs("%eax\n", fPtr);
    }
    else if (arg == 9){
      fputs("\tleal\t(%edi,%edi,8),%eax\n", fPtr);
      fputs("\tshl\t$", fPtr);
      fprintf(fPtr, "%d,", count_zeros);
      fputs("%eax\n", fPtr);
    } else {
      fputs("\txor\t%eax, %eax\n", fPtr);
      int l = 0;
      int i;
      for(i=0; i<32;i++)
      {
        l = k >> i;
        l = l & 0x00000001;

        if (l == 1) {
          if (i==0)
          {
            fputs("\tmov\t%edi,%eax\n", fPtr);
          }
          if(i>0 && i<31)
          {
            fputs("\tmov\t%edi,%edx\n", fPtr);
            fputs("\tshl\t$", fPtr);
            fprintf(fPtr, "%d,", i);
            fputs("%edx\n", fPtr);
            fputs("\tadd\t%edx, %eax\n", fPtr);
          } else if (i == 31){
            fputs("\tmov\t%edi,%edx\n", fPtr);
            fputs("\tshl\t$", fPtr);
            fprintf(fPtr, "%d,", i);
            fputs("%edx\n", fPtr);
            fputs("\tsub\t%edx, %eax\n", fPtr);
          }
        }

      }
    }

  }

  fputs("\t\tret\n", fPtr);
  fclose(fPtr);

  return 0;
}
unsigned charToUnsigned(const char * c) {
  char p = *c;
  unsigned arg = 0;
  while (p) {
    arg = arg * 10 + (p - '0');
    c++;
    p = *c;
  }
  return arg;
}

int charToInt(const char * c) {
  return (*c == '-') ? -charToUnsigned(c+ 1) : charToUnsigned(c);
}

Test code to run:

#include <stdio.h>
extern int mult(int);
int main ()
{
    int k, x;
    printf ("Enter k and x: ");
    scanf ("%d %d", &k, &x);
    printf("\nUsing k * x:\n");
    printf ("%d * %d = %d\n", k, x, k * x);
    printf("\nUsing mult(%d):\n", x);
    printf ("%d * %d = %d\n", k, x, mult(x));
    return 0;
}

Thanks!

Jester
  • 56,577
  • 4
  • 81
  • 125
Blur
  • 85
  • 1
  • 5
  • 1
    I believe the `xor` before the `int l = 0;` is not needed. You will next emit a `mov %edi,%eax` which overwrites it. You sometimes generate a `shl $x, %edx;add %edx, %eax` which can be written as `lea (%eax, %edx, 2^x), %eax` if `x` is 1,2 or 3. Your code seems to fall back to bitwise multiplication, so e.g. `324` takes 8 instructions instead of 3 (`lea (%edi, %edi, 8), %eax; lea (%eax, %eax, 8), %eax; shl $2, %eax`). Those are just some things I spotted, maybe more possibilities. – Jester Dec 13 '19 at 13:47
  • @Jester Can you please explain again what you meant with the lea command and how to improve it? – Blur Dec 13 '19 at 13:55
  • What do you need explained? – Jester Dec 13 '19 at 14:07
  • @Blur: That's the x * 3, 5, or 9 case. Shift by 1,2, or 3 and then add another copy can be done with one LEA. – Peter Cordes Dec 13 '19 at 14:13
  • `leal\t(%edi,%edi,8),%eax` This is an x86-64 question. You don't need to and shouldn't use 32-bit address size for LEA ever in 64-bit cdoe. Truncating the result to 32-bit with 32-bit *operand*-size is efficient. – Peter Cordes Dec 13 '19 at 14:18
  • Still didn't understand how can it be done in general I'm afraid... I also have efficiency problem with 14 apperantly... – Blur Dec 13 '19 at 14:37
  • Yeah 14 falls back to the simple multiplication too, instead of doing for example `lea (%edi, %edi, 2), %eax; lea (%eax, %edx, 4), %eax; shl $1, %eax`. – Jester Dec 13 '19 at 14:43
  • I can't seem to grasp the idea you're trying to convey to me. How 234 is x*2^n? And thanks a lot for trying to help Jester and Peter Cordes! – Blur Dec 13 '19 at 14:44
  • And in response to your new comment how 14 is x*2^n? I don't seem to understand... – Blur Dec 13 '19 at 14:48
  • Actually they are, depending on what `x` means :). I just showed that your program generates less efficient code than it could. `324` is `9*9*2^2` and for `9` you already have an efficient `lea` that you could use twice. For `14` you can use `(3+4)*2`. – Jester Dec 13 '19 at 14:49
  • oh I think I finally start to get what you're saying... But how can I idenify a 9 case? doesn't it changes my program completely? – Blur Dec 13 '19 at 14:55
  • and for your your new comment Jester how can I identify the (3+4) and with what to replace them? I really don't know how to edit my code so it can fit your described cases... – Blur Dec 13 '19 at 15:11
  • You might just want to extend your cases to include the values you left out, e.g. 6 and 7. Then you could apply the rules in a loop testing not just for equal but also divisible. So `324` would give you `81` after chopping off two zeroes, then you'd see it's divisible by 9 so you could apply that rule, leaving you with `9` for yet another application of that rule. still might not be optimal. – Jester Dec 13 '19 at 15:16
  • Could you please edit my code? I really don't understand how can I add 6 and 7 and thenbe covered in the divisible case... or should I write a new one completely? – Blur Dec 13 '19 at 15:19
  • You can add 6 and 7 the same way you added all the other cases. Divisible test would be replacing `if (k == foo)` with `if (k % foo == 0)`. PS: also currently you only handle `-1` not any negative numbers so that needs to be fixed too. – Jester Dec 13 '19 at 15:28
  • So if I'm getting your point I need to completley change the flow of my program, right? And one more thing how can I get the (3+4)*2 example work? – Blur Dec 13 '19 at 15:42
  • Yeah. `(3+4)` is the 7 case. It will work by putting the code into your program :) The `*2` will be stripped by the trailing zero logic. – Jester Dec 13 '19 at 15:44

0 Answers0