0

I've been tasked with programming an emulator to translate and execute MIPS machine code in C.

Currently, the program takes the MIPS code, translates it into binary, and stores it in an array that all currently works. The issue lies within the execute function of the code.

Most of the instructions execute but JR, JAL, BNE and BLEZ are not functional. I believe this is because when I attempt to change the PC counter to execute the instruction it causes an error. The registry output must remain the same as when it is returned on MARS and the program counter must increment correctly.

  // JR
  else if((text[TEXT_POS(pc)] & 0x0000003F) == 0x8)
  {
    printf("| Executing JR:\n");
    int s = (text[TEXT_POS(pc)] & 0x03E00000);
    pc = registers[s]; // <-- I believe the issue lies here.
  }



// JAL
else if((text[TEXT_POS(pc)] & 0xFC000000) == 0x0C000000)
{
  printf("| Executing JAL:");
  int address = (text[TEXT_POS(pc)] & 0x03FFFFFF);
  registers[/*$*/31] = pc + 8; // <-- And here
  pc = address;
}

I am attempting to set the PC counter = to the variable s Since this is how the MIPS Green sheet says it should be executed. However, it is not returning the expected value.

// BNE
else if((text[TEXT_POS(pc)] & 0xFC000000) == 0x14000000)
{
  printf("| EXECUTING BNE:\n");
  int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
  int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
  int imm = (text[TEXT_POS(pc)] & 0xFFFF)/*>> 2*/;
  if(registers[s] != registers[t])
  {
    printf("| EXECUTING PC BRANCH - BNE:\n");
    printf("PC (START) = %x\n",pc);
    printf("imm = %d\n", imm);
    pc = pc + 4 + imm; // <-- Unsure about '<< 2' (BLEZ Too)
    printf("PC (END) = %x\n",pc);
  }
}

// BLEZ
else if ((text[TEXT_POS(pc)] & 0xFC000000) == 0x18000000)
{
  printf("| EXECUTING BLEZ:\n");
  int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
  int tempJump = (text[TEXT_POS(pc)] & 0xFFFF) >> 2;
  if(registers[s] <= 0)
  {
    printf("| EXECUTING PC BRANCH - BLEZ :\n");
    pc = pc + 4 + tempJump; // <-- 'Label'? (MIPS REFERENCE SHEET)
  }
}

All this code is doing is executing the operations and this has proved successful for the other types like ADD, ADDI and SLL. However as soon as I try to change the program counter (PC) everything breaks and it no longer works.

int exec_bytecode()
{ 
  printf("EXECUTING PROGRAM ...\n");
  pc = ADDR_TEXT; // Set program counter to the start of our program.
  int count = 1; // <-- Stops while loop running forever. (REMOVE WHEN DONE!)

  // Debugging for checking value of program counter.
  printf("----- TESTING THE OPERATOR -----\n");
  printf("FIRST ADDRESS = %x\n",pc);
  for(int c1=0; c1<3; c1++)
  {
    printf("index[%d] = %x\n", c1,TEXT_POS(pc));
    pc=pc+4;
    printf("PC (inside the loop) = %x\n",pc);

  }

  // Debugging for checking value of text array.
  printf("----- TEST TEXT ARRAY -----\n");
  for(int c=0; c<3; c++)
  {
    printf("text[%d] = %x\n", c,text[c]);
  }

  pc = ADDR_TEXT; // Set program counter to the start of our program.

  // While loop to execute bytecode until text array is null.
  while(text[TEXT_POS(pc)] != 0)
  {   
    printf("----- WHILE LOOP EXECUTED -----\n"); 
    printf("text array = %x\n", text[TEXT_POS(pc)]);
    printf("PC = %x\n",pc);

    // ADDI - Finished
    if((text[TEXT_POS(pc)] & 0xFC000000) == 0x20000000)
    {
      printf("| EXECUTING ADDI:\n"); // <-- Printf for debuggin purposes
      int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
      int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
      short int imm = text[TEXT_POS(pc)] & 0xFFFF; // Last 16 bits
      registers[t] = registers[s] + imm;
      pc = pc + 4;
    }

    // ANDI - Finished
    else if((text[TEXT_POS(pc)] & 0xFC000000) == 0x30000000)
    {
      printf("| EXECUTING ANDI:\n"); // <-- Printf for debuggin purposes
      int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
      int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
      short int imm = text[TEXT_POS(pc)] & 0xFFFF;
      registers[t] = registers[s] + imm;
      pc = pc + 4;
    }

    // BNE
    else if((text[TEXT_POS(pc)] & 0xFC000000) == 0x14000000)
    {
      printf("| EXECUTING BNE:\n");
      int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
      int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
      int imm = (text[TEXT_POS(pc)] & 0xFFFF)/*>> 2*/;
      if(registers[s] != registers[t])
      {
        printf("| EXECUTING PC BRANCH - BNE:\n");
        printf("PC (START) = %x\n",pc);
        printf("imm = %d\n", imm);
        pc = pc + 4 + imm; // <-- Unsure about '<< 2' (BLEZ Too)
        printf("PC (END) = %x\n",pc);
      }
    }

    // BLEZ
    else if ((text[TEXT_POS(pc)] & 0xFC000000) == 0x18000000)
    {
      printf("| EXECUTING BLEZ:\n");
      int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
      int tempJump = (text[TEXT_POS(pc)] & 0xFFFF) >> 2;
      if(registers[s] <= 0)
      {
        printf("| EXECUTING PC BRANCH - BLEZ :\n");
        pc = pc + 4 + tempJump; // <-- 'Label'? (MIPS REFERENCE SHEET)
      }
    }
    

    else if((text[TEXT_POS(pc)] & 0xFC000000) == 0)
    {
      printf("| R-TYPE IF STATEMENT |\n");
      // ADD - Finished
      if((text[TEXT_POS(pc)] & 0x0000003F) == 0x20)
      {
        printf("| Executing ADD:\n");
        int d = (text[TEXT_POS(pc)] & 0x0000F800) >> 11;
        int s = (text[TEXT_POS(pc)] & 0x03E00000) >> 21;
        int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
        registers[d] = registers[s] + registers[t];
        pc = pc + 4;
      }

      // SLL - Finished
      else if((text[TEXT_POS(pc)] & 0x0000003F) == 0x00)
      {
        printf("| Executing SLL:\n");
        int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
        int d = (text[TEXT_POS(pc)] & 0x0000F800) >> 11;
        short int shamt = (text[TEXT_POS(pc)] & 0x000007C0);
        registers[d] = registers[t] << shamt;
        pc = pc + 4;
      }

      // SRL - Finished
      else if((text[TEXT_POS(pc)] & 0x0000003F) == 0x2)
      {
        printf("| Executing SRL:\n");
        int t = (text[TEXT_POS(pc)] & 0x001F0000) >> 16;
        int d = (text[TEXT_POS(pc)] & 0x0000F800) >> 11;
        short int shamt = (text[TEXT_POS(pc)] & 0x000007C0);
        registers[d] = registers[t] >> shamt;
        pc = pc + 4;
      }

      // JR
      else if((text[TEXT_POS(pc)] & 0x0000003F) == 0x8)
      {
        printf("| Executing JR:\n");
        int s = (text[TEXT_POS(pc)] & 0x03E00000);
        pc = registers[s]; // <-- I believe the issue lies here.
      }
    }

    // JAL
    else if((text[TEXT_POS(pc)] & 0xFC000000) == 0x0C000000)
    {
      printf("| Executing JAL:");
      int address = (text[TEXT_POS(pc)] & 0x03FFFFFF);
      registers[/*$*/31] = pc + 8; // <-- And here
      pc = address;
    }
    
    if(count == 10)
    {
      break;
    }
    count++;
  }

  printf("----- PRINTING REGISTERS -----\n");
  print_registers(); // print out the state of registers at the end of execution

  printf("... DONE!\n");
  return (0);
}

Above is the full EXC function

ADDI was given as an example to base the rest on!

datenwolf
  • 159,371
  • 13
  • 185
  • 298
  • Your `addi` implementation doesn't check for signed overflow. It, unlike `addiu`, needs to take an exception if the inputs are the same sign but the output has opposite sign. (http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt). Unless you're implementing a fake / simplified MIPS without that feature, so `addi` and `addiu` are the same instruction. – Peter Cordes Jun 01 '22 at 00:48
  • But your C implementation of it has the same wrapping of a 32-bit `int` in C, which is C undefined behaviour unless you compile with `gcc -fwrapv` or similar. Use `unsigned int` or `uint32_t` (but still signed `int16_t`) to make it safe in C. MIPS is a 2's complement machine, so signed wrapping is the same as unsigned wrapping, so you can use `uint32_t` addition. – Peter Cordes Jun 01 '22 at 00:49
  • Your `andi` implementation has some bugs beyond the ones copied from `addi`: you should be zero-extending the immediate for bitwise booleans, so don't use signed `short`. Also, you forgot to change `+` instead of `&`. Your comment on it says "Finished", but it looks like you copy/pasted `addi` and didn't change anything except the printf message. And BTW, you don't need decode `rt` and `rs` separately; you can make that common for all I-type instructions. MIPS machine code uses a couple common formats to make it easy for hardware to decode, which also makes it easy for SW. – Peter Cordes Jun 01 '22 at 00:53
  • 1
    Do not edit questions to "solved" or "thanks for the help", deleting all the question text here on StackOverflow. This is not how this site is supposed to work. Please read the the guide for new users on the etiquette here. – datenwolf Jun 01 '22 at 14:56

1 Answers1

1
  • JR

    The handling code fails to right justify the register number: it is unshifted, so I don't see how you can use that for an array index into the register file.  Have you tried debugging this C code?  You should have noticed the register number s being very large.

  • JAL

    The handling code doesn't follow the specification, which is to keep the upper 4 bits of the existing PC+4 (or maybe +0 or even +8 in a simulator that uses different machine-code than real MIPS). Only merge in the shifted 26-bit immediate into the lower 28 bits of the new PC.

  • BNE

    The immediate is signed 16 bits, so sign extend to 32 bits and then shift it to quadruple.  Sign extending a 16 bit value to 32 bits is easy in C: just cast to short, we won't even need the mask: (short) text[TEXT_POS(pc)].  This shortening cast will immediately return to full 32 bits when used with anything else, but this time sign extending from 16 bits.  (The sign extended, shifted immediate is relative to PC+4. Or PC+0 on QtSPIM with branch delay slots disabled.)

    Most C implementations use a 16-bit short, but int16_t from stdint.h would be reliably 16-bit 2's complement on any C implementation where it exists. (And fail to build on ones that don't have an int16_t, which is what you want instead of working wrong.)


See How to Calculate Jump Target Address and Branch Target Address? for how real MIPS calculates branch targets, in the full ISA with a branch delay slot. Some simulators differ, even though they don't need to. (Return address calculation needs to depend on whether there's a delay slot, but target-calculation for J-type jumps and I-type branches can always work the same.)

If you're trying to be compatible with MARS, you should double check whether it is using PC+4 or PC+8, and do that everywhere you use the PC.  In the JAL, for example, you have PC+8 for the return address, but in the BNE you have PC+4 to work with the immediate.  If there's no branch delay slot, then your JAL emulation will skip one instruction upon function return.

MARS & QtSpim, the other popular MIPS simulator, have differences in this area, and, differences from official MIPS documentation, so double check what the simulator is doing for each such, if you want to be compatible with it.


We don't generally refer to MIPS machine code as bytecode, but rather as machine code.  It is a fixed length 32-bit instruction set, and there's nothing "byte" about it, really — we might call it wordcode, but that's not a term.

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
Erik Eidt
  • 23,049
  • 2
  • 29
  • 53
  • PC+8 is the return address for JAL, but in what environment does J / JAL use PC+8 for the target address? I think real MIPS (with a branch delay slot) still uses the address of the end of the J itself (the branch delay instruction) for the high 4 bits to keep for the section-absolute target calculation. Does MARS with branch delay slots enabled work differently? If you're sure about this, it would be a good idea to update [How to Calculate Jump Target Address and Branch Target Address?](https://stackoverflow.com/q/6950230) - I've previously edited it to the best of my understanding. – Peter Cordes May 31 '22 at 21:12
  • @PeterCordes, with `label: beq $0, $0, label`, on MARS4.5 we get 0x1000ffff, and on QtSpim9.1.20 we get 0x10000000, so these two simulators (both with branch delay slot enabled *unselected*, as is their default) differ in their PC basis for I-Type branch instructions. Sorry for the confusion, I was just warning the OP if they need simulator compatibility to check these things. – Erik Eidt May 31 '22 at 22:26
  • So QtSPIM's no-branch-delay option really does change the machine code for branches as well as how they executes, which seems like a bad design. And yeah worth warning about. Perhaps deserves a mention in that canonical Q&A about branch targets, maybe as a separate answer. – Peter Cordes May 31 '22 at 22:35
  • But you talked about differences in J-type instructions, too, and suggested that PC+8 could enter into it. If QtSPIM handles J-type like it does I-type, it would be section-absolute to PC+0 by disabling branch delays, assuming it matches hardware when the delay slot is enabled. If PC+0 vs. PC+4 are the realistic possibilities, you should probably change this answer. – Peter Cordes May 31 '22 at 22:35
  • @PeterCordes, the OP mentioned MARS compatibility, and MARS by default does not enable branch delay slot. So, if that is their environment, then PC+8 captured into `$ra` as the return address would skip one instruction upon return. Again, not sure what is proper for them, just trying to offer a cautionary note of doing what MARS does vs. e.g. the MIPS green sheet. – Erik Eidt May 31 '22 at 22:37
  • I'm talking about the `JAL` bullet point where you wrote *...which is to keep the upper 4 bits of the existing PC (+8 or +4)*. I'm of course talking about the jal target calculation, not the return address which has to vary with branch-delay disable/enable, independently of which address provides the top 4 bits for the JAL target. A J or JAL as the last instruction in a 256MiB region is a rare corner case (unlike `b` where it always matters and thus will be detected by any testing or comparison against other implementations), but it's good to get it right. – Peter Cordes May 31 '22 at 22:41
  • @PeterCordes, oh, right. I don't know this one, was just suggesting that the OP check what MARS actually does, in the rare case that a `jal` executes at the last or second last address in the 2^28 segment, as if it is PC+4 vs. PC+8, it could affect the upper 4 bits. As I mentioned, I haven't checked this one, just trying to elaborate the gotcha's. – Erik Eidt May 31 '22 at 22:48
  • 1
    Real MIPS uses PC+4. I don't think it's plausible that any simulator would use PC+8, but PC+0 is a definite possibility. It makes sense to suggest checking on that, but with PC+4 (real MIPS) vs. PC+0 (some simulators might do this, especially without branch-delay slots.) – Peter Cordes May 31 '22 at 22:50
  • (On further thought, it's possible a buggy or inaccurate simulator could take the high bits relative to the end of the branch delay slot, so PC+8 is worth at least considering. But definitely PC+0 and PC+4 are the ones to mention.) – Peter Cordes May 31 '22 at 23:42
  • @ErikEidt This is my end of the year project for my first year at university and I'm struggling with shifting the immediate for branching operation. Would it be possible to get more details on how you would do that in C because I can't seem to find the answer anywhere and this coursework is now approaching its deadline and I need to be able to do this for the A. –  Jun 01 '22 at 11:59
  • @PeterCordes This is my end of the year project for my first year at university and I'm struggling with shifting the immediate for branching operation. Would it be possible to get more details on how you would do that in C because I can't seem to find the answer anywhere and this coursework is now approaching its deadline and I need to be able to do this for the A. Sorry to repost but the deadline is in 2 hours and I'm in a rush. –  Jun 01 '22 at 12:44
  • I wrote how to sign extend, then all you need is to quadruple it. – Erik Eidt Jun 01 '22 at 14:03
  • @OmygodcheeseXY: For `short imm`, implicit conversion to `int` happens *before* `imm << 2;`, so that Just Works, not shifting bits out the top of the 16-bit type because `<<` itself still triggers the default integer promotions. (But unlike for `+`, not converting both sides to match each other with further implicit conversions.) – Peter Cordes Jun 01 '22 at 15:54