1

So I've been working on implementing the metamorphic code example from James Holderness found here: Metamorphic Code Examples.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>

/*
 * x86 opcode bytes used by the junk-rewriting code.
 *
 * PUSH, POP and MOV are base opcodes: the code adds a register number
 * (0-7) to them to select the register operand.
 */
#define PUSH 0x50
#define POP 0x58
/* MOV + reg is written followed by four random bytes (5 bytes in total). */
#define MOV 0xB8
/* Single-byte filler instruction. */
#define NOP 0x90

/*
 * First bytes of the two-byte instructions written by writeInstruction();
 * the second byte is built as 0xC0 + (0..7) * 8 + reg.
 */
#define ADD 0x01
#define AND 0x21
#define XOR 0x31
#define OR  0x09
#define SBB 0x19
#define SUB 0x29

/*
 * Marker sequence placed throughout the program: one PUSH, JUNKLEN filler
 * bytes, one POP.  replaceJunk() searches the file image for this shape.
 *
 * NOTE(review): GCC inline asm expects a string template (for example a
 * `.byte` directive), so this comma-separated list of integer constants
 * will most likely not compile as written -- confirm and adjust.
 */
#define JUNK asm __volatile__(PUSH,NOP,NOP,NOP,NOP,NOP,NOP,NOP,NOP,POP)
/* Number of filler bytes between the PUSH and the POP of a JUNK sequence. */
#define JUNKLEN 8

/*
 * The six opcode bytes that may start a two-byte junk instruction.
 * writeInstruction() picks among the first six entries only; the trailing 0
 * terminates the table so that it can be searched like a C string.
 */
const unsigned char prefixes[] = {ADD, AND, XOR, OR, SBB, SUB, 0};
/* File image loaded by readCode(), patched in place, saved by writeCode(). */
unsigned char *code;
/* Size of the image in `code`, in bytes. */
int codelen;

/*
 * Load the whole file `filename` into the global buffer `code` and store its
 * size in `codelen`.
 *
 * The rest of the program cannot do anything useful without the image, so
 * any I/O or allocation failure prints a diagnostic and terminates the
 * process with EXIT_FAILURE instead of continuing with a NULL stream or
 * buffer.  The stream is closed before returning.
 */
void readCode(const char *filename)
{
    long size;
    FILE *fp = fopen(filename, "rb");       JUNK;

    if (fp == NULL) {
        perror(filename);
        exit(EXIT_FAILURE);
    }

    /* Determine the file size by seeking to the end. */
    if (fseek(fp, 0L, SEEK_END) != 0) {
        perror(filename);
        exit(EXIT_FAILURE);
    }                                       JUNK;
    size = ftell(fp);
    codelen = (int)size;
    /* Reject ftell() errors and sizes that do not fit in `codelen`. */
    if (size < 0 || (long)codelen != size) {
        fprintf(stderr, "%s: cannot determine a usable file size\n", filename);
        exit(EXIT_FAILURE);
    }

    /* Never request 0 bytes: malloc(0) may legitimately return NULL. */
    code = malloc(codelen > 0 ? (size_t)codelen : 1);   JUNK;
    if (code == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    if (fseek(fp, 0L, SEEK_SET) != 0 ||
        fread(code, 1, (size_t)codelen, fp) != (size_t)codelen) {
        fprintf(stderr, "%s: short read\n", filename);
        exit(EXIT_FAILURE);
    }                                       JUNK;

    fclose(fp);
}

/*
 * Write the (patched) image in `code` to a sibling of `filename`.
 *
 * The output name is `filename` with its last character replaced: a trailing
 * digit d becomes (d + 1) % 10, anything else becomes '0'.
 *
 * Failures (empty name, allocation, open, write, close) are reported on
 * stderr and the function returns without writing further.
 */
void writeCode(const char *filename)
{
    FILE *fp;
    size_t len = strlen(filename);
    char *newFileName;
    char lastChar;

    /* An empty name has no last character to replace. */
    if (len == 0) {
        fprintf(stderr, "writeCode: empty file name\n");
        return;
    }

    /* Private, writable copy of the name (plain ISO C, no strdup). */
    newFileName = malloc(len + 1);          JUNK;
    if (newFileName == NULL) {
        perror("malloc");
        return;
    }
    memcpy(newFileName, filename, len + 1);

    lastChar = filename[len - 1];
    /* <ctype.h> functions require a value representable as unsigned char. */
    lastChar = '0' + (isdigit((unsigned char)lastChar) ? (lastChar - '0' + 1) % 10 : 0);
    newFileName[len - 1] = lastChar;

    fp = fopen(newFileName, "wb");          JUNK;
    if (fp == NULL) {
        perror(newFileName);
        free(newFileName);
        return;
    }

    if (codelen > 0 &&
        fwrite(code, 1, (size_t)codelen, fp) != (size_t)codelen)
        perror(newFileName);                JUNK;

    /* fclose() flushes buffered data, so its result matters for writes. */
    if (fclose(fp) != 0)
        perror(newFileName);
    free(newFileName);
}

/*
 * Write one randomly chosen junk instruction operating on register `reg`
 * into `code` at `offset`, using at most `space` bytes.
 *
 *   space < 2              -> a single NOP (1 byte)
 *   space < 5 or coin flip -> prefix byte + register byte (2 bytes)
 *   otherwise              -> MOV + reg followed by 4 random bytes (5 bytes)
 *
 * Returns the number of bytes written (1, 2 or 5).
 */
int writeInstruction(unsigned reg, int offset, int space)
{
    if (space < 2) {
        code[offset] = NOP;         JUNK;
        return 1;
    } else if (space < 5 || rand() % 2 == 0) {
        /* Only the first six table entries are opcodes; the 7th is a 0 terminator. */
        code[offset] = prefixes[rand() % 6];    JUNK;
        code[offset + 1] = 0xC0 + rand() % 8 * 8 + reg; JUNK;
        return 2;
    } else {
        unsigned lowHalf;
        unsigned highHalf;

        code[offset] = MOV + reg;       JUNK;
        /*
         * Fill the four operand bytes one at a time.  `code + offset + 1`
         * has no particular alignment, so storing through a wider pointer
         * type would be a misaligned access.  Each random value supplies
         * two bytes, low byte first.
         */
        lowHalf = (unsigned)rand();
        highHalf = (unsigned)rand();
        code[offset + 1] = (unsigned char)(lowHalf & 0xFF);
        code[offset + 2] = (unsigned char)((lowHalf >> 8) & 0xFF);
        code[offset + 3] = (unsigned char)(highHalf & 0xFF);
        code[offset + 4] = (unsigned char)((highHalf >> 8) & 0xFF); JUNK;
        return 5;
    }
}

/*
 * Try to decode one junk instruction for register `reg` at `code[offset]`.
 *
 * Recognises exactly the three shapes writeInstruction() produces:
 *   NOP                           -> 1
 *   MOV + reg (plus 4 data bytes) -> 5
 *   prefix byte, then a byte >= 0xC0 whose low three bits equal reg -> 2
 *
 * Returns the instruction length in bytes, or 0 if the bytes at `offset`
 * are not a recognised junk instruction.
 */
int readInstruction(unsigned reg, int offset)
{
    unsigned c1 = code[offset];
    if (c1 == NOP) {
        return 1;
    }                       JUNK;
    if (c1 == MOV + reg) {
        return 5;
    }                       JUNK;
    /*
     * Search only the real opcode entries.  A string search would also
     * match the table's 0 terminator and accept a 0x00 byte as a prefix.
     */
    if (memchr(prefixes, (int)c1, sizeof prefixes - 1) != NULL) {
        unsigned c2 = code[offset + 1];     JUNK;
        if (c2 >= 0xC0 && (c2 & 7) == reg) {
            return 2;
        }                   JUNK;
    }                       JUNK;
    return 0;
}

/*
 * Scan the loaded image for junk sequences and rewrite each one in place.
 *
 * A junk sequence is: PUSH reg, JUNKLEN bytes that decode completely as junk
 * instructions for that register (see readInstruction()), POP reg.
 * Sequences using register 4 are never touched.  Every match gets a freshly
 * chosen register (again never 4) and a new random filling produced by
 * writeInstruction().  The offset of each rewritten sequence is printed.
 */
void replaceJunk(void)
{
    int i, j, inc, space;
    srand((unsigned)time(NULL));            JUNK;

    for (i = 0; i < codelen - JUNKLEN - 2; i++) {
        unsigned start = code[i];
        unsigned end = code[i + JUNKLEN + 1];
        unsigned reg;

        /* Must start with PUSH reg ... */
        if (start < PUSH || start >= PUSH + 8) {
            continue;
        }                       JUNK;
        reg = start - PUSH;
        /* ... and end with POP of the same register. */
        if (end != POP + reg) {
            continue;
        }                       JUNK;
        if (reg == 4) {
            continue;
        }

        /*
         * Decode the filler.  Stop as soon as the JUNKLEN-byte window is
         * consumed or exceeded: an instruction that overshoots the window
         * can never make j == JUNKLEN, and decoding past it could read
         * beyond the end of `code`.
         */
        j = 0;                  JUNK;
        while (j < JUNKLEN && (inc = readInstruction(reg, i + 1 + j)) != 0) {
            j += inc;
        }
        if (j != JUNKLEN) {
            continue;
        }                       JUNK;

        /* Pick one of the 7 registers other than 4: map 4..6 to 5..7. */
        reg = rand() % 7;           JUNK;
        reg += (reg >= 4);
        code[i] = PUSH + reg;           JUNK;
        code[i + JUNKLEN + 1] = POP + reg;  JUNK;

        /* Refill the window; writeInstruction() never exceeds `space`. */
        space = JUNKLEN;
        j = 0;
        while (space) {
            inc = writeInstruction(reg, i + 1 + j, space);  JUNK;
            j += inc;
            space -= inc;               JUNK;
        }
        printf("%d\n", i);          JUNK;
    }
}

/*
 * Entry point: load this program's own file (named by argv[0]), rewrite its
 * junk sequences, and save the result under a sibling file name.
 */
int main(int argc, char *argv[])
{
    /* The C standard permits argc == 0, in which case argv[0] is NULL. */
    if (argc < 1 || argv[0] == NULL) {
        fprintf(stderr, "cannot determine the program path: argv[0] is missing\n");
        return EXIT_FAILURE;
    }

    readCode(argv[0]);              JUNK;
    replaceJunk();                  JUNK;
    writeCode(argv[0]);             JUNK;

    return 0;
}

I'm attempting to compile using GCC (version 6.3.0) on Raspbian 4.9, but the compile keeps failing with the error "undefined reference to __emit__". I know this is because __emit__ is a Borland C compiler macro, so I've attempted to implement similar functionality using the asm volatile macro found here (Implementing Borland's __emit__ macro in GCC).

How can I change the code to work with GCC? I've tried a number of different uses of asm volatile but nothing seems to work. I expect that most of the #defines will have to change, I just don't know the correct way to do it.

Matt
  • 13
  • 2
  • 1
    In addition to the answer that you already have, note that if you're using Raspbian on a Raspberry Pi, you should probably be using ARM opcodes, not x86 opcodes. – zneak Oct 18 '17 at 04:51
  • @zneak - oops, right, I missed the reference to Raspbian! The whole example would need to be modified in that case, since the whole approach is somewhat specific to x86 opcodes. – BeeOnRope Oct 18 '17 at 05:06
  • You need to understand how C compiles to assembly language, and then look at what your specific version of gcc does, then you can figure out how you can add extra instructions to mess around with the compiler's data. (see https://stackoverflow.com/questions/38552116/how-to-remove-noise-from-gcc-clang-assembly-output for example). This is all *very* compiler-specific. – Peter Cordes Oct 18 '17 at 05:13
  • 1
    @PeterCordes - take a closer look. It doesn't seem very compiler-specific and it should mostly "just work" on x86. Basically it isn't modifying arbitrary code generated by the compiler, but rather searching for the `JUNK` sequences inserted verbatim by the `asm` macro. As long as the compiler emits those sequences faithfully, it should find them. It then rewrites those sequences in such a way that it is still effectively a no-op, but with new instructions. I don't see any big issues with making this work. There is a small chance of false positives: falsely finding the `JUNK` signature. – BeeOnRope Oct 18 '17 at 16:23
  • Furthermore, the OP reported that "it works" below, apparently with little changes other than replacing the `__emit__` with an `asm` block. – BeeOnRope Oct 18 '17 at 16:24
  • @BeeOnRope: Oh, yeah I just read the OP's link and found out that these extra instructions are always supposed to do nothing. Just don't clobber the red-zone with push/pop. (Although IIRC, gcc makes a stack frame and doesn't use the red-zone with `-O0`.) – Peter Cordes Oct 18 '17 at 16:33
  • @PeterCordes `push` and `pop` by definition don't "use" the red-zone since they adjust `rsp`. The red-zone is the 128-byte area below `rsp` that it is safe to use without adjusting the stack pointer, but `push` and `pop` do adjust it, so they are always safe in this respect. – BeeOnRope Oct 18 '17 at 16:35
  • 1
    @BeeOnRope: But gcc doesn't know about the push/pop! It's the same problem as https://stackoverflow.com/questions/34520013/using-base-pointer-register-in-c-inline-asm/34522750#34522750, where inline-asm using `push %%rbp`/`pop %%rbp` clobbers something gcc spilled to `-8(%rsp)`. – Peter Cordes Oct 18 '17 at 16:40
  • @PeterCordes - good point, that's a huge gotcha to manipulating the stack from inline `asm`! I would have hoped that gcc wouldn't keep spilled stuff in the redzone across an `asm` call, but I guess that would hurt performance for the small number of inline blocks that want to do that. Perhaps they could add a clobber for it. It's a limitation of treating the asm totally opaquely - if they could only look into the block and see the `push`... (yes, I know that the `asm` blocks don't work that way, the text is basically emitted directly into the .S output). – BeeOnRope Oct 18 '17 at 16:57
  • 1
    I guess to make this code safe, you could change `JUNK` to `sub rsp, 128; push rax; ... pop rax; add rsp, 128` so you are sure you are avoiding any clobber of the redzone. – BeeOnRope Oct 18 '17 at 16:58
  • @BeeOnRope: yeah, it would be nice if there was a way to declare a clobber on the red-zone. But usually the answer is to either write your whole function in asm, or https://gcc.gnu.org/wiki/DontUseInlineAsm. It's mostly a problem when people want to use `call` from inline asm (which is highly questionable anyway; gcc or clang will sometimes optimize a `static` function definition to assume things that are true for the only visible call sites, e.g. not returning unused struct members. Although I think by passing a function-pointer as an asm operand you guarantee a proper func def.) – Peter Cordes Oct 18 '17 at 17:02

1 Answers1

3

You can put arbitrary bytes at the location of an asm block using the .byte directive like this:

asm __volatile__(".byte 0x50, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x58\n");

Here's a live example on godbolt, including the far right pane which shows that it decompiled fine into a push rax, eight nops, and pop rax.

See more about the .byte directive here.

However, this will still not work on the Raspberry Pi because the opcodes appear to be for x86. You will have to change them to the corresponding ARM opcodes. Furthermore, GCC is a highly optimizing compiler, and you cannot manipulate the C stack in the way this code did with the old Borland C compiler.

BeeOnRope
  • 60,350
  • 16
  • 207
  • 386
  • Would I then replace every instance of "JUNK" in the code with "asm"? Would the same functionality be preserved? – Matt Oct 18 '17 at 04:59
  • You can keep the same JUNK macro but just redefine it in terms of `asm` with the `.byte` directive. With use of macro stringification you can keep the same `PUSH` `NOP` etc macros. – BeeOnRope Oct 18 '17 at 05:02
  • Like this? #define asm __volatile__(".byte 0x50, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x58\n")? – Matt Oct 18 '17 at 05:04
  • @Matt, yes - but as zneak pointed out in the comments and in the edit to my answer, this won't work on ARM. The example given is specific to x86. – BeeOnRope Oct 18 '17 at 05:06
  • What exactly would I have to change to get it to work on x86? I was planning on running it on my RPI but if it's easier to run on my computer then I'll go that way. – Matt Oct 18 '17 at 05:29
  • If you run it on your x86 computer it may very well work. @Matt – BeeOnRope Oct 18 '17 at 05:30
  • So I updated the problem line to #define JUNK __volatile__(".byte 0x50, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x58\n") and kept all the other inline JUNK usages the same. However, when I compile I get a whole mess of errors stating: "expected identifier or '(' before string constant", referring to #define JUNK __volatile__(".byte ...... yadayada"). The error points to the period before byte – Matt Oct 18 '17 at 05:39
  • @Matt you need the `asm` keyword in there! Here's [a link](https://godbolt.org/g/Pdyq4u) showing it working. – BeeOnRope Oct 18 '17 at 05:43
  • it works. :) Thank you so much for your help! I think I'll be working on getting everything ported to ARM next, with hopefully fewer problems. – Matt Oct 18 '17 at 05:52