2

In compiler theory, copy propagation is the process of replacing the occurrences of targets of direct assignments with their values. A direct assignment is an instruction of the form x = y, which simply assigns the value of y to x.

y = x
z = 3 + y

Copy propagation would yield:

z = 3 + x

From the wiki, Copy propagation is implemented by using use-definition chains.

The code below (my best try) implements copy propagation by creating use-def chains and substituting the value of the variable wherever it is used (even if the variable is not live at that point of the program). In the second run, it evaluates all the constants (constant folding).

the char *str_replace(char *orig, char *rep, char *with) is attributed to https://stackoverflow.com/a/779960/10603510

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>

char *str_replace(char *orig, char *rep, char *with) {
    char *result; // the return string
    char *ins;    // the next insert point
    char *tmp;    // varies
    int len_rep;  // length of rep (the string to remove)
    int len_with; // length of with (the string to replace rep with)
    int len_front; // distance between rep and end of last rep
    int count;    // number of replacements

    // sanity checks and initialization
    if (!orig || !rep)
        return NULL;
    len_rep = strlen(rep);
    if (len_rep == 0)
        return NULL; // empty rep causes infinite loop during count
    if (!with)
        with = "";
    len_with = strlen(with);

    // count the number of replacements needed
    ins = orig;
    for (count = 0; (tmp = strstr(ins, rep)); ++count) {
        ins = tmp + len_rep;
    }

    tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);

    if (!result)
        return NULL;

    // first time through the loop, all the variable are set correctly
    // from here on,
    //    tmp points to the end of the result string
    //    ins points to the next occurrence of rep in orig
    //    orig points to the remainder of orig after "end of rep"
    while (count--) {
        ins = strstr(orig, rep);
        len_front = ins - orig;
        tmp = strncpy(tmp, orig, len_front) + len_front;
        tmp = strcpy(tmp, with) + len_with;
        orig += len_front + len_rep; // move to next "end of rep"
    }
    strcpy(tmp, orig);
    return result;
}

bool is_operator(char c) {
    return c == '+' || c == '-' || c == '*' || c == '/' || c == '(' || c == ')';
}

int calculate(int a, int b, char op)
{
    switch (op)
    {
    case '+': return a + b;
    case '-': return a - b;
    case '*': return a * b;
    case '/': return a / b;
    case '^': return pow(a, b);
    }
    return -1;
}

bool is_operand(char c)
{
    return strchr("0123456789", c);
}

int priority(char c)
{
    switch (c)
    {
    case '+':
    case '-':
        return 0;
    case '*':
    case '/':
        return 1;
    case '^':
        return 2;
    }
    return -1;
}

int evaluate(char *expression)
{
    int op1;
    int op2;
    int top = 0;
    int ops = 0;
    int operand_stack[50];
    char operators[50];
    char *p = expression;
    
    for (; *p; p++)
    {
        if (*p == ' ')
        {
            continue;
        }
        else if (isalpha(*p))
        {
            return -1;
        }
        else if (is_operand(*p))
        {
            operand_stack[++top] = strtol((char*)p, (char**)&p, 10);
            p--;
        }
        else if (is_operator(*p)) 
        {
            while (ops) {
                if (priority(*p) < priority(operators[ops])) {
                    op2 = operand_stack[top--];
                    op1 = operand_stack[top--];
                    operand_stack[++top] = calculate(op1, op2, operators[ops--]);
                }
                else {
                    break;
                }
            }
            operators[++ops] = *p;

        }
    }
    
    while (ops) {
        op2 = operand_stack[top--];
        op1 = operand_stack[top--];
        operand_stack[++top] = calculate(op1, op2, operators[ops--]);
    }
    return operand_stack[top];
}

char expressions[50][50];
int n;

void constant_folding() {
    for (int i = 0; i < n; i++) {
        char *p = strchr(expressions[i], (int)'=');
        if (p) {
            char integer[20];
            int a = evaluate(p+1);
        
            if (a != -1) {
                sprintf(integer, "%d", a);
                strcpy(expressions[i], str_replace(expressions[i], p + 1, integer));
            }
        }
    }
}

// line starts from 0
typedef struct use_def {
    int line;
    int use_count;
    char var[20];
    char replacement[20];
    int lines_used[20];
} use_def;
use_def use_defs[5];
int use_def_count = 0;

void constant_propogation() {
    for (int i = 0; i < use_def_count; i++) {
        use_def *a = &use_defs[i];
        for (int j = 0; j < a->use_count; j++) {
            strcpy(expressions[a->lines_used[j]], str_replace(expressions[a->lines_used[j]], a->var, a->replacement));
        }
    }
}

int main()
{

    printf("\n\nEnter the number of expressions : ");
    scanf("%d", &n);

    for(int i=0; i<n;i++)
    {
        scanf(" %[^\n]", expressions[i]);
    }

    for (int i = 0; i < n; i++) 
    {
        use_def *a = use_defs + i;
        a->line = i;
        char buff[20];
        strcpy(buff, expressions[i]);
        strcpy(a->var, strtok(buff, "="));
        if (a->var) {
            strcpy(a->replacement, strtok(NULL, ""));
            for (int j = i + 1; j < n ; j++) {
                if (strstr(expressions[j], a->var)) {
                    a->lines_used[a->use_count++] = j;
                }
            }
            use_def_count++;
        }
    }
    
    constant_propogation();
    constant_folding();
    
    printf("\nCode after propagation: \n");
    for(int i=0;i<n;i++) {
        printf("%s\n", expressions[i]);
    }
    return 0;

}

However my algorithm doesn't work with all the essential test cases
(please ignore the hardcoded values, I couldn't proceed with my faulty code design).

I want guidance on how to really implement copy propagation,
perhaps a more detailed explanation of how the algorithm for Global Common Subexpression Elimination (GCSE) and CProp works.

greybeard
  • 2,249
  • 8
  • 30
  • 66
  • 4
    I am voting to close the question as it is to broad for StackOverflow. Books on compilers should typically answer such a question (but this is off topic on StackOverflow). If you have some (much more) *specific* question, then please update the question. – Jérôme Richard Dec 05 '21 at 15:16
  • 1
    What is the question here? – vish4071 Dec 20 '21 at 07:19
  • @vish4071: was asking myselve the same... The question is "How to implement Copy Propagation", a topic of compiler optimization, which is *very* broad, since there down-sides to blunt copying. In gcc, one might want to look at options Global Common Subexpression Elimination (GCSE) and CProp in various forms. Per the implementation above: you have a lot of hard-coded values like length of variable (20) and number of expressions (50), which are not checked... That's dangerous. – Rainer Keller Dec 20 '21 at 08:38
  • 2
    @GraceMathew, You say that you implemented copy-propagation in your code above, is there some problem with it? If not, then haven't you already answered your own question? I think that like you may not be getting any response because we cannot figure out what the problem is here, and/or what you are looking for in an answer. – RBarryYoung Dec 21 '21 at 14:15
  • @RBarryYoung I have tried to implement copy-propagation, but my code doesn't cover all the test cases and my code design is naive. I'm looking for a design that's more robust, a simple version of what a compiler does to understand and appreciate what compilers do. – Grace Mathew Dec 24 '21 at 06:31
  • @vish4071 revised the post – Grace Mathew Dec 24 '21 at 06:33
  • @RainerKeller revised the post, made the question clearer – Grace Mathew Dec 24 '21 at 06:34
  • LLVM has an "extremely simple" [copy propagation pass](https://github.com/llvm/llvm-project/blob/ca2f53897a2f2a60d8cb1538d5fcf930d814e9f5/llvm/lib/CodeGen/MachineCopyPropagation.cpp) that takes 940 lines. – David Grayson Dec 24 '21 at 07:04
  • Do you *need* to use C here? C++ std::string would free you from all sorts of complexity and bug-prone code managing C strings and dynamic memory - not to mention being able to use search, substring, and append methods for much more readable code. – Brett Hale Dec 24 '21 at 07:24
  • Isn't the correct case here is definition-use chains (DU) and not UD? – CIsForCookies Dec 24 '21 at 07:37

1 Answers1

0

Buffer overflow potential

May or may not explain OP's copy propagation issue, but certainly a code weakness.

Code exhibits undefined behavior should input exceed buffer's capacities.

scanf(" %[^\n]", ... worse than gets().

    char expressions[50][50];

    // scanf(" %[^\n]", expressions[i]);

    char buf[sizeof expressions[i] * 2];
    if (fgets(buf, sizeof buf, stdin) == NULL) {
      fprintf(stderr, "No input\n");
      exit(-1);
    }
    size_t len = strlen(buf);
    if (len > 0 && buf[len-1] == '\n') {
      buf[--len] = '\0';
    }
    if (len >= sizeof expressions[i]) {
      fprintf(stderr, "Input too long %zu.\n", len);
      exit(-1);
    }
    strcpy(expressions[i], buf);

Later

    char buff[20];

    // add check
    if (strlen(expressions[i] >= sizeof buff) {
      fprintf(stderr, "Expression <%s> too long.\n", expressions[i]);
      exit(-1);
    }

    strcpy(buff, expressions[i]);

Perhaps other risky buffer copies.


Minor

Suggest long to go along with strtol().

// int operand_stack[50];
long operand_stack[50];

Watch out that char operators[50]; ... operators[++ops] does index to 50.

chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256