In compiler theory, copy propagation is the process of replacing the occurrences of targets of direct assignments with their values. A direct assignment is an instruction of the form x = y, which simply assigns the value of y to x.
y = x
z = 3 + y
Copy propagation would yield:
z = 3 + x
From the wiki, Copy propagation is implemented by using use-definition chains.
The code below (my best try) implements copy propagation by creating use-def chains and substituting the value of the variable wherever it is used (even if the variable is not live at that point of the program). In the second run, it evaluates all the constants (constant folding).
the char *str_replace(char *orig, char *rep, char *with)
is attributed to https://stackoverflow.com/a/779960/10603510
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <math.h>
#include <stdbool.h>
char *str_replace(char *orig, char *rep, char *with) {
char *result; // the return string
char *ins; // the next insert point
char *tmp; // varies
int len_rep; // length of rep (the string to remove)
int len_with; // length of with (the string to replace rep with)
int len_front; // distance between rep and end of last rep
int count; // number of replacements
// sanity checks and initialization
if (!orig || !rep)
return NULL;
len_rep = strlen(rep);
if (len_rep == 0)
return NULL; // empty rep causes infinite loop during count
if (!with)
with = "";
len_with = strlen(with);
// count the number of replacements needed
ins = orig;
for (count = 0; (tmp = strstr(ins, rep)); ++count) {
ins = tmp + len_rep;
}
tmp = result = malloc(strlen(orig) + (len_with - len_rep) * count + 1);
if (!result)
return NULL;
// first time through the loop, all the variable are set correctly
// from here on,
// tmp points to the end of the result string
// ins points to the next occurrence of rep in orig
// orig points to the remainder of orig after "end of rep"
while (count--) {
ins = strstr(orig, rep);
len_front = ins - orig;
tmp = strncpy(tmp, orig, len_front) + len_front;
tmp = strcpy(tmp, with) + len_with;
orig += len_front + len_rep; // move to next "end of rep"
}
strcpy(tmp, orig);
return result;
}
bool is_operator(char c) {
return c == '+' || c == '-' || c == '*' || c == '/' || c == '(' || c == ')';
}
int calculate(int a, int b, char op)
{
switch (op)
{
case '+': return a + b;
case '-': return a - b;
case '*': return a * b;
case '/': return a / b;
case '^': return pow(a, b);
}
return -1;
}
bool is_operand(char c)
{
return strchr("0123456789", c);
}
int priority(char c)
{
switch (c)
{
case '+':
case '-':
return 0;
case '*':
case '/':
return 1;
case '^':
return 2;
}
return -1;
}
int evaluate(char *expression)
{
int op1;
int op2;
int top = 0;
int ops = 0;
int operand_stack[50];
char operators[50];
char *p = expression;
for (; *p; p++)
{
if (*p == ' ')
{
continue;
}
else if (isalpha(*p))
{
return -1;
}
else if (is_operand(*p))
{
operand_stack[++top] = strtol((char*)p, (char**)&p, 10);
p--;
}
else if (is_operator(*p))
{
while (ops) {
if (priority(*p) < priority(operators[ops])) {
op2 = operand_stack[top--];
op1 = operand_stack[top--];
operand_stack[++top] = calculate(op1, op2, operators[ops--]);
}
else {
break;
}
}
operators[++ops] = *p;
}
}
while (ops) {
op2 = operand_stack[top--];
op1 = operand_stack[top--];
operand_stack[++top] = calculate(op1, op2, operators[ops--]);
}
return operand_stack[top];
}
char expressions[50][50];
int n;
void constant_folding() {
for (int i = 0; i < n; i++) {
char *p = strchr(expressions[i], (int)'=');
if (p) {
char integer[20];
int a = evaluate(p+1);
if (a != -1) {
sprintf(integer, "%d", a);
strcpy(expressions[i], str_replace(expressions[i], p + 1, integer));
}
}
}
}
// line starts from 0
typedef struct use_def {
int line;
int use_count;
char var[20];
char replacement[20];
int lines_used[20];
} use_def;
use_def use_defs[5];
int use_def_count = 0;
void constant_propogation() {
for (int i = 0; i < use_def_count; i++) {
use_def *a = &use_defs[i];
for (int j = 0; j < a->use_count; j++) {
strcpy(expressions[a->lines_used[j]], str_replace(expressions[a->lines_used[j]], a->var, a->replacement));
}
}
}
int main()
{
printf("\n\nEnter the number of expressions : ");
scanf("%d", &n);
for(int i=0; i<n;i++)
{
scanf(" %[^\n]", expressions[i]);
}
for (int i = 0; i < n; i++)
{
use_def *a = use_defs + i;
a->line = i;
char buff[20];
strcpy(buff, expressions[i]);
strcpy(a->var, strtok(buff, "="));
if (a->var) {
strcpy(a->replacement, strtok(NULL, ""));
for (int j = i + 1; j < n ; j++) {
if (strstr(expressions[j], a->var)) {
a->lines_used[a->use_count++] = j;
}
}
use_def_count++;
}
}
constant_propogation();
constant_folding();
printf("\nCode after propagation: \n");
for(int i=0;i<n;i++) {
printf("%s\n", expressions[i]);
}
return 0;
}
However my algorithm doesn't work with all the essential test cases
(please ignore the hardcoded values, I couldn't proceed with my faulty code design).
I want guidance on how to really implement copy propagation,
perhaps a more detailed explanation of how the algorithm for Global Common Subexpression Elimination (GCSE) and CProp works.