3

The problem is the following: Given "ABC+DEF=GHI" format string, where A,B,C etc. represent unique digits, find the expression that gives maximum GHI. Ex: Input string is AAB+AAB=AAB, then there's no solution. If it is instead AAA + BBB = AAA, a solution is 999 + 000 = 999. Another example string: ABC + CBA = GGG, a result is => 543 + 345 = 888.

I have ruled out impossible cases easily. The algorithm I have in mind is a bruteforce, that simply tries maximizing the rhs first. However my problem was doing this fast, and also watching out for the unique digits. What's an efficient way to solve this problem?

Notes: I wish to solve this in a singlethreaded approach, and my current problem is detecting if a unique digit is used in "assign_value" function. Perhaps a better method to assign values is there?

EDIT: As per smci's suggestion, here's what I want to achieve, in the very end: ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828 -- A system that can handle not only strings of the form I provided in the beginning (3 digit number + 3 digit number = 3 digit number) but also those. However it doesn't hurt to start simple and go with the solution of format I gave :)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_EXPRESSION_SIZE 11 + 1
#define MAX_VARIABLES 9

int variables_read[MAX_VARIABLES] = { 0 };

struct variable {
    int coefficient;
    int* ptr;
    int side;
    int canhavezero;
    unsigned value_max;
};

typedef struct variable Variable;

struct equation {
    Variable* variables[9]; // max
    unsigned distinct_on_rhs;
    unsigned var_count;
};

typedef struct equation Equation;

int int_pow(int n, int k) {
    int res = 1;
    for(int i = 0; i < k; ++i)
        res *= n;
    return res;
}

void AddVariable(Equation* E, Variable* V) {
    E->variables[E->var_count++] = V;
}

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int assign_value(Equation* E, int pos, int* values) {
    if(!E->variables[pos]->value_count) {
        if(pos < 0)
            return 2;
        // if no possible values left, reset this, but take one value count from the closest variable
        E->variables[pos - 1]->value_count--;
        E->variables[pos]->value_count = E->variables[pos]->value_max;
        return 0;
    }
    int i;
    for(i = 9; i >= 0 && values[i] == -1; --i)
    printf("Assigning %d to %c\n", E->variables[pos]->value_set[E->variables[pos]->value_count - 1], 'A' + (E->variables[pos]->ptr - E->variables[0]->ptr));
    *(E->variables[pos]->ptr) = values[i];
    values[i] = -1; // we have unique numbers
    return 0;
}

int isSolved(Equation E) {
    int sum = 0, coeff = 0;
    printf("Trying...\n");
    for(int i = 0; i < E.var_count; ++i) {
        coeff = E.variables[i]->coefficient * (*E.variables[i]->ptr);
        printf("%d ", *E.variables[i]->ptr);
        if(E.variables[i]->side)
            coeff *= -1;
        sum += coeff;
    }
    printf("\nSum was %d\n", sum);
    return !sum;
}

char* evaluate(char* expression) {
    char* res;
    // check for impossible cases first
    if(IsImpossible(expression)) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
        return res;
    }
    res = (char *) malloc(sizeof(char) * MAX_EXPRESSION_SIZE);
    // now try to find solutions, first describe the given characters as equations
    Equation E;
    E.var_count = 0;
    E.distinct_on_rhs = 0;
    int side_mode = 0, powcounter = 0;
    int a = -1, b = -1, c = -1, d = -1, e = -1, f = -1, g = -1, h = -1, i = -1;
    int* max_variables[MAX_VARIABLES] = { &a, &b, &c, &d, &e, &f, &g, &h, &i };
    for(int j = 0; j < MAX_EXPRESSION_SIZE - 1; ++j) {
        if(expression[j] == '+')
            continue;
        if(expression[j] == '=') {
            side_mode = 1;
            continue;
        }
        Variable* V = (Variable *) malloc(sizeof(Variable));
        // we know we always get 3 digit numbers but we can easily change if we need to
        V->coefficient = int_pow(10, 2 - (powcounter % 3));
        V->ptr = max_variables[expression[j] - 'A'];
        V->side = side_mode;
        E.distinct_on_rhs += side_mode && !variables_read[expression[j] - 'A'];
        if(!(powcounter % 3)) { // beginning of a number
            V->value_count = 9;
            V->value_max = 9;
            V->canhavezero = 0;
        }
        else {
            V->value_count = 10;
            V->value_max = 10;
            V->canhavezero = 1;
        }
        AddVariable(&E, V);
        variables_read[expression[j] - 'A'] = 1;
        ++powcounter;
    }
    for(int j = 0; j < E.var_count; ++j)
        printf("%d %c %d\n", E.variables[j]->coefficient, 'A' + (E.variables[j]->ptr - max_variables[0]), E.variables[j]->side);
    // we got a representaion of the equation, now try to solve it
    int solved = 0;
    // O(9^N), where N is number of distinct variables.
    // An optimization we can do is, we first assign possible max values to rhs number, then go down. We need max number.
    printf("Distincts: %d\n", E.distinct_on_rhs);
    do {
        // try to assign values to all variables and try if it solves the equation
        // but first try to assign rhs as max as possible
        int values[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
        int temp = E.var_count - E.distinct_on_rhs;
        while(temp < E.var_count) {
            solved = assign_value(&E, temp, values);
            ++temp;
        }
        for(int j = E.var_count - 1 - E.distinct_on_rhs; j >= 0; --j)
            solved = assign_value(&E, j, values);
        if(solved) // can return no solution
            break;
        printf("Solving...\n");
        solved = isSolved(E);
        system("PAUSE");
    } while(!solved);
    if(solved == 2) {
        res = (char *) malloc(sizeof(char) * strlen("No Solution!"));
        strcpy(res, "No Solution!");
    }
    else {

    }
    return res;
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    do {
        printf("Enter the formula: ");
        scanf("%s", expression);
        char* res = evaluate(expression);
        printf("%s\n", res);
        free(res);
    } while(expression[0] != '-');
    return 0;
}
SenselessCoder
  • 1,139
  • 12
  • 27
  • 2
    When you say if it´s instead AAB+AAB=AAB didn´t you mean AAA + BBB = AAA ? – sergio0983 Oct 27 '16 at 21:59
  • I've corrected the examples, and added more. – SenselessCoder Oct 27 '16 at 22:05
  • Great, gonna try to take it :) – sergio0983 Oct 27 '16 at 22:05
  • Isn't it 9^N, where N is the unique digits present in the formula? Also, how can I take care of the uniqueness problem? How can I go step by step while paying attention to the uniqueness of other digits? I posted my current implementation, hope it helps. – SenselessCoder Oct 27 '16 at 22:06
  • 2
    The number of possible letter-to-digit matchings is no more than 10! = 3,628,800. That's small enough to go and try all matchings. A proper C program would work well under a second then. – Gassa Oct 27 '16 at 22:07
  • I am horrible at probability math. I should just stop trying... – Michael Dorgan Oct 27 '16 at 22:08
  • Feed all the "legal" cases into an array for testing. Then you can multi-thread it if you want as well. Stupid for such a small test case, but if the number of variables were to go up and/or the test became more complex, it might save time. – Michael Dorgan Oct 27 '16 at 22:14
  • @MichaelDorgan, what do you mean by "legal" cases? Do you mean like a lookup table of verified solutions, given a format string? Doesn't that count as cheating, and also imply I have a working algorithm :) – SenselessCoder Oct 27 '16 at 22:20
  • This is interesting but is going to get closed very soon as offtopic and unclear (not specific question). You need to tell us specifically where your bottleneck is and show us some profiling numbers or callcounts. Must the answer be single-threaded? (if multi-threaded, which threading library and do you care about portability?) – smci Oct 27 '16 at 22:42
  • I wish to solve this in singlethreaded, I forgot to specify. Also, my bottlenecks are unavailable because my generalization is not finished to even test... But I asked, in general, if there was a better way than bruteforce or perhaps a way to optimize the brute force method further. Current problem: In my algorithm I try to assign values, to keep things to be modular. But I can't detect uniqueness. – SenselessCoder Oct 27 '16 at 22:44
  • 1
    FYI, if you want to learn good programming decomposition, style, compactness, C is like the last language I'd pick for this. Note how much time you waste managing string memory allocation, sets, counters, return-conditions... and how it clutters the code. I recommend you use Python, the result will be so much simpler. Or Scala. Or else Java or C++. – smci Oct 27 '16 at 22:50
  • @smci I realize that but everyone has some dirty coding fantasies, and this is mine :) – SenselessCoder Oct 27 '16 at 22:55
  • 1
    What do you mean by *"tries maximizing the RHS first"*? Do you mean assigning candidates 9,8,7... to the variables in order as they appear on RHS? It shouldn't make any difference, except note the implied constraint from the RHS like A cannot be 0 in `AAA + BBB = AAA` since it is the first digit of RHS. – smci Oct 27 '16 at 22:55
  • Raymond Hettinger's [**Python solution is 16 lines long**, and one function](http://code.activestate.com/recipes/576615/). Also has video tutorial. See [Josip's answer to "Efficient way of Solving Cryptarithms"](http://stackoverflow.com/a/751121/202229) – smci Oct 27 '16 at 23:04
  • To prevent this question getting closed and define the efficiency, I recommend using some specific testcases e.g. `ABRA + CADABRA + ABRA + CADABRA == HOUDINI ; 7457 + 1797457 + 7457 + 1797457 == 3609828`. Then try to reduce runtime and callcount. (Mind you that one uses 11 letters). – smci Oct 27 '16 at 23:12
  • @smci Updated OP with your suggestions. If there's anything else missing let me know. – SenselessCoder Oct 27 '16 at 23:23

3 Answers3

2

I would start with the result. There are not that many different cases:

  • AAA
  • AAB, ABA, BAA
  • ABC

All other cases can be reduced to these by renaming the variables. ABC + CBA = GGG would become DBC + CBD = AAA.

Then you have

  • 10 possible solutions for the one-variable case AAA
  • 90 (10*9) for the two variable cases
  • 720 (10*9*8) for the three variable case

assuming that zero is allowed anywhere. If not, you can filter out those that are not allowed.

This sets the variables for the right side of the equation. Each variable that appears only on the left, adds possible solutions. B adds a factor of 9, C a factor of 8, D 7 and so forth.

alain
  • 11,939
  • 2
  • 31
  • 51
  • I think I have an idea on how I can make this work now. I'll try a few things and see if they work. – SenselessCoder Oct 27 '16 at 22:52
  • Yes, this is just an idea, I don't know if this works. You could also wait until tomorrow with accepting an answer, maybe there will be a better one. – alain Oct 27 '16 at 23:00
  • The thing is, I'm probably going to have to rewrite a large portion of my code, because it depends too much on ordering of the characters now. I'll need to reorganize and come up with a cleaner design. Could take a while but I'll update OP with a solution if I have one. – SenselessCoder Oct 27 '16 at 23:01
  • Yes, post your solution as an answer, it could be interesting for others. – alain Oct 27 '16 at 23:05
0

The most "efficient" solution would take all knowledge of the task and simple print the result. So the question is how much of the conditions can be coded and where and what flexibility is needed.

An alternative is to view the generation of test cases and evaluation of them separately.

A simple recursion function can generate the 10! (362880) test cases of unique digits.

unsigned long long count = 0;
unsigned long long sol = 0;

void evaluate(int object[]) {
  count++;
  int ABC = object[0] * 100 + object[1] * 10 + object[2];
  int DEF = object[3] * 100 + object[4] * 10 + object[5];
  int GHI = object[6] * 100 + object[7] * 10 + object[8];
  if (ABC + DEF == GHI) {
    printf("%4llu %03d + %03d = %03d\n", ++sol, ABC,DEF,GHI);
  }
}

void form_combos(int pool[], size_t pool_count, int object[],
    size_t object_count, size_t object_count_max) {
  if (object_count >= object_count_max) {
    evaluate(object);
    return;
  }
  assert(pool_count > 0);
  int *pool_end = pool + pool_count - 1;
  for (size_t p = 0; p < pool_count; p++) {
    int sample = pool[p];  // take one out
    pool[p] = *pool_end;   // replace it with the end
    object[object_count] = sample;
    form_combos(pool, pool_count - 1, object, object_count + 1,
        object_count_max);
    pool[p] = sample;      // restore pool item
  }
}

int main() {
  int pool[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
  size_t pool_size = sizeof pool / sizeof pool[0];
  #define object_count 9
  int object[object_count];
  form_combos(pool, pool_size, object, 0, object_count);
  printf("Evaluate() iterations %llu\n", count);
}

Output

   1 091 + 762 = 853
   2 091 + 763 = 854
   3 091 + 735 = 826
...
1726 874 + 061 = 935
1727 875 + 046 = 921
1728 876 + 045 = 921
Evaluate() iterations 3628800

What is nice about this approach is that if the task was now find

ABC*ABC + DEF*DEF == GHI*GHI

Changing only 2 lines of code:

  if (ABC*ABC + DEF*DEF == GHI*GHI) {
    printf("%4llu sqr(%03d) + sqr(%03d) = sqr(%03d)\n", ++sol, ABC,DEF,GHI);
  }

results in

   1 sqr(534) + sqr(712) = sqr(890)
   2 sqr(546) + sqr(728) = sqr(910)
   3 sqr(712) + sqr(534) = sqr(890)
   4 sqr(728) + sqr(546) = sqr(910)
Evaluate() iterations 3628800
chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256
0

Ok, so for a trivial solution (a base to build a generalization on, so far it only works on the format <3 digit number> + <3 digit number> = <3 digit number>) inspired from @chux and @alain's suggestions is the following code. It truly runs on O(10^N) where N is the distinct number of digits present, or variables if you'd like to call them that. I'll see if I can generalize this even further.

Note that this is for the initial problem of finding the largest rhs. Take that into account as well.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_DIGITS 10
#define MAX_VARIABLES 9
#define MAX_EXPRESSION_SIZE 11

int IsImpossible(char* expression) {
    // if all letters are same or end letters are same, no solution
    if(
        (expression[0] == expression[4] && expression[0] == expression[8]) ||
        (!strncmp(expression, expression + 4, 3) && !strncmp(expression, expression + 8, 3))
      )
        return 1;
    return 0;
}

int ArePointersAssigned(int*** pointers) {
    for(int i = 0; i < MAX_VARIABLES; ++i) {
        if(**pointers[i] == -1)
            return 0;
    }
    return 1;
}

int evaluate(int*** pointers) {
    int ABC = *(*pointers[0]) * 100 + *(*pointers[1]) * 10 + *(*pointers[2]);
    int DEF = *(*pointers[3]) * 100 + *(*pointers[4]) * 10 + *(*pointers[5]);
    int GHI = *(*pointers[6]) * 100 + *(*pointers[7]) * 10 + *(*pointers[8]);
    if (ABC + DEF == GHI) { // since we use dfs, if this is a solution simply return it
        //printf("%d + %d = %d\n", ABC, DEF, GHI);
        return 1;
    }
    return 0;
}

// use the solved pointer to escape recursion early
// check_end checks if we reached 6 for the 2nd time, if it's first time we ignore (because it's start state)
void form_combos(int pool[], int pool_count, int object_count, int*** pointers, int* solved) {
    if(object_count == MAX_DIGITS - 1)
        object_count = 0;
    if(*solved) // if a branch solved this, escape recursion
        return;
    if (ArePointersAssigned(pointers)) { // that means we got a full equation set
        *solved = evaluate(pointers);
        if(*solved)
            return;
    }
    int *pool_end = pool + pool_count - 1;
    for (int p = pool_count - 1; p >= 0 && !*solved; p--) {
        int sample = pool[p];  // take one out
        pool[p] = *pool_end;   // replace it with the end
        int temp = **pointers[object_count];
        if(**pointers[object_count] == -1)
            **pointers[object_count] = sample;
        form_combos(pool, pool_count - 1, object_count + 1, pointers, solved);
        pool[p] = sample;      // restore pool item
        if(!*solved)
            **pointers[object_count] = temp;
    }
}

int main() {
    char expression[MAX_EXPRESSION_SIZE] = { 0 };
    printf("Enter the formula: ");
    scanf("%s", expression);
    while(expression[0] != '-') {
        if(IsImpossible(expression))
            printf("No solution!\n");
        else {
            int digits[MAX_DIGITS] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
            int object[MAX_VARIABLES] = { -1, -1, -1, -1, -1, -1, -1, -1, -1 }; // stack for dfs
            int *A = &object[0], *B = &object[1], *C = &object[2], 
                *D = &object[3], *E = &object[4], *F = &object[5], 
                *G = &object[6], *H = &object[7], *I = &object[8];
            // set same pointers
            int** pointers[MAX_VARIABLES] = { &A, &B, &C, &D, &E, &F, &G, &H, &I };

            // analyze the equation
            int var = 0;
            for(int p = 0; p < MAX_EXPRESSION_SIZE; ++p) {
                if(expression[p] >= 'A' && expression[p] <= 'I') {
                    *pointers[var++] = &object[expression[p] - 'A']; // link same pointers
                }
            }
            int solved = 0, check_end = 0;
            form_combos(digits, MAX_DIGITS, MAX_DIGITS - 4, pointers, &solved);
            if(!solved) // it can be unsolvable still
                printf("No solution!\n");
            else
                printf("%d%d%d + %d%d%d = %d%d%d\n", *A, *B, *C, *D, *E, *F, *G, *H, *I);
        }
        printf("Enter the formula: ");
        scanf("%s", expression);
    }
    return 0;
}
SenselessCoder
  • 1,139
  • 12
  • 27