It would also be helpful if anyone could help me find identical columns in an array without resorting to a bool function
It might be possible, but a boolean-returning function is the natural core of the solution (i.e. a function that returns true if two different columns match).
But, to find all matches, another function is required. It must iterate over unique column pairings and call the match function to see if the two columns match.
It helps to define an "iterator" struct that keeps track of the search indexes and maximum geometry.
Here's a [completely] refactored version. It has a diagnostic test that generates a random matrix with random identical columns. It iterates over all column pairs and prints the unique matching column numbers.
Note that it does not sort any columns. This is left as an exercise for the reader.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// opt_R -- random seed (-R option); main substitutes the current time when 0
unsigned int opt_R;
// opt_M -- number of column pairs dojoin makes identical (-M option);
// main substitutes 5 when 0
int opt_M;
// search_t -- iterator state for a search over pairs of columns
// dojoin also uses lcol/rcol of this struct to record the pairs it has joined
typedef struct {
int maxrow; // number of rows compared in each column
int maxcol; // number of columns searched
int lcol; // left column of the current pair (search_init starts it at 0)
int rcol; // right column of the current pair (search_init starts it at 1)
int match; // 1=match (result stored by the most recent search_find)
} search_t;
// fixed dimensions of the matrix type used by every function in this program
#define MAXROW 100
#define MAXCOL 100
// arrbig -- the matrix that main passes to dotest
int arrbig[MAXROW][MAXCOL];
// search_init -- initialize for full matrix search
// srch: search state to set up
// maxrow: number of rows to compare in each column
// maxcol: number of columns to search
void
search_init(search_t *srch,int maxrow,int maxcol)
{
    srch->maxrow = maxrow;
    srch->maxcol = maxcol;

    // the first pair in iteration order is columns 0 and 1
    srch->lcol = 0;
    srch->rcol = 1;

    // nothing has matched yet; without this the field would hold an
    // indeterminate value until the first search_find call
    srch->match = 0;
}
// search_match -- compare two columns of arr over the first maxrow rows
// lcol/rcol: the two column numbers to compare
// maxrow: number of rows to compare
// RETURNS: 1 if every compared row holds the same value in both columns,
// 0 otherwise (also 0 when maxrow <= 0, because nothing was compared)
int
search_match(int lcol,int rcol,int maxrow,int arr[MAXROW][MAXCOL])
{
    int irow = 0;

    // no rows compared counts as "no match"
    if (maxrow <= 0)
        return 0;

    // walk down the rows for as long as the two columns agree
    while ((irow < maxrow) && (arr[irow][lcol] == arr[irow][rcol]))
        ++irow;

    // the columns match only if the walk reached the bottom
    return (irow >= maxrow);
}
// search_next -- step the search position to the following column pair
// The right column moves one place to the right; once it would leave the
// matrix, the left column moves one place and the right column restarts
// just after it.
// RETURNS: 1=more to do (the left column still has a column to its right)
int
search_next(search_t *srch)
{
    const int lastcol = srch->maxcol - 1;

    if (srch->rcol < lastcol) {
        // still room to the right for the current left column
        srch->rcol += 1;
    }
    else {
        // current left column is exhausted: start the next one
        srch->lcol += 1;
        srch->rcol = srch->lcol + 1;
    }

    return (srch->lcol < lastcol);
}
// search_find -- find next column pair that matches
// The scan starts at the pair (srch->lcol,srch->rcol) and visits pairs in
// the same order search_next does.
// srch: search state; on return lcol/rcol hold the matching pair (or the
// end-of-search position) and match holds the return value
// arr: matrix to search
// RETURNS: 1=match found, 0=no further matching pair
int
search_find(search_t *srch,int arr[MAXROW][MAXCOL])
{
    int maxrow = srch->maxrow;
    int maxcol = srch->maxcol;
    int lcol = srch->lcol;
    int rcol = srch->rcol;
    int match = 0;

    // Once every right column has been tried for a given left column, go
    // straight to the next left column.  Stepping the right column by one
    // and rescanning would compare the same non-matching pairs again.
    for (; lcol < (maxcol - 1); ++lcol, rcol = lcol + 1) {
        for (; rcol < maxcol; ++rcol) {
            match = search_match(lcol,rcol,maxrow,arr);
            if (match)
                break;
        }

        // leave lcol/rcol on the matching pair
        if (match)
            break;
    }

    srch->lcol = lcol;
    srch->rcol = rcol;
    srch->match = match;

    return match;
}
// dojoin -- ensure unique pairing
void
dojoin(const search_t *srch,int arr[MAXROW][MAXCOL])
{
int lcol;
int rcol;
search_t *uniq;
search_t uniqlist[opt_M];
// make certain columns identical
for (int iter = 0; iter < opt_M; ++iter) {
while (1) {
// get two different column numbers
while (1) {
lcol = rand() % srch->maxcol;
rcol = rand() % srch->maxcol;
if (lcol != rcol)
break;
}
// we want low/high
if (lcol > rcol) {
int tmp = lcol;
lcol = rcol;
rcol = tmp;
}
// have we set this before?
int match = 0;
for (int uniqidx = 0; uniqidx < iter; ++uniqidx) {
uniq = &uniqlist[uniqidx];
match = ((lcol == uniq->lcol) && (rcol == uniq->rcol));
if (match)
break;
}
// got a _unique_ pair of column numbers
if (! match) {
uniq = &uniqlist[iter];
uniq->lcol = lcol;
uniq->rcol = rcol;
break;
}
}
// copy from one column to another
for (int irow = 0; irow < srch->maxrow; ++irow)
arr[irow][rcol] = arr[irow][lcol];
printf("dojoin: EXPECTED %d,%d\n",lcol,rcol);
}
}
// dotest -- perform test
// Fills arr with random values, has dojoin make some columns identical and
// then prints every pair of matching columns that the search finds.
// arr: matrix to use for the test (overwritten)
void
dotest(int arr[MAXROW][MAXCOL])
{
    search_t srch;

    // NOTE: a smaller geometry can be used if desired
    search_init(&srch,MAXROW,MAXCOL);

    // create random matrix
    for (int irow = 0; irow < srch.maxrow; ++irow) {
        for (int icol = 0; icol < srch.maxcol; ++icol)
            arr[irow][icol] = rand();
    }

    // create columns that are the same
    dojoin(&srch,arr);

    // Search entire matrix looking for matching columns.  There is no cap
    // on the number of reports: joins that share a column form a group of
    // identical columns, and a group produces more matching pairs than
    // joins.  The loop ends by itself because every pass moves the search
    // position forward.
    while (search_find(&srch,arr)) {
        printf("dotest: ACTUAL %d,%d\n",srch.lcol,srch.rcol);

        // resume just after the pair that was reported
        if (! search_next(&srch))
            break;
    }
}
// main -- parse options, seed the random number generator and run the test
// Options (the value follows the letter directly, e.g. -M7):
//   -M<n>  number of column pairs to make identical (MAXCOL / 4 if <n> is
//          omitted)
//   -R<n>  random seed (1 if <n> is omitted)
// Option parsing stops at the first argument that does not start with '-'.
int
main(int argc,char **argv)
{
    --argc;
    ++argv;

    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;

        if (*cp != '-')
            break;

        // cp[-1] is the option letter, cp the start of its value
        cp += 2;
        switch (cp[-1]) {
        case 'M':
            opt_M = (*cp != 0) ? atoi(cp) : (MAXCOL / 4);
            break;

        case 'R':
            // the seed is printed with %u, so it has to be read back as an
            // unsigned value; atoi is undefined above INT_MAX
            opt_R = (*cp != 0) ? (unsigned int) strtoul(cp,NULL,10) : 1;
            break;

        default:
            // unknown options are ignored
            break;
        }
    }

    // by printing the random seed value, and using -R on a subsequent program
    // invocation, we can make the result repeatable
    if (opt_R == 0)
        opt_R = (unsigned int) time(NULL);
    printf("R: %u\n",opt_R);
    srand(opt_R);

    // set number of matching columns (a negative count makes no sense and
    // would give dojoin a negative array size)
    if (opt_M <= 0)
        opt_M = 5;

    dotest(arrbig);

    return 0;
}
Here is the program output using `-R1612382146`. It has multiple columns that are the same:
R: 1612382146
dojoin: EXPECTED 12,99
dojoin: EXPECTED 2,74
dojoin: EXPECTED 18,91
dojoin: EXPECTED 18,34
dojoin: EXPECTED 64,93
dotest: ACTUAL 2,74
dotest: ACTUAL 12,99
dotest: ACTUAL 18,34
dotest: ACTUAL 18,91
dotest: ACTUAL 34,91
dotest: ACTUAL 64,93
Here are some more random outputs:
R: 1612387497
dojoin: EXPECTED 35,72
dojoin: EXPECTED 72,82
dojoin: EXPECTED 60,93
dojoin: EXPECTED 34,45
dojoin: EXPECTED 79,90
dotest: ACTUAL 34,45
dotest: ACTUAL 35,72
dotest: ACTUAL 35,82
dotest: ACTUAL 60,93
dotest: ACTUAL 72,82
dotest: ACTUAL 79,90
R: 1612387500
dojoin: EXPECTED 14,68
dojoin: EXPECTED 60,80
dojoin: EXPECTED 22,84
dojoin: EXPECTED 11,15
dojoin: EXPECTED 1,52
dotest: ACTUAL 1,52
dotest: ACTUAL 11,15
dotest: ACTUAL 14,68
dotest: ACTUAL 22,84
dotest: ACTUAL 60,80
R: 1612387503
dojoin: EXPECTED 40,42
dojoin: EXPECTED 16,29
dojoin: EXPECTED 8,69
dojoin: EXPECTED 23,74
dojoin: EXPECTED 44,67
dotest: ACTUAL 8,69
dotest: ACTUAL 16,29
dotest: ACTUAL 23,74
dotest: ACTUAL 40,42
dotest: ACTUAL 44,67
R: 1612387506
dojoin: EXPECTED 20,32
dojoin: EXPECTED 36,59
dojoin: EXPECTED 9,36
dojoin: EXPECTED 20,48
dojoin: EXPECTED 38,48
dotest: ACTUAL 9,36
dotest: ACTUAL 20,32
dotest: ACTUAL 38,48
R: 1612387508
dojoin: EXPECTED 40,71
dojoin: EXPECTED 34,84
dojoin: EXPECTED 53,72
dojoin: EXPECTED 8,73
dojoin: EXPECTED 17,75
dotest: ACTUAL 8,73
dotest: ACTUAL 17,75
dotest: ACTUAL 34,84
dotest: ACTUAL 40,71
dotest: ACTUAL 53,72
UPDATE:
Here's a version that puts all information into the struct
so that a separate array pointer argument is not needed.
It uses a special "pointer to array" to help with multidimensional arrays of the form:
int (*arr)[MAXCOL];
See: C pointer to array/array of pointers disambiguation
This preserves the correct indexing for an array with arbitrary/dynamic array dimensions:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// opt_R -- random seed (-R option); main substitutes the current time when 0
unsigned int opt_R;
// opt_M -- number of column pairs dojoin makes identical (-M option);
// main substitutes 5 when 0
int opt_M;
// opt_W -- matrix width, passed to dotest as maxcol (-W option);
// main substitutes MAXCOL when 0
int opt_W;
// opt_H -- matrix height, passed to dotest as maxrow (-H option);
// main substitutes MAXROW when 0
int opt_H;
// search_t -- the matrix together with the iterator state of a column search
// dojoin also uses lcol/rcol of this struct to record the pairs it has joined
typedef struct {
int maxrow; // number of rows in the matrix
int maxcol; // number of columns in the matrix
int *arr; // array base: maxrow * maxcol ints allocated by search_init
int lcol; // starting/current left column
int rcol; // starting/current right column
int match; // 1=match (result stored by the most recent search_find)
} search_t;
// ARRDEF -- declare a local "arr" that views srch->arr as rows of
// srch->maxcol ints, so that it can be indexed as arr[irow][icol]
// NOTE: needs a variable named srch in scope at the point of use
// NOTE: __typeof__ is a compiler extension (GCC/Clang), not standard C11
#define ARRDEF \
int (*arr)[srch->maxcol] = (__typeof__(arr)) srch->arr
// default matrix geometry; main uses these when -H/-W are not given
#define MAXROW 100
#define MAXCOL 100
// search_reset -- restart the search at the first column pair
// The geometry and the matrix itself are left untouched.
// srch: search state to rewind
void
search_reset(search_t *srch)
{
    // the first pair in iteration order is columns 0 and 1
    srch->lcol = 0;
    srch->rcol = 1;

    // a rewound search has not matched anything yet
    srch->match = 0;
}
// search_init -- allocate a search state and its matrix
// maxrow: number of rows in the matrix
// maxcol: number of columns in the matrix
// RETURNS: pointer to the new state, positioned at the first column pair,
// or NULL if memory could not be obtained.  The caller owns the result and
// releases it with search_free.
// NOTE: the matrix is zero-filled.  If maxrow or maxcol is not positive no
// matrix is allocated and arr is left NULL.
search_t *
search_init(int maxrow,int maxcol)
{
    search_t *srch = calloc(1,sizeof(*srch));

    if (srch == NULL)
        return NULL;

    srch->maxrow = maxrow;
    srch->maxcol = maxcol;

    // only a positive geometry has cells to allocate; converting a negative
    // count to size_t would request a huge, meaningless amount of memory
    if ((maxrow > 0) && (maxcol > 0)) {
        size_t nrow = (size_t) maxrow;
        size_t ncol = (size_t) maxcol;

        // reject a cell count that does not fit in size_t; calloc itself
        // checks the count * element size multiplication
        if (ncol <= ((size_t) -1) / nrow)
            srch->arr = calloc(nrow * ncol,sizeof(*srch->arr));

        if (srch->arr == NULL) {
            free(srch);
            return NULL;
        }
    }

    search_reset(srch);

    return srch;
}
// search_free -- release a search state and its matrix
// srch: state obtained from search_init; NULL is accepted and ignored, in
// the same way free(NULL) is
void
search_free(search_t *srch)
{
    if (srch == NULL)
        return;

    free(srch->arr);
    free(srch);
}
// search_match -- compare the columns srch->lcol and srch->rcol of the matrix
// RETURNS: 1 if all srch->maxrow rows hold the same value in both columns,
// 0 otherwise (also 0 when maxrow <= 0, because nothing was compared)
int
search_match(const search_t *srch)
{
    ARRDEF;
    const int left = srch->lcol;
    const int right = srch->rcol;
    const int nrow = srch->maxrow;
    int irow = 0;

    // no rows compared counts as "no match"
    if (nrow <= 0)
        return 0;

    // walk down the rows for as long as the two columns agree
    while ((irow < nrow) && (arr[irow][left] == arr[irow][right]))
        ++irow;

    // the columns match only if the walk reached the bottom
    return (irow >= nrow);
}
// search_next -- step the search position to the following column pair
// The right column moves one place to the right; once it would leave the
// matrix, the left column moves one place and the right column restarts
// just after it.
// RETURNS: 1=more to do (the left column still has a column to its right)
int
search_next(search_t *srch)
{
    const int lastcol = srch->maxcol - 1;
    const int wrapped = (srch->rcol >= lastcol);

    if (wrapped) {
        // current left column is exhausted: start the next one
        srch->lcol += 1;
        srch->rcol = srch->lcol + 1;
    }
    else {
        srch->rcol += 1;
    }

    return (srch->lcol < lastcol);
}
// search_find -- find next column pair that matches
// The scan starts at the pair (srch->lcol,srch->rcol) and moves the position
// held in srch forward until search_match accepts a pair or the pairs run
// out.  The result is also stored in srch->match.
// RETURNS: 1=match found (srch->lcol/srch->rcol identify the pair), 0=none
int
search_find(search_t *srch)
{
    const int ncol = srch->maxcol;
    int found = 0;

    while ((! found) && (srch->lcol < (ncol - 1))) {
        // try the remaining right columns for the current left column
        while (srch->rcol < ncol) {
            found = search_match(srch);
            if (found)
                break;
            srch->rcol += 1;
        }

        // nothing for this left column: move on, or stop if none is left
        if ((! found) && (! search_next(srch)))
            break;
    }

    srch->match = found;

    return found;
}
// dojoin -- make up to opt_M randomly chosen column pairs identical
// srch: supplies the geometry and the matrix; the search position is not
// modified, but the matrix cells are
//
// For each join a random low/high pair of columns is chosen and the low
// column is copied over the high column.  The high (destination) column
// must not have taken part in any earlier join: overwriting such a column
// would undo the earlier join and make its EXPECTED line untrue.  The same
// rule guarantees that no pair is produced twice.
//
// Joining stops early (with a message on stderr) when no eligible
// destination column is left.
void
dojoin(const search_t *srch)
{
    int maxrow = srch->maxrow;
    int maxcol = srch->maxcol;

    // a pair needs two different columns
    if (maxcol < 2)
        return;

    ARRDEF;

    // used[icol] is nonzero once column icol was a source or a destination
    unsigned char *used = calloc((size_t) maxcol,sizeof(*used));

    if (used == NULL) {
        fprintf(stderr,"dojoin: out of memory\n");
        return;
    }

    // number of unused columns that can still be a destination; column 0
    // never can, because the destination is always the higher column
    int avail = maxcol - 1;

    // make certain columns identical
    for (int iter = 0; iter < opt_M; ++iter) {
        int lcol;
        int rcol;

        if (avail <= 0) {
            fprintf(stderr,"dojoin: only %d joins possible\n",iter);
            break;
        }

        // pick pairs until the destination is a fresh column; this ends
        // because avail > 0 means at least one valid pair exists
        while (1) {
            lcol = rand() % maxcol;
            rcol = rand() % maxcol;
            if (lcol == rcol)
                continue;

            // we want low/high
            if (lcol > rcol) {
                int tmp = lcol;
                lcol = rcol;
                rcol = tmp;
            }

            if (! used[rcol])
                break;
        }

        // remember both columns (rcol is >= 1 and was unused until now)
        if (! used[lcol]) {
            used[lcol] = 1;
            if (lcol > 0)
                --avail;
        }
        used[rcol] = 1;
        --avail;

        // copy from one column to another
        for (int irow = 0; irow < maxrow; ++irow)
            arr[irow][rcol] = arr[irow][lcol];

        printf("dojoin: EXPECTED %d,%d\n",lcol,rcol);
    }

    free(used);
}
// dofill -- store a fresh rand() value in every cell of the matrix
// Cells are written row by row, left to right within each row.
// srch: supplies the geometry and the matrix
void
dofill(const search_t *srch)
{
    ARRDEF;
    const int nrow = srch->maxrow;
    const int ncol = srch->maxcol;

    for (int irow = 0; irow < nrow; ++irow) {
        int *row = arr[irow];

        for (int icol = 0; icol < ncol; ++icol)
            row[icol] = rand();
    }
}
// dotest -- perform test
void
dotest(int maxrow,int maxcol)
{
search_t *srch;
// NOTE: a smaller geometry can be used if desired
srch = search_init(maxrow,maxcol);
// create random matrix
dofill(srch);
// create columns that are the same
dojoin(srch);
// search entire matrix looking for matching columns
for (int iter = 0; iter < (opt_M * 2); ++iter) {
int match = search_find(srch);
if (! match)
break;
printf("dotest: ACTUAL %d,%d\n",srch->lcol,srch->rcol);
if (! search_next(srch))
break;
}
search_free(srch);
}
// main -- parse options, seed the random number generator and run the test
// Options (the value follows the letter directly, e.g. -W20):
//   -M<n>  number of column pairs to make identical (MAXCOL / 4 if <n> is
//          omitted)
//   -R<n>  random seed (1 if <n> is omitted)
//   -H<n>  matrix height in rows (default MAXROW)
//   -W<n>  matrix width in columns (default MAXCOL, minimum 2)
// Option parsing stops at the first argument that does not start with '-'.
// RETURNS: 0 on success, EXIT_FAILURE for an unusable geometry
int
main(int argc,char **argv)
{
    --argc;
    ++argv;

    for (; argc > 0; --argc, ++argv) {
        char *cp = *argv;

        if (*cp != '-')
            break;

        // cp[-1] is the option letter, cp the start of its value
        cp += 2;
        switch (cp[-1]) {
        case 'M':
            opt_M = (*cp != 0) ? atoi(cp) : (MAXCOL / 4);
            break;

        case 'R':
            // the seed is printed with %u, so it has to be read back as an
            // unsigned value; atoi is undefined above INT_MAX
            opt_R = (*cp != 0) ? (unsigned int) strtoul(cp,NULL,10) : 1;
            break;

        case 'H':
            opt_H = atoi(cp);
            break;

        case 'W':
            opt_W = atoi(cp);
            break;

        default:
            // unknown options are ignored
            break;
        }
    }

    // by printing the random seed value, and using -R on a subsequent program
    // invocation, we can make the result repeatable
    if (opt_R == 0)
        opt_R = (unsigned int) time(NULL);
    printf("R: %u\n",opt_R);
    srand(opt_R);

    // set number of matching columns (a negative count makes no sense and
    // would give dojoin a negative array size)
    if (opt_M <= 0)
        opt_M = 5;

    if (opt_H == 0)
        opt_H = MAXROW;
    if (opt_W == 0)
        opt_W = MAXCOL;

    // a negative size cannot be allocated, and picking a pair of different
    // columns needs at least two columns
    if ((opt_H < 1) || (opt_W < 2)) {
        fprintf(stderr,"main: need -H >= 1 and -W >= 2 (got %d,%d)\n",
            opt_H,opt_W);
        return EXIT_FAILURE;
    }

    dotest(opt_H,opt_W);

    return 0;
}