I am reading from a file called reg.dat, and setting the first variable in each column as an index of variable Y, and the remaining variables in each column as a index of X. Then, I want to feed X and Y into dgesv function to calculate the linear regression.
My code to do so follows (in chunks, because I could not include it all at once on this website). The error I get when I run gcc -ansi -pedantic readReg.c -o readReg -llapack -lblas -lgfortran
, is as follows:
readReg.c: In function ‘main’:
readReg.c:18: warning: ISO C90 forbids variable length array ‘ipiv’
readReg.c:19: warning: ISO C90 forbids variable length array ‘X1’
readReg.c:19: warning: ISO C90 forbids variable length array ‘X1’
readReg.c:19: warning: ISO C90 forbids variable length array ‘XtX’
readReg.c:19: warning: ISO C90 forbids variable length array ‘XtY’
readReg.c:48: error: subscripted value is neither array nor pointer
For instance, if the file reg.dat is:
5.1 3.5 1.4
4.9 3 1.4
4.7 3.2 1.3
4.6 3.1 1.5
5 3.6 1.4
then X = [5.1, 4.9, 4.7, 4.6, 5] and Y = [3.5, 1.4, 3, 1.4, 3.2, 1.3, 3.1, 1.5, 3.6, 1.4]:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
int getCol(char *myStr);
int getRow(char *fileName);
int assignY(int nCol, int nRow, double *Y, char *fileName);
int assignX(int nCol, int nRow, double *X, char *fileName);
void dgesv_(int *n, int *nrhs, double *a, int *lda, int *ipiv, double *b, int *ldb, int *info);
int main(){
FILE *f;
char myStr[1000];
int strL;
int nCol;
int nRow;
char *fileName = "reg.dat";
int i, j, k, n1=nCol, n2=1, ipiv[nCol], info;
double X1[nRow][nCol], XtX[(nCol) * (nCol)], XtY[nCol];
double *X;
double *Y;
f = fopen(fileName, "r");
if (f == NULL) perror ("Error opening file");
else {
if (fgets(myStr, 1000, f) != NULL )
puts(myStr);
fclose(f);
}
strL = strlen(myStr);
nCol = getCol(myStr);
nRow = getRow(fileName);
printf("Sample size and number of predictors are %d and %d respectively.\n", nRow, nCol-1);
X = (double *) malloc(sizeof(double) * ((nCol-1) * nRow));
Y = (double *) malloc(sizeof(double) * nRow);
assignY(nCol, nRow, Y, fileName);
assignX(nCol, nRow, X, fileName);
Next, I manipulate X and Y...
/* The following is for doing the dgesv function */
/* design matrix */
for (i=0; i<nRow; i++){
X1[i][0] = 1;
for (j=1; j<n1; j++)
X1[i][j] = X[i][j-1];
}
/* t(X1) %*% X1 */
for (i=0; i<n1; i++){
for (j=0; j<n1; j++){
XtX[i*n1+j] = 0;
for (k=0; k<nRow; k++)
XtX[i*n1+j] += X1[k][i] * X1[k][j];
}
}
/* t(X1) %*% Y */
for (i=0; i<n1; i++){
XtY[i] = 0;
for (j=0; j<nRow; j++){
XtY[i] += X1[j][i] * Y[j];
}
}
Next I print results
/* XtX is symmetric, no transpose needed before passing to Fortran subrountine */
dgesv_(&n1, &n2, XtX, &n1, ipiv, XtY, &n1, &info);
if (info!=0) printf("failure with error %d\n", info);
/* print beta */
printf("The regression coefficients: ");
for (i=0; i<n1; i++){
printf("%f ", XtY[i]);
}
printf("\n");
return 0;
}
Helper functions...
int assignY(int nCol, int nRow, double *Y, char *fileName){
int i=0;
int j;
char string[1000];
char* data = NULL;
FILE *f;
f = fopen(fileName, "r");
while(fgets(string, sizeof(string), f) != NULL){
data = strtok(string, " ");
for (j=0; NULL != data && j<nCol; j++){
if (data[strlen(data) - 1] == '\n')
data[strlen(data) - 1] = '\0';
if (j==0){
Y[i] = atof(data);
i++;
}
data = strtok(NULL, " ");
}
}
for (i=0;i<nRow;i++){
printf("%f\n", Y[i]);
}
return 0;
}
Helper functions...
int assignX(int nCol, int nRow, double *X, char *fileName){
int i=0;
int j;
char string[1000];
char* data = NULL;
FILE *f;
f = fopen(fileName, "r");
while(fgets(string, sizeof(string), f) != NULL){
data = strtok(string, " ");
for (j=0; NULL != data && j<nCol; j++){
if (data[strlen(data) - 1] == '\n')
data[strlen(data) - 1] = '\0';
if (j!=0){
X[i] = atof(data);
i++;
}
data = strtok(NULL, " ");
}
}
for (i=0;i<(nRow*(nCol-1));i++){
printf("%f\n", X[i]);
}
return 0;
}
Helper functions...
int getCol(char *myStr){
int length,i,count=0;
char prev;
length=strlen(myStr);
if(length > 0){
prev = myStr[0];
}
for(i=0; i<=length; i++){
if(myStr[i]==' ' && prev != ' '){
count++;
}
prev = myStr[i];
}
if(count > 0 && myStr[i] != ' '){
count++;
}
return count;
}
int getRow(char *fileName){
char ch;
int count=0;
FILE *f;
f = fopen(fileName, "r");
while(!feof(f)){
ch = fgetc(f);
if(ch == '\n')
{
count++;
}
}
fclose(f);
return count;
}
EDIT:
I now changed to malloc() for X1, XtY, XtX, and ipiv. I also now used multi-dimensional dynamic array for X1. The errors are all gone when I run gcc, except for:
readReg.c: In function ‘main’:
readReg.c:62: error: subscripted value is neither array nor pointer
Below is the updated main function:
int main(){
FILE *f;
char myStr[1000];
int strL;
int nCol;
int nRow;
char *fileName = "reg.dat";
int i, j, k, n1=nCol, n2=1, info;
double *X;
double *Y;
double **X1;
double *XtX;
double *XtY;
int *ipiv;
double *temp;
f = fopen(fileName, "r");
if (f == NULL) perror ("Error opening file");
else {
if (fgets(myStr, 1000, f) != NULL )
puts(myStr);
fclose(f);
}
strL = strlen(myStr);
nCol = getCol(myStr);
nRow = getRow(fileName);
printf("Sample size and number of predictors are %d and %d respectively.\n", nRow, nCol-1);
X = (double *) malloc(sizeof(double) * ((nCol-1) * nRow));
Y = (double *) malloc(sizeof(double) * nRow);
XtX = (double *) malloc(sizeof(double) * (nCol*nCol));
XtY = (double *) malloc(sizeof(double) * nCol);
ipiv = (int *) malloc(sizeof(int) * nCol);
assignY(nCol, nRow, Y, fileName);
assignX(nCol, nRow, X, fileName);
X1 = malloc(nRow * sizeof(double*));
temp = malloc(nRow * nCol * sizeof(double));
for (i= 0; i < nRow; i++) {
X1[i] = temp + (i * nCol);
}
...
}