0

I have to create a function that reads a file called grwords.txt containing around 540000 words which are written in Greek letters.

I have to convert these words to uppercase and fill an array called char **words.

This is what I have so far.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <windows.h>
#include <ctype.h>


void fp();

/*
 * Program entry point: switch the console output code page to 1253
 * (the Windows Greek code page) and then run the word loader.
 */
int main(int argc, char *argv[]) {
    /* The command-line arguments are not used by this program. */
    (void)argc;
    (void)argv;

    /* Must happen before any Greek text is written to the console. */
    SetConsoleOutputCP(1253);
    fp();

    return 0;
}

/*
 * Read whitespace-separated words from "grwords.txt" into a dynamically
 * allocated array of strings (char **words).
 *
 * At most MAX_WORDS words are stored. A token longer than MAX_WORD_LEN
 * bytes is split by fscanf into several consecutive entries, because the
 * "%24s" conversion stops after MAX_WORD_LEN bytes.
 *
 * The array is local to this function, so every allocation is released
 * before returning. On failure to open the file or to allocate memory a
 * message is printed and the program exits with status 1.
 */
void fp(){
    enum { MAX_WORDS = 546490, MAX_WORD_LEN = 24 };

    /* Real storage for fscanf to write into: MAX_WORD_LEN bytes + '\0'. */
    char word[MAX_WORD_LEN + 1];
    char **words;
    FILE *file;
    size_t cnt = 0;
    size_t i;

    file = fopen("grwords.txt", "r");
    if (file == NULL){
        printf("File cannot be opened.\n");
        exit(1);
    }

    /* One pointer per word; sized from the pointee type, not from int. */
    words = malloc(MAX_WORDS * sizeof *words);
    if (words == NULL){
        printf("Out of memory.\n");
        fclose(file);
        exit(1);
    }

    /* The width in the format must stay equal to MAX_WORD_LEN. */
    while (cnt < MAX_WORDS && fscanf(file, "%24s", word) == 1){
        size_t len = strlen(word) + 1;   /* include the terminator */

        /* Allocate exactly what this word needs. */
        words[cnt] = malloc(len);
        if (words[cnt] == NULL){
            printf("Out of memory.\n");
            fclose(file);
            exit(1);
        }
        memcpy(words[cnt], word, len);
        cnt++;
    }
    fclose(file);

    /* Only the first cnt entries were ever allocated. */
    for (i = 0; i < cnt; i++)
        free(words[i]);
    free(words);
}

I'm still trying to figure out pointers. I know that & makes a pointer from a value and * a value from a pointer. Updated the program and it successfully fills the array with the words from the file! I still have no idea how to convert Greek lowercase to uppercase.

Raymond Chen
  • 44,448
  • 11
  • 96
  • 135
user3601507
  • 173
  • 1
  • 2
  • 8
  • How is your Greek file encoded? UTF8, UTF16, Windows CP 1253? I am not so sure `toupper` works "out of the box". (Or is it a part of your assignment to work this out?) – Jongware Jun 05 '14 at 17:19
  • `words` is a pointer that points to a random location in memory. It needs to have memory allocated and assigned to it before you try to use it. Investigate the `malloc` function. – Carey Gregory Jun 05 '14 at 17:20
  • It's CP 1253. I don't know exactly. The assignment only mentions the conversion and nothing else. @Carey Can this only be done with dynamic memory allocation? – user3601507 Jun 05 '14 at 17:21
  • 1
    Yes, that's exactly what `malloc` is. – Carey Gregory Jun 05 '14 at 17:31
  • 1
    To be blunt, you have numerous errors in your code that demonstrate a lack of understanding of some really key concepts. I recommend that you find a good C textbook and read the chapters on arrays and pointers. Do the exercises in those chapters. Once you've done that, come back to this problem. I think it's over your head for now. – Carey Gregory Jun 05 '14 at 17:34
  • I tried the book from Dennis M. Ritchie and Brian W. Kernighan but I don't like it at all and I think it's too vague. – user3601507 Jun 05 '14 at 17:39
  • @user3601507 Too vague? That book is the definitive book on C written by the authors of the language. It is not vague in the least. If you can't grasp the concepts from that book, then I don't know what to tell you. Generations of C programmers have "grown up" using that book. I recommend you give it another try. – Carey Gregory Jun 06 '14 at 02:11
  • Well I tried reading about pointers in that one and I didn't understand anything. I might retry reading it in English because a lot could have been lost in translation. – user3601507 Jun 06 '14 at 08:36
  • 1
    "The definitive book on X" is usually not a good tutorial on X, but it might be a good reference. – user253751 Jun 06 '14 at 11:00
  • 1
    I agree with @immibis. It really depends on your background. C was my first PL and couldn't get much for K&R book's. The notes of our professor were those who got me started. Also, I upvoted the question of the OP since it isn't something trivial and I didn't like the -1 there. – gsamaras Jun 06 '14 at 13:19

1 Answers1

2

Handling Greek words can be dependent on your platform.

First of all, you need to understand how file handling works. Here is what I wrote:

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define bufSize 1024 // size of one word buffer, including the terminating '\0'
// we are going to receive the .txt from cmd line
int main(int argc, char *argv[])
{
  FILE *fp;

  // Assume file has max 10 words; any further words are not read
  enum { N = 10 };

  // Allocate a 2D array of N rows
  // and bufSize columns.
  // You can think of it like an array
  // of N strings, where every string
  // has, at most, bufSize - 1 characters
  // plus the terminating '\0'.
  char buf[N][bufSize];

  // make sure we got the .txt
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }

  // open the file
  if ((fp = fopen(argv[1], "r")) == NULL)
  { /* Open source file. */
    perror("fopen source-file");
    return 1;
  }

  // we will use that for toupper()
  unsigned char c;

  // counters
  int i = 0, j;

  // Stop after N words so we never write past the last row.
  // The width 1023 is bufSize - 1: fscanf stores up to that many
  // characters and then appends '\0', which exactly fills one row.
  while (i < N && fscanf(fp, "%1023s", buf[i]) == 1)
  { /* While we don't reach the end of source. */
    /* Read characters from source file to fill buffer. */

    // print what we read
    printf("%s\n", buf[i]);

    j = 0;
    // while we are on a letter of word placed
    // in buf[i]
    while (buf[i][j])
    {
      // make the letter capital and print it;
      // toupper() needs a value in unsigned char range, so bytes
      // above 127 (e.g. Greek text) must not be passed as negative char
      c = (unsigned char)buf[i][j];
      putchar (toupper(c));
      j++;
    }
    i++;
    printf("\ndone with this word\n");
  }
  // close the file
  fclose(fp);

  return 0;
}

For this test.txt file:

Georgios
Samaras
Γιώργος
Σαμαράς

the code would run as:

./exe test.txt
Georgios
GEORGIOS
done with this word
Samaras
SAMARAS
done with this word
Γιώργος
Γιώργος
done with this word
Σαμαράς
Σαμαράς
done with this word

As you can see, I could read the Greek words, but failed to convert them to upper case.

Once you understand how file handling works, you need to use wide characters to read a file with Greek words.

So, by just modifying the above code, we get:

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024 // size of one word buffer, including the terminating L'\0'

/*
 * Read up to N whitespace-separated words from the file named on the
 * command line as wide characters, print each word, then print it again
 * converted to upper case with towupper().
 *
 * Returns 0 on success, 1 on a usage error or if the file cannot be opened.
 */
int main(int argc, char *argv[])
{
  // The LC_CTYPE locale controls how the bytes of the file are decoded
  // into wide characters and how towupper() maps them.
  if (setlocale(LC_CTYPE, "en_GB.UTF-8") == NULL)
    fprintf(stderr, "warning: locale en_GB.UTF-8 is not available\n");
  FILE *fp;
  enum { N = 15 };             // maximum number of words that are read
  wchar_t buf[N][bufSize];
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }
  wchar_t c;
  int i = 0, j;
  // Stop after N words so we never write past the last row.
  // The width 1023 is bufSize - 1, leaving room for the terminating L'\0'.
  while (i < N && fwscanf(fp, L"%1023ls", buf[i]) == 1)
  {
    wprintf( L"%ls\n\n", buf[i]);
    j = 0;
    while (buf[i][j])
    {
      c = buf[i][j];
      putwchar (towupper(c));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  return 0;
}

And now the output is this:

Georgios

GEORGIOS
done with this word
Samaras

SAMARAS
done with this word
Γιώργος

ΓΙΏΡΓΟΣ
done with this word
Σαμαράς

ΣΑΜΑΡΆΣ
done with this word

I see that you may want to create a function which reads the words. If you need a simple example of functions in C, you can visit my pseudo-site here.

As for the 2D array I mentioned above, this picture might help:

enter image description here

where N is the number of rows (equal to 4) and M is the number of columns (equal to 5). In the code above, N is N and M is bufSize. I explain more here, where you can also find code for dynamic allocation of a 2D array.

I now see that you are on Windows. I tested the code in Ubuntu.

For Windows you might want to take a good look at this question.

So, once you have read and understood all of the above, here is what you asked for, using dynamic memory management.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <locale.h>

#define bufSize 1024

wchar_t **get(int N, int M);
void free2Darray(wchar_t** p, int N);

/*
 * Read up to N whitespace-separated words from the file named on the
 * command line into a dynamically allocated table of wide strings, print
 * each word, then print it again converted to upper case with towupper().
 *
 * Returns 0 on success, 1 on a usage error, if the file cannot be opened,
 * or if the table cannot be allocated.
 */
int main(int argc, char *argv[])
{
  setlocale(LC_CTYPE, "en_GB.UTF-8");
  FILE *fp;
  const size_t N = 15;         // maximum number of words that are read
  wchar_t **buf;
  wchar_t fmt[32];
  wchar_t c;
  size_t i = 0, j;

  // Validate the arguments and open the file before allocating, so the
  // early returns below have nothing to free.
  if (argc != 2)
  {
    fprintf(stderr,
            "Usage: %s <soure-file>\n", argv[0]);
    return 1;
  }
  if ((fp = fopen(argv[1], "r")) == NULL)
  {
    perror("fopen source-file");
    return 1;
  }

  buf = get(N, bufSize);
  if (buf == NULL)
  {
    fprintf(stderr, "out of memory\n");
    fclose(fp);
    return 1;
  }

  // Build "%<bufSize-1>ls" so a long word cannot overflow a row:
  // fwscanf stores at most that many characters plus the terminator.
  swprintf(fmt, sizeof fmt / sizeof fmt[0], L"%%%dls", bufSize - 1);

  // Stop after N words so we never index past the last row.
  while (i < N && fwscanf(fp, fmt, buf[i]) == 1)
  {
    wprintf( L"%ls\n", buf[i]);
    j = 0;
    while (buf[i][j])
    {
      c = buf[i][j];
      putwchar (towupper(c));
      j++;
    }
    i++;
    wprintf(L"\ndone with this word\n");
  }
  fclose(fp);
  // NEVER FORGET, FREE THE DYNAMIC MEMORY
  free2Darray(buf, N);
  return 0;
}

// We return the pointer
/*
 * Allocate an N x M table of wchar_t: an array of N row pointers, each
 * pointing to its own row of M wide characters. Every row starts out as
 * an empty string (its first element is L'\0').
 *
 * Returns the table, or NULL if N or M is not positive or if any
 * allocation fails; in the failure case nothing is leaked.
 * The caller owns the result and must free every row and then the table.
 */
wchar_t **get(int N, int M) /* Allocate the array */
{
    int i;
    wchar_t **table;

    if (N <= 0 || M <= 0)
        return NULL;

    table = malloc((size_t)N * sizeof *table);
    if (table == NULL)
        return NULL;

    for (i = 0; i < N; i++)
    {
        table[i] = malloc((size_t)M * sizeof *table[i]);
        if (table[i] == NULL)
        {
            /* Undo the rows allocated so far, then the table itself. */
            while (i-- > 0)
                free(table[i]);
            free(table);
            return NULL;
        }
        table[i][0] = L'\0';
    }
    return table;
}

/*
 * Release a table of N rows: free each row, then the array of row
 * pointers itself. A NULL table is accepted and ignored, and NULL rows
 * are harmless because free(NULL) is a no-op.
 */
void free2Darray(wchar_t** p, int N)
{
    int i;

    if (p == NULL)
        return;

    for(i = 0 ; i < N ; i++)
        free(p[i]);
    free(p);
}

Note that this code is expected to work on Linux (tested on Ubuntu 12.04), not on Windows (tested on Win 7).

Community
  • 1
  • 1
gsamaras
  • 71,951
  • 46
  • 188
  • 305