I am trying to implement Huffman encoding in C. I am done with the tree construction and obtained the codeword for each symbol as the algorithm proceeds. But now I am stuck with insertion of the codewords into binary files for the corresponding symbol. Can someone suggest how the codeword or binary bits can be written into binary file so that i can obtain the compressed file.
The codewords are of variable length.
A function to write and read these bits to/from the file would be helpful.
This is the code I have written
void create_compressed_file()
{
char str[20], ch, *str2, str1[10], str_arr[6], str3[10];
FILE *fp, *fp2, *fp3;
int i, array[20], j = 0;
fp2 = fopen("newfile.txt", "r"); // contains the original text file
fp3 = fopen("codeword.txt", "r"); // contains the symbol and codeword
while (fscanf(fp2, "%s", &str) == 1) {
rewind(fp3);
str2 = strtok(str, "-");
while (str2 != NULL) {
strcpy(str_arr, str2);
printf("str2= %s ", str_arr); //str2 stores the symbol(not char but a string)
printf(" %s-", str2);
while (fscanf(fp3, "%s", &str1) == 1) {
if (strcmp(str1, str_arr) == 0) {
fscanf(fp3, "%s", &str1); // extracted corresponding codeword(1s and 0s) of the symbol and stored it into str1
printf("%s\n", str1);
write_codeword_to_binaryfile(); // function that i want to create with is incomplete and need your help.
}
}
str2 = strtok(NULL, "-");
rewind(fp3);
}
printf("\nspace:");
strcpy(str_arr, "space");
while (fscanf(fp3, "%s", &str1) == 1) {
if (strcmp(str1, str_arr) == 0) {
fscanf(fp3, "\n%s", &str1); // extract the codeword for(space)character
printf("%s\n", str1);
}
}
}
fclose(fp2);
fclose(fp3);
}
codeword.txt:
is 0000
por 00010
Plain 000110
most 0001110
the 0001111
ted 00100
text 00101
ly 0011000
near 0011001
pli 0011010
ap 0011011
ble 0011100
ta 0011101
by 0011110
sup 0011111
cryp 0100000
In 0100001
ra 0100010
tog 0100011
ting 0100100
tain 0100101
mands 0100110
com 0100111
mes 0101000
to 0101001
ge 0101010
sa 0101011
plain 0101100
phy 0101101
I tried the above code as below but it didnt write anything... The file size after execution was 0 bytes:
#include <stdio.h>
#include <conio.h>
#include <stdint.h>
void write_codeword_to_binaryfile(
const char *codeword, // codeword to write, in ASCII format
FILE *file, // destination file
uint8_t *buffer,
int *fullness)
{
char c;
// fullness = ;
*buffer = 0;
for (c = *codeword++; c != '\0'; c = *codeword++) // iterate
{
int bit = c - '0'; // convert from ASCII to binary 0/1
*buffer |= bit << (7 - fullness);
++fullness;
}
fputc(*buffer, file);
}
int main() {
FILE *fp;
uint8_t *buffer = 0;
char *c = "10101010";
char b = 0;
int i;
fp = fopen("myfile.bin", "wb");
write_codeword_to_binaryfile(c, fp, buffer, 8);
fclose(fp);
getch();
}