0

In my Huffman Algorithm project, so far I have generated the codes for each character of the input file. I have also stored the characters and their corresponding codes in an unordered map. Now, I want to read our input string, and print the corresponding codes of each character in the output file. However, printing the codes in string format will not compress the file. I want to convert my string code to bits format. I know that we need to use a byte buffer, but I do not know how I will apply this concept to my code. Any help will be very much appreciated.

#include<iostream>
#include<string>
#include<queue>
#include<vector>
#include<bitset>
#include<fstream>
#include<unordered_map>
#include<map>
using namespace std;

struct node
{
    char c; //character in the string
    int f; //Frequency of character in the string
    node* next;
    node* left, * right; //left and right child of binary tree respectively

    node()
    {
        f = 0;
        left = NULL;
        right = NULL;
        c = NULL;
        next = NULL;

        
    }
};


struct compare {
public:
    bool operator()(node* a, node* b) // overloading both operators 
    {
        
        return a->f > b->f; //To maintain the order of min heap priority queue
    }
};
class Huffman
{
    string filename; //The name of the file we want to encode
    string text; //The text that will be encoded
    priority_queue<node*, vector<node*>, compare> pq; //Priority queue that will contian characters of our string and their frequency
    string encoded;
    unordered_map <char, string> um;
public:
    Huffman()
    {
        
        text = "";
        encoded = "";
    }

    void FileRead()
    {
        cout << "Enter the name of the file you want to encode:";
        cin >> filename;
        fstream readfile(filename, fstream::in);
        getline(readfile, text, '\0');

        cout << text << endl;
        readfile.close();
    }

    

    //Function which will calculate the frequency of characters in the string entered by the user
    void CharacterFrequency()
    {
        
        for (int i = 0; i < text.length(); i++)
        {
            int sum = 0;
            for (int j = 0; j < text.length(); j++)
            {

                if (j < i and text[i] == text[j])
                {
                    break;
                }


                    if (text[i] == text[j])
                    {
                        sum++;
                        
                        
                    } 
                    
                    
                
            }

            if (sum != 0)
            {
                PriorityQueue(text[i], sum);
            }
        }

            
            
        

    }

    // This will push our characters and their frequencies into our STL min heap priority queue
    void PriorityQueue(char ch, int freq)
    {
        
        node* n=new node; //pointer of type node is created
        n->c = ch; //Pointer stores character
        n->f = freq; //Pointer stores frequency of the character
        pq.push(n); //The node is pushed into the priority queue
        

        
    }

    //Will display the whole priority queue. All of the elements will be popped from it as a result.
    void PriorityQueueDisplay()
    {
        while (!pq.empty())
        {
            cout << (pq.top())->c<<" "<<(pq.top())->f << endl;
            pq.pop();
        }
    }


    //This function will create our Huffman Tree from a priority queue
    void HuffmanTree()
    {
        node* n1, * n2; //The nodes that will be popped each time from the priority queue

        //This loop will continue to pop out two nodes from the priority queue until only one nodes is left
        //in the priority queue
        while (pq.size()!=1)
        {
            n1 = pq.top();
            pq.pop();
            n2 = pq.top();
            pq.pop();
            node* z = new node; //Creation of new node of Huffman tree
            z->left = n1;
            z->right = n2;
            z->f = (n1->f) + (n2->f); //Storing sum of the two popped nodes in Huffman tree node
            z->c = '&'; //Assigning the new node a character that is not used in formal speech
            pq.push(z); //Pushing the node into the priority queue again
            
        }

        node* root = pq.top(); //Making the last node the root node
        EncodeAndPrintCodes(root,encoded); //Passing the root node and a string that will encode each character of our inputted string
    }

    //This function will recursively search for a character in the string, and will print it's corresponding code.
    //It will do this for all our characters
    void EncodeAndPrintCodes(node* root,string en)
    {
        
        if (root == NULL)
        {
            
            return ;
        }

        if (root->c != '&')
        {
            //cout << root->c << ":" << en;
            StoreinMap(root->c, en);
            
        }
        
        EncodeAndPrintCodes(root->left, en + "0");
        EncodeAndPrintCodes(root->right, en + "1");
        
        
        
    }

    //Will convert our code in string to bitstream and then store it in a text file
    void CompressedFile(char ch, string code)
    {
        
        ofstream compressed;
        compressed.open("CompressedFile.txt", ios::app | ios::out);
    }

    void StoreinMap(char ch, string code)
    {
        
        
        um.emplace(pair<char, string>(ch,code));
        
    }

    /*void DisplayEncoded()
    {
        cout << encoded;
    }*/



    //Displays the size of the priority queue
    void DisplaySize()
    {
        cout<<pq.size();
    }
};

int main()
{
    Huffman obj;
    obj.FileRead();
    obj.CharacterFrequency();
    //obj.PriorityQueueDisplay();
    obj.HuffmanTree();
    //obj.DisplaySize();
    //obj.DisplayEncoded();
    //obj.CompressedFile();
    return 0;
}
  • [https://stackoverflow.com/questions/23344257/convert-a-string-of-binary-into-an-ascii-string-c](https://stackoverflow.com/questions/23344257/convert-a-string-of-binary-into-an-ascii-string-c) – drescherjm Jun 14 '22 at 15:44
  • Do you know how to turn 8 bits into a byte? – user253751 Jun 14 '22 at 15:47
  • 2
    If your question is really about "converting a string", you may be limiting your responses by first asking everyone to read and comprehend more than 200 lines of code. A [mre] would help you, as most of this code is unrelated to your question. – Drew Dormann Jun 14 '22 at 15:47
  • In its simplest form you can just have a variable holding a byte and a second variable with the number of bits currently stored in the byte. For each code you want to write add its bits to the byte, when the byte gets to 8 bits, write it to the file and reset it back to 0 and the counter to 0. Yourl can make it more efficient by writing more bytes at a time but the same basic structure applies – Alan Birtles Jun 14 '22 at 15:56
  • `char c;` `c = NULL;` - don't do things like that. `c` isn't a pointer. – Ted Lyngmo Jun 14 '22 at 15:56
  • See https://stackoverflow.com/a/33050228/1180620 – Mark Adler Jun 14 '22 at 23:20

1 Answers1

0

Copied from this answer, this is a way to write bits to a file of bytes. You have a bit buffer consisting of:

unsigned long bitBuffer = 0;
int bitcount = 0;

To add the bits bits in value to the buffer:

bitBuffer |= value << bitCount;
bitcount += bits;

To write and remove available bytes:

while (bitCount >= 8) {
    writeByte(bitBuffer & 0xff);
    bitBuffer >>>= 8;
    bitCount -= 8;
}

At the end, you need to write any bits remaining in the bit buffer. When decoding, you need to take care to not interpret any filler bits in the last byte as data that it is not. For that, you'll need an end marker in your data.

Some side comments. Somehow you turned the O(n) calculation of character frequencies into an O(n^2) calculation! You should think about that some more. Don't define special character values. You should be able to compress any sequence of bytes. Your use of getline() stops reading the input if it gets to a zero byte. Use rdbuf(). Your use of & as an indicator of a node because you think the character is "not used in formal speech" is wrong. (It is commonly used in writing.) If there is an ampersand in the input, your program will crash, trying to access an uninitialized pointer. Use left as your indicator of whether this is a node or a leaf by setting it to nullptr if it's a leaf.

Mark Adler
  • 101,978
  • 13
  • 118
  • 158