1

After having saved a huffman tree to a file, I am trying to read and rebuild this same tree again, with the same structure it had before. I follow this post and write this function to do that:

void build_tree(BinaryTree<HuffmanNode> * tree, List<NodeTree<HuffmanNode>> * list, int start, int end) {
  if(start > end)
    return;

  tree->insert(list->get(start)->getData().getData());

  int i;
  for(i = start; i <= end; i++) {
    if(list->get(i)->getData().getData() > list->get(start)->getData().getData()) {
      break;
    }
  }

  build_tree(tree, list, start + 1, i - 1);
  build_tree(tree, list, i, end);
}

the classes List and BinaryTree in the example are:

List.h

template<class T>
class List {
private:
  NodeList<T> * first;
public:
  List();
  ~List();

  void insert(T data);
  void update(int index, T data);
  void remove(int index);

  int size();
  NodeList<T> * get(int index);
  void set(int index, NodeList<T> * value);
  int find(T data);
  void sort();
  void print();
  T * toArray();
};

NodeList.h

template<class T>
class NodeList {
private:
  T data;
  NodeList * next;
public:
  NodeList();
  ~NodeList();

  T getData();
  void setData(T value);

  NodeList * getNext();
  void setNext(NodeList<T> * next);
  void setNext(NodeList<T> next);
};

binaryTree.h

template<class T>
class BinaryTree {
protected:
  NodeTree<T> * root;
public:
  BinaryTree();
  ~BinaryTree();

  NodeTree<T> * getRoot();
  void setRoot(NodeTree<T> * node);

  void insert(T value);
  void update(T old_value, T new_value);
  void remove(T value);

  List<T> preOrder();
  List<T> inOrder();
  List<T> postOrder();

  void preOrder(NodeTree<T> * node, List<T> * list);
  void inOrder(NodeTree<T> * node, List<T> * list);
  void postOrder(NodeTree<T> * node, List<T> * list);

  List<T> level(int value);
  int levelOf(T data);
  int levelOf(NodeTree<T> * node, T data, int level);
  int height();
  int height(NodeTree<T> * node, int height);
  List<T> leafs();
};

NodeTree.h

template<class T>
class NodeTree {
private:
  T data;
  NodeTree * left;
  NodeTree * right;
public:
  NodeTree();
  NodeTree(T data);
  ~NodeTree();

  T getData();
  void setData(T value);

  NodeTree * getLeft();
  void setLeft(NodeTree<T> * left);
  void setLeft(NodeTree<T> left);

  NodeTree * getRight();
  void setRight(NodeTree<T> * right);
  void setRight(NodeTree<T> right);
};

HuffmanNode is implemented like this:

struct HuffmanNode {
  char data;
  int frequency;

  bool operator==(HuffmanNode other) { return this->data == other.data; }
  bool operator==(char data) { return this->data == data; }
  bool operator!=(HuffmanNode other) { return this->data != other.data; }
  bool operator!=(char data) { return this->data != data; }

  bool operator<(HuffmanNode other) { return frequency < other.frequency; }
  bool operator<=(HuffmanNode other) { return frequency <= other.frequency; }
  bool operator>(HuffmanNode other) { return frequency > other.frequency; }
  bool operator>=(HuffmanNode other) { return frequency >= other.frequency; }

  HuffmanNode operator++() { this->frequency++; return *this; }
  HuffmanNode operator++(int) { this->frequency++; return *this; }
  HuffmanNode operator--() { this->frequency--; return *this; }
  HuffmanNode operator--(int) { this->frequency--; return *this; }

  friend ostream &operator<<( ostream &output, const HuffmanNode &node ) { output << node.data << " ( " << node.frequency << " ) "; return output; }
  friend istream &operator>>( istream  &input, HuffmanNode &node ) { input >> node.data >> node.frequency; return input; }
};
typedef struct HuffmanNode HuffmanNode;

Anyone can tell me what I am doing wrong here?

full code: https://pastebin.com/5dfHRhLe

UPDATE

After some searches, I change my code based on the answers I found here and here. The second one, I am using the 2 functions exactly as they are show in the answer. I try adapt the first one to my use case, but I suspect that is something wrong.

For write the file, I have this:

  getCode(&encodeTable, toEncode.getRoot());
  if (output.is_open()) {
    List<HuffmanNode> list = toEncode.preOrder();

    vector<bool> bits;
    for(int i=1; i<=list.size(); i++) {
      HuffmanNode node = list.get(i)->getData();
      if(node.data == 0x0)
        bits.push_back(true);
      else
        bits.push_back(false);
    }
    cout << endl;

    binary_write(output, bits);

    for(long unsigned int i=1; i<=bits.size(); i++) {
      HuffmanNode node = list.get(i)->getData();
      if(node.data != 0x0) {
          char c = node.data;
          output.write(&c, sizeof(c));
      }
    }

    input.clear();
    input.seekg(0, ios::beg);

    string encoded_file = "";
    char c;
    while (input.get(c))
      if(encodeTable.get(c) != nullptr)
        encoded_file = encoded_file + encodeTable.get(c)->getValue();
    if(encoded_file.length() % 8 != 0)
      encoded_file = encoded_file + getSubstring(encoded_file.length() % 8);

    for(long unsigned int i=0; i<encoded_file.length(); i+=8) {
      string data = encoded_file.substr(i, 8);
      bitset<8> b(data);
      unsigned long x = b.to_ulong();
      unsigned char c = static_cast<unsigned char>( x );
      output.write(reinterpret_cast<char*>(&c), sizeof(c));
    }
  }

for read the bits from the file, I have that:

  string coded_file = "";
  if (input.is_open()) {
    vector<bool> bits;
    binary_read(input, bits);

    toDecode.insert(HuffmanNode());
    NodeTree<HuffmanNode> * temp = toDecode.getRoot();

    for(long unsigned int i=1; i<bits.size();) {
      if(bits[i++]) {
        temp->setLeft(NodeTree<HuffmanNode>());
        temp = temp->getLeft();
      } else {
        char c;
        input.read(&c, sizeof(c));
        HuffmanNode node;
        node.data = c;
        temp->setLeft(NodeTree<HuffmanNode>(node));
      }

      if(bits[i++]) {
        temp->setRight(NodeTree<HuffmanNode>());
        temp = temp->getRight();
      } else {
        char c;
        input.read(&c, sizeof(c));
        HuffmanNode node;
        node.data = c;
        temp->setRight(NodeTree<HuffmanNode>(node));
      }
    }

    char c;
    while(input.read(reinterpret_cast<char*>(&c), sizeof(c))) {
      bitset<8> b(c);
      coded_file = coded_file + b.to_string();
    }
  }
  getCode(&decodeTable, toDecode.getRoot());
Kleber Mota
  • 8,521
  • 31
  • 94
  • 188

1 Answers1

0

The problem you are having is with how to serialize the random chunks of memory (with in this case are the tree nodes), in your example you tried to encode the content but this is no necessary as you can just write the chunks of memory and you did not include in the file the necessary data to re-construct the tree.

This example uses a unique index to identify each node, so that you can save them in any order and just query them in the reconstruction with:

int left_child_index = parent_index * 2 + 1;
int right_child_index = parent_index * 2 + 2;
#include <iostream>
#include <fstream>
#include <vector>

template<typename V> 
struct Node {
    size_t    size = 0; // weight
    V         data  = NULL;
    Node<V> * left  = nullptr;
    Node<V> * right = nullptr;

    // New data node
    Node(V value, size_t weight){
        this->size = weight; 
        this->data = value;
    };
    // New parent node
    Node(Node<V> * left, Node<V> * right){
        this->size = right->size + left->size;
        this->right = right;
        this->left = left; 
    };
};

template<typename V> 
class Huffman {

    struct save_chunk {
        size_t index;
        size_t weight;
        V data;
    };

    static void save_nodes(std::ostream * out, Node<V> * node, int i){
        if(node->left != nullptr){
            save_nodes(out, node->left, i * 2 + 1);
        }
        if(node->right != nullptr){
            save_nodes(out, node->right, i * 2 + 2);
        }
        // it is recursive, but still one threaded
        // so making only one instance for all calls
        // is a great optimization
        static save_chunk chunk;
        chunk.weight = node->size;
        chunk.data = node->data;
        chunk.index = i;
        out->write((const char *)&chunk, sizeof(save_chunk));
    }
    static void load_nodes(std::vector<save_chunk*> & chunks, Node<V> * node, int _e){
        size_t i; 
        int l = _e * 2 + 1;
        int r = _e * 2 + 2;
        for (i = 0; i < chunks.size(); i++){
            // if the next node is found join it 
            // to the parent node
            if (chunks[i]->index == l){
                node->left = new Node<V>(
                    chunks[i]->data, 
                    chunks[i]->weight
                );
                chunks[i] = chunks[chunks.size() - 1];
                chunks.pop_back();  
                // repeat to the posible e child nodes
                load_nodes(chunks, node->left, l);
                break;
            }
        }
        for (i = 0; i < chunks.size(); i++){
            if (chunks[i]->index == r){
                node->right = new Node<V>(
                    chunks[i]->data, 
                    chunks[i]->weight
                );
                chunks[i] = chunks[chunks.size() - 1];
                chunks.pop_back();  
                load_nodes(chunks, node->right, r);
                break;
            }
        }
    }

    public:

    static void save_tree(const char * path, Node<V> * tree){
        std::ofstream file;
        file.open(path, std::ofstream::binary);
        file.seekp(0);  // move the cursor to the start
        save_nodes(&file, tree, 0);
        file.close();
    }
    static Node<V> * load_tree(const char * path){
        std::ifstream file;
        file.open(path, std::ifstream::binary);

        // obtaind how log is the file, the length 
        // is a multimple of save_chunk
        file.seekg (0, file.end);
        size_t len = file.tellg();
        file.seekg (0, file.beg);

        // this just moves the file content into 
        // memory as an array of the chunks
        save_chunk * chunk = nullptr;
        std::vector<save_chunk*> chunks;
        for (int i = 0; i < len / sizeof(save_chunk); i++){
            chunk = new save_chunk;
            file.read((char *)chunk, sizeof(save_chunk));
            chunks.push_back(chunk);
        }

        // becouse its recursive, the root element
        // needs to be created first
        size_t i;
        Node<V> * tree;
        for (i = 0; i < chunks.size(); i++){
            if (chunks[i]->index == 0){
                break;
            }
        }
        tree = new Node<V>(chunks[i]->data, chunks[i]->weight);
        chunks[i] = chunks[chunks.size() - 1];
        chunks.pop_back();  

        // and just create the rest
        load_nodes(chunks, tree, 0);

        file.close();
        return tree;
    }

    static Node<V> * gen_tree(std::vector<int> & weight, std::vector<V> & data){
        size_t i, min;
        Node<V> * selected = nullptr;
        std::vector<Node<V>*> nodes;
        if (weight.size() != data.size()){
            return nullptr;
        }
        for (i = 0; i < weight.size(); i++){
            nodes.push_back(new Node<V>(data[i], weight[i]));
        }
        while(!nodes.empty()){
            min = 0;
            for (i = 1; i < nodes.size(); i++){
                if(nodes[i]->size < nodes[min]->size){
                    min = i;
                }
            }
            if(selected != nullptr){
                nodes[min] = new Node<V>(nodes[min], selected);
                selected = nullptr;
            } else {
                selected = nodes[min];
                nodes[min] = nodes[i - 1];
                nodes.pop_back();  
            }

        }
        return selected;
    }

    static void erase_tree(Node<V> * node, bool _root = true){
        if(node->left != nullptr){
            erase_tree(node->left, false);
            delete node->left;
        }
        if(node->right != nullptr){
            erase_tree(node->right, false);
            delete node->right;
        }
        if (_root){
            delete node;
        }
    }
    
    static void print_tree(Node<V> * node, int _c = 0, int _i = 0){
        for (int i = 0; i < _c; i++){ 
            std::cout << "   "; 
        }
        std::cout << "(" << _i << ")" << "-> " << node->size;
        if(node->data != NULL){
            std::cout << " > " << node->data << std::endl;
        } else {
            std::cout << std::endl;
        }
        if(node->right != nullptr){
            print_tree(node->right, _c + 1, _i * 2 + 2);
        }
        if(node->left != nullptr){
            print_tree(node->left, _c + 1, _i * 2 + 1);
        }
    }
};


int main(void) {
    Node<char> * tree;
    std::vector<char> data = { 'a', 'b', 'c', 'd', 'e', 'f', 'g' };
    std::vector<int>  wei  = { 15,  10,   8,  12,  20,  16,  25  };

    tree = Huffman<char>::gen_tree(wei, data); 
    std::cout << "Tree addr -> " << tree << std::endl;
    Huffman<char>::save_tree("D:/Escritorio/tmp_cpp/tree.bin", tree);
    Huffman<char>::print_tree(tree);
    Huffman<char>::erase_tree(tree);

    std::cout << std::endl;

    tree = Huffman<char>::load_tree("D:/Escritorio/tmp_cpp/tree.bin");
    std::cout << "Tree addr -> " << tree << std::endl;
    Huffman<char>::print_tree(tree);
    Huffman<char>::erase_tree(tree);

    return 0;
}

Sample output

Tree addr -> 000002DE2A6C2380
(0)-> 106
   (2)-> 45
      (6)-> 20 > e
      (5)-> 25 > g
   (1)-> 61
      (4)-> 27
         (10)-> 12 > d
         (9)-> 15 > a
      (3)-> 34
         (8)-> 16 > f
         (7)-> 18
            (16)-> 8 > c
            (15)-> 10 > b

Tree addr -> 000002DE2A6C2AA0
(0)-> 106
   (2)-> 45
      (6)-> 20 > e
      (5)-> 25 > g
   (1)-> 61
      (4)-> 27
         (10)-> 12 > d
         (9)-> 15 > a
      (3)-> 34
         (8)-> 16 > f
         (7)-> 18
            (16)-> 8 > c
            (15)-> 10 > b
SrPanda
  • 854
  • 1
  • 5
  • 9