0

I wrote my own data structure (Linked List) and used it in the code below. When I analyze the program with valgrind, both push and push_back methods of linked list cause memory leak. Could you help me find why is that happening?

Linked list:

template <typename T>
struct Node {
  T data;
  Node *next;
};

/**
 * @brief Simple Linked List implementation
 * 
 * @tparam T 
 */
template <typename T> class List{
private:

public:

    Node<T> *head;


    /**
     * @brief Amount of nodes in the list
     * 
     */
    int length;
    /**
     * @brief Construct a new List object
     * 
     */
    List(){
        head = NULL;
        length = 0;
    }

    /**
     * @brief Add new node to the list and increase size
     * 
     * @param val 
     */
    void push(T val){
        Node<T> *n = new Node<T>();   
        n->data = val;             
        n->next = head;        
        head = n;              
        length++;
    }

    /**
     * @brief Add new node to the end of the list and increase size
     * 
     * @param val 
     */
    void push_back(T val) {
        Node<T> *n = new Node<T>();
        Node<T> * temp = head;
        n->data = val;
        n->next = nullptr;
        if (head) {
            while (temp->next != NULL) {
                temp = temp->next;
            }
            temp->next = n;
        } else {
            head = n;
        }
        length++;
    }

    /**
     * @brief Remove the node from the list and decrease size
     * 
     * @return T 
     */
    T pop(){
      if(head) {
        T p = head->data;
        head = head->next;
        length--;
        return p;
      }
    }


/**
 * @brief Get n-th item on the list
 * 
 * @param index Index of the item 
 * @return T 
 */
    T get(int index) {
        T value_to_return;
        Node<T> * temp = head;
        if (index == 0) {
            return head->data;
        }
        for (int i = 0; i < index; i++) {
            temp = temp->next;
            value_to_return = temp->data;
        }
        return value_to_return;
    }
};

Code causing errors:

/**
 * @file file_reader.h
 * @author Dawid Cyron (software@dawidcyron.me)
 * @brief File with functions used for processing required text files
 * @version 0.1
 * @date 2020-01-26
 * 
 * @copyright Copyright (c) 2020
 * 
 */
#include <vector>
#include "bibliography.h"
#include <fstream>
#include <regex>
#include "list.h"
#include "map.h"


/**
 * @brief Function used for sorting list of bibliography
 * 
 * @param bibliography_list List to sort
 */
void sort_bibliography_list(List<bibliography> bibliography_list) {
    Node <bibliography> * current = bibliography_list.head, * index = NULL;
    bibliography temp;

    if (bibliography_list.head == NULL) {
        return;
    } else {
        while (current != NULL) {
            index = current->next;

            while (index != NULL) {
                if (current->data.author.substr(current->data.author.find(" "), current->data.author.length() - 1) > index->data.author.substr(index->data.author.find(" "), index->data.author.length() - 1)) {
                    temp = current->data;
                    current->data = index->data;
                    index->data = temp;
                }
                index = index->next;
            }
            current = current->next;
        }   
    }
}

/**
 * @brief Funciton used for reading the contents of bibliography file
 * 
 * @param filename Name of the file containing bibliography
 * @return std::vector < bibliography > Vector containing bibliography objects (tag, author, book title), alphabetically sorted by surname
 */
List < bibliography > readBibliographyFile(char * filename) {
    std::ifstream bibliography_file(filename);
    std::string line;
    int line_counter = 0;
    bibliography bib;

    List<bibliography> storage_test;

    if (bibliography_file.is_open()) {
        while (getline(bibliography_file, line)) {
            if (line_counter == 0) {
                if (line == "") {
                    std::cout << "Incorrect data format. Exiting" << std::endl;
                    exit(1);
                }
                bib.label = line;
            } else if (line_counter == 1) {
                if (line == "") {
                    std::cout << "Incorrect data format. Exiting" << std::endl;
                    exit(1);
                }
                bib.author = line;
            } else if (line_counter == 2) {
                if (line == "") {
                    std::cout << "Incorrect data format. Exiting" << std::endl;
                    exit(1);
                }
                bib.book = line;
                storage_test.push_back(bib);
                line_counter = 0;
                // Skip the empty line
                getline(bibliography_file, line);
                continue;
            }
            line_counter++;
        }
    }
    sort_bibliography_list(storage_test);
    return storage_test;
}


/**
 * @brief Function used to load references footer
 * 
 * @param references List of references
 * @param output Reference to the output file
 */
void loadReferenceFooter(List<std::string> references, std::ofstream & output) {
    output << "\nReferences\n \n";;
    for (int i = 0; i < references.length; i++) {
        output << references.get(i);
    }
}

/**
 * @brief Function used to replace cite tags with referenes
 * 
 * @param filename Name of the file containing the text
 * @param data Vector of Bibliography objects (tag, author, book title), has to be sorted by surname
 * @param output_filename Name of the file where the content should be saved
 */
void replaceCites(char * filename, List < bibliography > data, char * output_filename) {
    std::ifstream text_file(filename);
    std::string content;
    content.assign((std::istreambuf_iterator < char > (text_file)), (std::istreambuf_iterator < char > ()));
    //std::map < std::string, int > map;
    std::ofstream output(output_filename);
    int cite_counter = 1;
    List<std::string> references;
    Hashtable<std::string, int> hash_table; 
    HashtableItem<std::string, int> * item;

    for (int i=0; i < data.length; i++) {
        std::smatch matches;
        std::regex regex("\\\\cite\\{" + data.get(i).label + "\\}");
        std::regex_search(content, matches, regex);
        if (!matches.empty()) {
            item = hash_table[data.get(i).label];
            if (item != nullptr) {
                content = std::regex_replace(content, regex, "[" + std::to_string(item->Value()) + "]");
            } else {
                content = std::regex_replace(content, regex, "[" + std::to_string(cite_counter) + "]");
                references.push_back("[" + std::to_string(cite_counter) + "] " + data.get(i).author + ", " + data.get(i).book + "\n");
                hash_table.Add(data.get(i).label, cite_counter);
                cite_counter++;
            }
        }
    }
    output << content << std::endl;
    text_file.close();
    loadReferenceFooter(references, output);
    output.close();
}

As far as my knowledge goes, the data structure should work correctly. I tried creating a destructor that went through all nodes of the linked list and deleted them one by one, but that didn't work either (in fact, it caused the application to not even start).

Dawid Cyron
  • 162
  • 1
  • 1
  • 11
  • ***I tried creating a destructor that went through all nodes of the linked list and deleted them one by one*** What happens to the ones that are removed using pop. Also if you want help with your fixing your delete code you need to show it. – drescherjm Feb 04 '20 at 17:34
  • What is the point of the `push()` function? It creates a circular, single node list that is incompatible with the rest of your class. – sweenish Feb 04 '20 at 17:35
  • 2
    `pop()` is broken as well. No destructor at all? The whole class leaks memory, it's not specific to `push()` or `push_back()`. – sweenish Feb 04 '20 at 17:39
  • Yeah guys, I realize that pop() leaks, but I don't use that method. I simply forgot to delete it, as I'm leaving in only the parts that I need. Same thing goes for the push(), I used that one before. – Dawid Cyron Feb 04 '20 at 17:41
  • 1
    The [mcve] is a great thing. By cutting the problem code back until all that's left is the bug it's usually really easy to see what the bug is. – user4581301 Feb 04 '20 at 17:42
  • Traditionally, linked lists aren't accessed with index numbers. The get function should be a search by value, and then you have to account for the value not existing, usually by throwing an exception. You could try to get fancy with `std::optional`. – sweenish Feb 04 '20 at 17:43
  • I realize that I could get fancy about it, I should probably have more error handling and all of that stuff, but this is just a quick project for uni. If it was my personal project, I would probably put much more care into it, but with this I have a short deadline and a lot of studying for exams – Dawid Cyron Feb 04 '20 at 17:44
  • push_back having to parse the entire list every. single. time. you want to push_back is so slow. Just store a tail pointer as well. – sweenish Feb 04 '20 at 17:44
  • Don't get fancy. Code is almost always better if you get simpler. If deleting the nodes caused problems you should have debugged that. If you're lucky enough to get a reliable and repeatable crash, fix the crash. Always choose to debug the solution that should work over a different solution that can't work. – user4581301 Feb 04 '20 at 17:48
  • 2
    If your homework isn't to write a linked list, why not just use `std::list` and be done with it? Same goes for a map. – sweenish Feb 04 '20 at 17:49
  • @DawidCyron "I should probably have more error handling and all of that stuff, but this is just a quick project for uni" - What a sad attitude. In *my* opinion, a *good* developer cares about correctness and quality for *every* project; throwaway, prototype, production - doesn't matter, you should *always* strive to do it properly so it becomes a habit and the default way you do things. Cutting corners is the root of an unimaginable amount of software bugs. And if you are sloppy with small stuff you'll likely also be sloppy about big stuff. – Jesper Juhl Feb 04 '20 at 17:51
  • 1
    One thing to keep an eye on is [the Rule of Three](https://stackoverflow.com/questions/4172722/what-is-the-rule-of-three). If you have a class that [owns a resource](https://stackoverflow.com/questions/49024982/what-is-ownership-of-resources-or-pointers) you need to make sure it's copied correctly or disable copying because you never know when you might accidentally make a copy and then blow up later. – user4581301 Feb 04 '20 at 17:52
  • 1
    Reiterating some of @JesperJuhl 's point, but not out of pride-in-product. I'm lazy. It's MUCH easier to debug a program that checks and tells you when something is screwy. – user4581301 Feb 04 '20 at 17:53

4 Answers4

2

"Why is this function causing memory leak?" - Simple: You allocate memory with new that you never release with delete.

In modern C++ you should generally prefer using smart pointers (a std::unique_ptr in this case) and/or container classes rather than doing manual memory management with new/delete.

Jesper Juhl
  • 30,449
  • 3
  • 47
  • 70
  • Could you possibly show me, how should I implement smart pointers in this case? I tried it, but could not figure out how to do that. I also have a deadline set for a really short time, so any help is appreciated – Dawid Cyron Feb 04 '20 at 17:40
  • 1
    @DawidCyron 1) your time constraints are irrelevant, not our problem. 2) these links may help: https://en.cppreference.com/w/cpp/memory/unique_ptr , https://en.cppreference.com/w/cpp/memory/unique_ptr/make_unique - there are examples there. – Jesper Juhl Feb 04 '20 at 17:43
  • 2
    [Herb Sutter's presentation on Smart Pointers](https://www.youtube.com/watch?v=JfmTagWcqoE) from a few years ago. He goes over the Smart pointer linked list about 20 minutes in. – user4581301 Feb 04 '20 at 17:44
2

Memory is leaked because there is no destructor to free the allocated Nodes. The asker notes that they removed the destructor because the program crashed when they had one. This is the error that should have been addressed because having a destructor is the correct thing to do.

Solution

Put the destructor back and solve why the destructor caused the program to crash.

Solution for the Solution

List violates the Rule of Three. This means when a List is copied and you have two objects both pointing to the same head Node. This Node can only be deleted once and both objects will try to delete it in the List destructor. Sooner or later the program dies a painful death.

Normally you could replace the passes by value with passes by reference, and then disallow copying by deleteing the special member functions. Eg. add

List(const List & src) = delete;
List& operator=(List src) = delete;

to List and wait for the compiler to start screaming about List being copied when the copy special functions are deleted. Replace all of the passes by value with passes by reference.

Unfortunately

List<bibliography> readBibliographyFile(char * filename)

returns by value. Return by reference of a local variable is doomed. The local variable goes out of scope and is destroyed, leaving you with a reference to an invalid object. This means you have to do this the hard way:

Implement all three special member functions:

// destructor
~List()
{
    while (head)
    {
        Node<T> * temp = head->next;
        delete head;
        head = temp;
    }
}

// copy constructor
List(const List & src): head(NULL), length(src.length)
{
    Node<T> ** destpp = &head; // point at the head.
                               // using a pointer to a pointer because it hides 
                               // the single difference between head and a Node's 
                               // next member: their name. This means we don't need 
                               // any special cases for handling the head. It's just 
                               // another pointer to a Node.
    Node<T> * srcnodep = src.head;
    while (srcnodep) //  while there is a next node in the source list
    {
        *destpp = new Node<T>{srcnodep->data, NULL}; // copy the node and store it at 
                                                     // destination
        destpp = &((*destpp)->next); // advance destination to new node
        srcnodep = srcnodep->next; // advance to next node in source list
    }
}

List& operator=(List src) // pass list by value. It will be copied
{
    length = src.length; // Technically we should swap this, but the copy 
                         // is gonna DIE real soon.
    // swap the node lists. use std::swap if you can.
    Node<T> * temp = head;
    head = src.head; 
    src.head = temp;

    // now this list has the copy's Node list and the copy can go out of scope 
    // and destroy the list that was in this List.

    return *this;
}

Notes

operator= is taking advantage of the Copy and Swap Idiom. It's often not the fastest solution, but it's easy to write and next to impossible to get wrong. I start with Copy and Swap and only migrate to something faster only when profiling the code's performance shows I have to, and that almost never happens.

The pointer-to-pointer trick used in the copy constructor also comes in really handy when inserting and removing list items.

Know and understand the Rule of Three and its friends. You cannot write complex and efficient C++ programs without it. It is possible that this assignment was given, at least in part, to force you to learn it.

user4581301
  • 33,082
  • 7
  • 33
  • 54
0

If you can chose what type of list you implement, here's a vector like version ( compared to your linked list )

#include <cstdlib>
#include <iostream>

template <typename T>
struct list final {
  T* values;
  std::size_t capacity, size;

  list() : values{nullptr}, capacity{0u}, size{0u} {}
  ~list() { std::free(values); }
  void push_back(T value) {
    if (size == capacity) {
      capacity = (capacity + 1) * 2;
      if (void* mem = std::realloc(values, capacity * sizeof(T))) {
        values = reinterpret_cast<T*>(mem);
      } else {
        throw std::bad_alloc();
      }
    }
    values[size++] = value;
  }

  void pop_back() { --size; }
};

int main() {
  list<int> integers;
  for (int i = 0; i < 10; ++i) {
    integers.push_back(i);
  }
  for (int i = 0; i < integers.size; ++i) {
    std::cout << integers.values[i] << std::endl;
  }
}
Yamahari
  • 1,926
  • 9
  • 25
-1

Ok, I found out where the problem was for me and I did a pretty dirty fix, but it works. For anyone who told me about caring more about the code, I will improve it as soon as I'm finished with my exams, but with such a strict deadline I just needed the quickest solution. You can find it below.

I added this function to the list:

void clear() {
    while (head) {
        Node<T> * temp = head->next;
        delete head;
        head = temp;
        length--;
    }
}

And then at the end of the replaceCites function I call:

data.clear();
references.clear();
Dawid Cyron
  • 162
  • 1
  • 1
  • 11
  • 1
    Unrelated: This only hides the Rule of Three violation. If this code functions it's because you are unlucky and circumstances are hiding the bug. In `void replaceCites(char * filename, List < bibliography > data, char * output_filename)` `List < bibliography > data` is passed by value, causing the the argument used to be copied into `data`. List` lacks a copy constructor, so the compiler generates one that simply copies the pointer and now you have two `List`s pointing at the same data. This blew up when you had a destructor because both the copy and the original tried to delete the nodes. – user4581301 Feb 04 '20 at 20:49
  • 1
    If your instructor actually knows C++ and is paying attention, they will catch this mistake and dock you marks. If they don't catch it, pray they are not paying attention as the alternative is you're being taught by an incompetent and will suffer for it later. – user4581301 Feb 04 '20 at 20:51