1

I want to create a graph with 2-3 million vertices using an adjacency list. The input is created randomly. When I ran a version that only prints out the increasing numbers of edges, it worked perfectly (returned 0). But when I add the BFS and DFS, it only prints out about 80% of the numbers and then returns 123456789.

Here is my code:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

struct Node {
    int vertex;
    struct Node* next;
};

struct Graph {
    int num_vertices;
    struct Node** adj_list;
};

struct Node* createNode(int v) {
    struct Node* newNode = (struct Node*)malloc(sizeof(struct Node));
    newNode->vertex = v;
    newNode->next = NULL;
    return newNode;`your text`
}

struct Graph* createGraph(int num_vertices) {
    struct Graph* graph = (struct Graph*)malloc(sizeof(struct Graph));
    graph->num_vertices = num_vertices;
    graph->adj_list = (struct Node**)malloc(num_vertices * sizeof(struct Node*));

    int i;
    for (i = 0; i < num_vertices; i++) {
        graph->adj_list[i] = NULL;
    }

    return graph;
}

void addEdge(struct Graph* graph, int src, int dest) {
    struct Node* newNode = createNode(dest);
    newNode->next = graph->adj_list[src];
    graph->adj_list[src] = newNode;

    newNode = createNode(src);
    newNode->next = graph->adj_list[dest];
    graph->adj_list[dest] = newNode;
}

void DFSUtil(struct Graph* graph, int v, int* visited) {
    visited[v] = 1;
    printf("%d ", v);

    struct Node* temp = graph->adj_list[v];
    while (temp) {
        int adj_vertex = temp->vertex;
        if (!visited[adj_vertex]) {
            DFSUtil(graph, adj_vertex, visited);
        }
        temp = temp->next;
    }
}

void DFS(struct Graph* graph, int start_vertex) {
    int* visited = (int*)calloc(graph->num_vertices, sizeof(int));
    DFSUtil(graph, start_vertex, visited);
    free(visited);
}

void BFS(struct Graph* graph, int start_vertex) {
    int* visited = (int*)calloc(graph->num_vertices, sizeof(int));
    int* queue = (int*)malloc(graph->num_vertices * sizeof(int));
    int front = 0, rear = 0;

    visited[start_vertex] = 1;
    queue[rear++] = start_vertex;

    while (front < rear) {
        int current_vertex = queue[front++];
        printf("%d ", current_vertex);

        struct Node* temp = graph->adj_list[current_vertex];
        while (temp) {
            int adj_vertex = temp->vertex;
            if (!visited[adj_vertex]) {
                visited[adj_vertex] = 1;
                queue[rear++] = adj_vertex;
            }
            temp = temp->next;
        }
    }

    free(visited);
    free(queue);
}

int main() {
    int num_vertices = 50000000;
    int num_edges = 10000000;
    struct Graph* graph = createGraph(num_vertices);
    srand(time(NULL));
    int i;
    for (i = 0; i < num_edges; i++) {
        int src = rand() % num_vertices;
        int dest = rand() % num_vertices;
        addEdge(graph, src, dest);
        //print the number of edge
        printf("\ncount: %d",i);
    }
    
    //BFS and DFS code
    int start_vertex = 0;

    printf("Depth-First Search (DFS): ");
    DFS(graph, start_vertex);
    printf("\n");

    printf("Breadth-First Search (BFS): ");
    BFS(graph, start_vertex);
    printf("\n");

    

    return 0;
}

if I change the values of num_vertices and num_edges to smaller ones:

int num_vertices = 1000000;
int num_edges = 2000000;

the code runs to completion with return=0, but if I change the values of num_vertices and num_edges to bigger ones:

int num_vertices = 10000000;
int num_edges = 20000000;

the code return=164564564...

I think maybe the numbers are too big, but I don't know why or how to solve it.

John Bollinger
  • 160,171
  • 8
  • 81
  • 157
bui hiep
  • 21
  • 3
  • The recursion is too deep and you're getting a stack overflow. `3221225725` (better known as `0xC00000FD`) is the Windows error number for stack overflow. You need either increase the stack size or design your algorithm in such a way that it doesn't need so much recursion depth. – Jabberwocky May 11 '23 at 14:20
  • I have the strong impression that `DFSUtil` can be written as an iterative function rather than a recursive one. – Jabberwocky May 11 '23 at 14:27
  • It doesn't look like you're actually performing depth-first or breadth-first *searches*. Rather, you are *traversing* the whole connected component to which the start vertex belongs (which is probably, but not necessarily, the whole graph), in either depth-first or breadth-first order. – John Bollinger May 11 '23 at 14:38
  • 2
    Your impression is correct, @Jabberwocky. Not only can *any* recursive algorithm be rewritten as an iterative one, but DFS is one of the poster children for this. DFS and BFS can be viewed as exactly the same algorithm, except for BFS using a FIFO queue to track the nodes to process, and DFS using a LIFO stack for the same purpose. Recursive DFS relies on the *call* stack for that, whereas iterative DFS uses an application-managed stack. – John Bollinger May 11 '23 at 15:02

1 Answers1

1

There are at least two possibilities:

  1. You are exhausting available memory (that your C implementation is willing to let you allocate). As a result, you are performing null-pointer dereferences, and undefined behavior ensues. (Probably the program crashes.)

  2. You are recursing deeply enough to overflow the stack. Just how much stack is available for you to use is system- and context-dependant, but you are already pushing your luck if you rely on a thousand levels. On some systems, the limit is much less; on others, more. (In any case, probably the program crashes.)

If the system can't or won't give you enough memory then you need a beefier system, but you should detect that by checking whether each memory allocation succeeds, and failing gracefully, with an informative diagnostic, if any of them don't.

If you have a stack overflow then you may be able to overcome that by switching from your recursive version of DFS to an iterative version. Ultimately, though, you are still limited by how much memory the system will let you use, so if you keep increasing the problem size then you will still eventually reach a point where you need a system with more resources.

John Bollinger
  • 160,171
  • 8
  • 81
  • 157