I'm trying to write a function to parse and extract the components of a URL. Moreover, I need the components (e.g. hostname) to have the type char *
since I intend to pass them to C APIs.
My current approach is to save the components in the parse_url
function to the heap by calling malloc
. But for some reason, the following code is printing gibberish. I'm confused by this behavior because I thought memory allocated on the heap will persist even after the function allocating it returns.
I'm new to C/C++, so please let me know what I did wrong and how to achieve what I wanted. Thank you.
#include <iostream>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
using namespace std;
void cast_to_cstyle(string source, char *target)
{
target = (char *)malloc(source.size() + 1);
memcpy(target, source.c_str(), source.size() + 1);
}
void parse_url(string url, char *protocol_cstyle, char *hostname_cstyle, char *port_cstyle, char *path_cstyle)
{
size_t found = url.find_first_of(":");
string protocol = url.substr(0, found);
string url_new = url.substr(found + 3); // `url_new` is the url excluding the "http//" part
size_t found1 = url_new.find_first_of(":");
string hostname = url_new.substr(0, found1);
size_t found2 = url_new.find_first_of("/");
string port = url_new.substr(found1 + 1, found2 - found1 - 1);
string path = url_new.substr(found2);
cast_to_cstyle(protocol, protocol_cstyle);
cast_to_cstyle(hostname, hostname_cstyle);
cast_to_cstyle(port, port_cstyle);
cast_to_cstyle(path, path_cstyle);
}
int main() {
char *protocol;
char *hostname;
char *port;
char *path;
parse_url("http://www.example.com:80/file.txt", protocol, hostname, port, path);
printf("%s, %s, %s, %s\n", (protocol), (hostname), (port), (path));
return 0;
}