strtok()
operates on a null-terminated string as input, but it modifies the string as it goes. When it finds a delimiter in the string, it injects a nul character where the delimiter is located and returns a pointer to the token that is in front of the injected nul. On the next call with a NULL input pointer, the search starts after the previously injected nul.
So, when you run through your first loop, everything is output correctly, but strtok()
has altered the contents of cmd
to replace all of the space characters with nul characters. So, when you run your second loop, it can't find any space characters in cmd
anymore, and stops looking when the nul character after the first token is reached, even though it is not really the last nul in the string.
So, you have a few choices.
You could duplicate cmd
on each loop. You will also have to duplicate each token that you store in the output array, since they will be pointing at temporary memory:
int simple_shell::parse_command(char* cmd, char*** cmdTokens)
{
*cmdTokens = NULL;
char *dup = strdup(cmd);
if (!dup)
return -1;
int count = 0;
char *token = strtok(dup, " ");
while (token)
{
++count;
token = strtok(NULL, " ");
}
free(dup);
try {
*cmdTokens = new char*[count];
}
catch (const std::bad_alloc&) {
return -1;
}
if (count > 0)
{
dup = strdup(cmd);
if (!dup)
{
delete[] *cmdTokens;
return -1;
}
token = strtok(dup, " ");
count = 0;
while (token)
{
cmdTokens[count] = strdup(token);
if (!cmdTokens[count])
{
for(int i = 0; i < count; ++i)
free(cmdTokens[i]);
delete[] *cmdTokens;
return -1;
}
++count;
token = strtok(NULL, " ");
}
}
return count;
}
char** tokens;
int numTokens = shell.parse_command("hello world", &tokens);
if (numTokens != -1)
{
for (int i = 0; i < numTokens; ++i)
{
std::cout << tokens[i] << std::endl;
free(tokens[i]);
}
delete[] tokens;
}
Or, you could stop using strtok()
and use a less destructive way to extract substrings. You will still have to duplicate the individual tokens, though, since there won't be any nul characters injected into cmd
anymore:
const char* nextToken(const char *str)
{
if (!str)
return NULL;
while (*str == ' ')
++str;
if (*str == '\0')
return NULL;
return str;
}
const char* endOfToken(const char *str)
{
if (!str)
return NULL;
while ((*str != ' ') && (*str != '\0'))
++str;
return ptr;
}
int simple_shell::parse_command(char* cmd, char*** cmdTokens)
{
*cmdTokens = NULL;
char *ptr = cmd;
int count = 0;
const char *token = nextToken(ptr);
while (token)
{
++count;
token = nextToken(endOfToken(token));
}
try {
*cmdTokens = new char*[count];
}
catch (const std::bad_alloc&) {
return -1;
}
if (count > 0)
{
ptr = cmd;
count = 0;
token = nextToken(ptr);
while (token)
{
const char *end = endOfToken(token);
int len = (end-token);
try {
cmdTokens[count] = new char[len+1];
}
catch (const std::bad_alloc&) {
for(int i = 0; i < count; ++i)
delete[] cmdTokens[i];
return -1;
}
memcpy(cmdTokens[count], token, len);
cmdTokens[count][len] = '\0';
++count;
token = nextToken(end);
}
}
return count;
}
char** tokens;
int numTokens = shell.parse_command("hello world", &tokens);
if (numTokens != -1)
{
for (int i = 0; i < numTokens; ++i)
{
std::cout << tokens[i] << std::endl;
delete[] tokens[i];
}
delete[] tokens;
}
Or, if you don't like duplicating the tokens, just return pointers to them, as well as their lengths:
struct token_info
{
char* token;
int length;
}
const char* nextToken(const char *str)
{
if (!str)
return NULL;
while (*str == ' ')
++str;
if (*str == '\0')
return NULL;
return str;
}
const char* endOfToken(const char *str)
{
if (!str)
return NULL;
while ((*str != ' ') && (*str != '\0'))
++str;
return ptr;
}
int simple_shell::parse_command(char* cmd, token_info** cmdTokens)
{
*cmdTokens = NULL;
char *ptr = cmd;
int count = 0;
const char *token = nextToken(ptr);
while (token)
{
++count;
token = nextToken(endOfToken(token));
}
try {
*cmdTokens = new token_info[count];
}
catch (const std:bad_alloc&) {
return -1;
}
if (count > 0)
{
ptr = cmd;
count = 0;
token = nextToken(ptr);
while (token)
{
const char *end = endOfToken(token);
cmdTokens[count].token = const_cast<char*>(token);
cmdTokens[count].length = (end-token);
++count;
token = nextToken(end);
}
}
return count;
}
token_info* tokens;
int numTokens = shell.parse_command("hello world", &tokens);
if (numTokens != -1)
{
for (int i = 0; i < numTokens; ++i)
std::cout << std::string(tokens[i].token, tokens[i].length) << std::endl;
delete[] tokens;
}
But, since you are clearly using C++, you should stop using such archaic C-style methods, and use more modern C++ practices instead, eg:
std::vector<std::string> simple_shell::parse_command(const std::string &cmd)
{
std::istringstream iss(cmd);
return std::vector<std::string>(
std::istream_iterator<std::string>(iss),
std::istream_iterator<std::string>()
);
}
std::vector<std::string> tokens = shell.parse_command("hello world");
for (auto &token: tokens)
std::cout << token << std::endl;