So, here's a working sketch of how you might go about it in C on Linux. This is a quick hack that I do not claim to be exemplary or efficient code. It (ab)uses PATH_MAX, uses “bad” string functions, and may leak memory, eat your cat, and have corner cases that segfault. When it breaks, you get to keep both parts.
The basic idea is to go through the given path, breaking it up into “words” using “/” as the delimiter. Then go through the list of words, pushing each one onto a stack, except that a word is ignored if it is empty or “.”, and the stack is popped if the word is “..”. Finally, serialize the stack by starting at the bottom and accumulating a string with slashes in between the words.
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <linux/limits.h>
// Fixed-capacity stack of C-string pointers.
//
// The stack stores the pointers it is given and nothing else; the strings
// they point to are not copied.  `top` counts the entries currently in use,
// so the bottom of the stack is data[0], the next free slot is data[top],
// and an empty stack has top == 0.
typedef struct stack_s stack_s;

struct stack_s {
    char *data[PATH_MAX];   // entry storage, oldest entry first
    int top;                // number of entries currently on the stack
};
// Push the pointer c onto the stack s.
//
// Only the pointer is stored (the string is not copied), so it has to stay
// valid for as long as it remains on the stack.
//
// The stack has a fixed capacity.  A push onto a full stack, or through a
// NULL stack pointer, is ignored rather than writing past the end of
// s->data.  A caller that needs to detect a dropped push can compare s->top
// before and after the call.
void stack_push(stack_s *s, char *c) {
    if (s == NULL) {
        return;
    }

    // Take the capacity from the array itself so this check cannot drift
    // away from the struct definition.
    const size_t capacity = sizeof s->data / sizeof s->data[0];
    if (s->top < 0 || (size_t)s->top >= capacity) {
        return;
    }

    s->data[s->top++] = c;
}
// Remove the most recently pushed pointer from s and return it.
// An empty stack is left untouched and yields NULL.
char *stack_pop(stack_s *s) {
    if (s->top > 0) {
        s->top -= 1;
        return s->data[s->top];
    }
    return NULL;
}
// Join the entries of s, from the bottom of the stack to the top, into a
// single string with a '/' between consecutive entries.
//
// DANGER! DANGER! Returns a malloc()ed pointer that you must free().
// Returns NULL if the allocation fails.
//
// An empty stack yields an empty string.  (Previously the function appended
// s->data[0] even when the stack was empty, reading a stale slot and writing
// past the end of a one-byte buffer.)
char *stack_serialize(stack_s *s) {
    // Every entry is budgeted one extra byte (its slash), plus one byte for
    // the terminating NUL.  That is one byte more than strictly needed, which
    // keeps the arithmetic trivially safe for an empty stack.
    size_t total = 1;
    for (int i = 0; i < s->top; i++) {
        total += strlen(s->data[i]) + 1;
    }

    char *buf = malloc(total);
    if (buf == NULL) {
        return NULL;
    }

    // Append through a moving cursor instead of strcat(), which would rescan
    // the whole buffer on every call.
    char *out = buf;
    for (int i = 0; i < s->top; i++) {
        if (i > 0) {
            *out++ = '/';
        }
        const size_t n = strlen(s->data[i]);
        memcpy(out, s->data[i], n);
        out += n;
    }
    *out = '\0';

    return buf;
}
// DANGER! DANGER! Returns malloc()ed pointer that you must free()
char *semicanonicalize(char *src) {
char *word[PATH_MAX] = {NULL};
int w=0;
int n_words;
char *buf;
int len;
char *p, *q;
stack_s dir_stack = {{NULL},0};
// Make a copy of the input string:
len = strlen(src);
buf = strdup(src);
// Replace slashes with NULs and record the start of each "word"
q = buf+len;
word[0]=buf;
for(p=buf,w=0; p<q; p++) {
if(*p=='/') {
*p = '\0';
word[++w] = p+1;
}
}
n_words=w+1;
// We push w[0] unconditionally to preserve slashes and dots at the
// start of the source path:
stack_push(&dir_stack, word[0]);
for(w=1; w<n_words; w++) {
len = strlen(word[w]);
if( len == 0 ) {
// Must've hit a double slash
continue;
}
if( *word[w] == '.' ) {
if( len == 1 ) {
// Must've hit a dot
continue;
}
if( len == 2 && *(word[w]+1)=='.' ) {
// Must've hit a '..'
(void)stack_pop(&dir_stack);
continue;
}
}
// If we get to here, the current "word" isn't "", ".", or "..", so
// we push it on the stack:
stack_push(&dir_stack, word[w]);
}
p = stack_serialize(&dir_stack);
free(buf);
return p;
}
// Demo driver: runs semicanonicalize() on a few sample paths and prints each
// input next to its simplified form.  Exits with a failure status if a path
// cannot be processed.
int main(void)
{
    char *in[] = { "/home/emmet/../foo//./bar/quux/../.",
                   "../home/emmet/../foo//./bar/quux/../.",
                   "./home/emmet/../foo//./bar/quux/../.",
                   "home/emmet/../foo//./bar/quux/../."
                 };
    // Derive the count from the array so adding a sample needs no other edit.
    const size_t n_inputs = sizeof in / sizeof in[0];

    for (size_t i = 0; i < n_inputs; i++) {
        char *out = semicanonicalize(in[i]);
        if (out == NULL) {
            // Passing NULL to printf's %s is undefined behaviour.
            fprintf(stderr, "semicanonicalize failed for %s\n", in[i]);
            return EXIT_FAILURE;
        }
        printf("%s \t->\t %s\n", in[i], out);
        free(out);
    }
    return 0;
}