Expat is a streaming XML parser. That is, it parses the document in chunks, and it's up to you (the programmer) to maintain state, monitor progress, and capture the data you need while it is being parsed, not afterwards.
There are three primary handlers you need to build:
XML_StartElementHandler
XML_EndElementHandler
XML_CharacterDataHandler
You'll find function signatures for these in expat.h
. The first function is invoked when a starting tag is found (<tag>
), the second function is invoked when an ending tag is found (</tag>
), and the third function captures the text between the tags. This is a rudimentary but fast parsing technique.
Here's a proof of concept using Expat 2.1:
#include <expat.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/*
 * printf length modifier used when printing the line number returned by
 * XML_GetCurrentLineNumber().  The modifier depends on whether XML_LARGE_SIZE
 * is defined, and on old MSVC toolchains that use "I64" instead of "ll".
 */
#if !defined(XML_LARGE_SIZE)
#define XML_FMT_INT_MOD "l"
#elif defined(XML_USE_MSC_EXTENSIONS) && _MSC_VER < 1400
#define XML_FMT_INT_MOD "I64"
#else
#define XML_FMT_INT_MOD "ll"
#endif
/* One configuration entry: an XML element name and the text captured for it. */
struct setting {
    const char *key;   /* element name to look for */
    char *value;       /* heap-allocated text, or NULL until one is stored */
};

/*
 * Table of recognised settings.  startElement() looks names up with
 * bsearch(), so the entries must stay sorted by key (strcmp order).
 */
struct setting config[] = {
    { .key = "board_height",       .value = NULL },
    { .key = "board_width",        .value = NULL },
    { .key = "maximum_highscores", .value = NULL },
};

/* Entry for the element currently being parsed, or NULL when there is none. */
struct setting *current_setting = NULL;
/*
 * bsearch() comparison callback: orders two `struct setting` objects by
 * their `key` strings.
 *
 * Returns the strcmp() result for the two keys (negative, zero or positive).
 */
int
key_cmp(void const *ld, void const *rd)
{
    const char *left_key = ((const struct setting *)ld)->key;
    const char *right_key = ((const struct setting *)rd)->key;

    return strcmp(left_key, right_key);
}
/*
 * Character-data callback: appends the `len` characters at `s` to the value
 * of the setting currently being parsed.
 *
 * userData  unused.
 * s         text delivered by the parser; only `len` characters are read, so
 *           it is not treated as a NUL-terminated string.
 * len       number of characters available at `s`.
 *
 * The text is appended rather than assigned because the parser may deliver
 * one run of text through several calls.  The stored value is always
 * NUL-terminated.  If memory cannot be allocated, the value collected so far
 * is left untouched.
 *
 * NOTE(review): bytes are copied as `char`, which assumes XML_Char is `char`
 * (a non-XML_UNICODE Expat build) -- confirm for your build.
 */
void XMLCALL
handler(void *userData, const XML_Char *s, int len)
{
    (void)userData;

    if (len <= 0 || !current_setting) {
        return;
    }

    size_t have = current_setting->value ? strlen(current_setting->value) : 0;
    size_t add = (size_t)len;

    /* Use a temporary so the existing buffer is not lost if realloc fails. */
    char *grown = realloc(current_setting->value, have + add + 1);
    if (!grown) {
        return;
    }

    memcpy(grown + have, s, add);
    grown[have + add] = '\0';
    current_setting->value = grown;
}
/*
 * Start-tag callback: selects the `config` entry whose key equals the
 * element name, or NULL when the element is not a known setting.
 *
 * userData  unused.
 * name      element name reported by the parser.
 * atts      element attributes; unused.
 *
 * When a known element is opened, any value stored by an earlier occurrence
 * of the same element is released and reset, so the last occurrence in the
 * document wins and the old buffer is not leaked.
 */
static void XMLCALL
startElement(void *userData, const char *name, const char **atts)
{
    (void)userData;
    (void)atts;

    const struct setting probe = { .key = name };

    /* bsearch relies on `config` being sorted by key. */
    current_setting = bsearch(&probe, config,
                              sizeof config / sizeof config[0],
                              sizeof config[0], key_cmp);

    if (current_setting) {
        free(current_setting->value);
        current_setting->value = NULL;
    }
}
/*
 * End-tag callback: clears `current_setting` so that character data seen
 * after the closing tag is not attributed to any setting.
 *
 * Both parameters are unused.
 */
static void XMLCALL
endElement(void *userData, const char *name)
{
    (void)userData;
    (void)name;

    current_setting = NULL;
}
/*
 * Parses "config.xml" from the current directory with Expat, feeding it to
 * the parser one buffer at a time, then prints every entry of `config` as
 * "key: value".
 *
 * Returns 0 on success, or 1 if the file cannot be opened or read, the parser
 * cannot be created, or the document fails to parse.  The file, the parser
 * and all stored values are released on every path.
 */
int
main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    int rc = 1;
    int done = 0;
    char buf[BUFSIZ];

    FILE *fp = fopen("config.xml", "r");
    if (!fp) {
        perror("config.xml");
        return 1;
    }

    XML_Parser parser = XML_ParserCreate(NULL);
    if (!parser) {
        fprintf(stderr, "could not create XML parser\n");
        fclose(fp);
        return 1;
    }

    XML_SetElementHandler(parser, startElement, endElement);
    XML_SetCharacterDataHandler(parser, handler);

    do {
        size_t got = fread(buf, 1, sizeof buf, fp);

        /* A short read is either EOF or an error; only EOF ends the input. */
        if (ferror(fp)) {
            perror("config.xml");
            goto cleanup;
        }
        done = got < sizeof buf;

        if (XML_Parse(parser, buf, (int)got, done) == XML_STATUS_ERROR) {
            fprintf(stderr,
                    "%s at line %" XML_FMT_INT_MOD "u\n",
                    XML_ErrorString(XML_GetErrorCode(parser)),
                    XML_GetCurrentLineNumber(parser));
            goto cleanup;
        }
    } while (!done);

    for (size_t i = 0; i < sizeof config / sizeof config[0]; i++) {
        /* Passing NULL to %s is undefined, so print a placeholder instead. */
        printf("%s: %s\n", config[i].key,
               config[i].value ? config[i].value : "(not set)");
    }
    rc = 0;

cleanup:
    XML_ParserFree(parser);
    fclose(fp);
    for (size_t i = 0; i < sizeof config / sizeof config[0]; i++) {
        free(config[i].value);
        config[i].value = NULL;
    }
    return rc;
}
This program opens a file config.xml
for reading, sets the appropriate Expat handlers, and stores a copy of the text found inside each recognised tag as that setting's "value" (the XML tag name is used as the key).
The code provided is merely an example. I'm not a professional C programmer.